diff --git a/.gitignore b/.gitignore index cae4012..8cef70c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ # Created by https://www.gitignore.io +### Gradle ### +build/ + ### Maven ### target/ pom.xml.tag diff --git a/README.md b/README.md index c8f9908..219485f 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,63 @@ # couchbase-java-importer -This is a pluggable importer for Couchbase + +This is a pluggable importer for Couchbase. So far it supports importing documents from a CSV file, a MongoDB instance, or a CouchDB instance. + +## How to Build + +``` +$ ./gradlew build +``` + +## How to Use + +``` +$ cd build/distributions/ +$ unzip couchbase-java-importer.zip +``` + +At this step you need to choose what kind of import you want to do. Configuration samples are available at the root of the repository. Let's pretend you want to import a MongoDB collection called `restaurants` from the database `test` and add a field called `type` with the value `restaurant` to every imported document. Let's also pretend you have a local MongoDB and local Couchbase instance. 
For that you would need the following configuration: + +``` +# Hostnames, comma separated list of Couchbase node IP or hostname +hostnames: localhost,127.0.0.1 +# Bucket name +bucket: default +# Bucket password +password: +# Log to write successfully imported keys +successLogFilename: succes.out +# Log to write unsuccessfully imported keys +errorLogFilename: error.out +# Default RequestCancelledException delay in milliseconds and maximum number of retries +requestCancelledExceptionDelay: 31000 +requestCancelledExceptionRetries: 100 +# Default TemporaryFailureException delay in milliseconds and maximum number of retries +temporaryFailureExceptionDelay: 100 +temporaryFailureExceptionRetries: 100 +# Default upsert timeout in milliseconds +importTimeout: 500 +# Choose between CSV, COUCHDB, MONGODB +choosenImporter: MONGODB +mongodb: + # Give a valid connection string to connect to a MongoDB instance + connectionString: "mongodb://127.0.0.1:27017/" + # Name of the MongoDB database to connect to + dbName: "test" + # Name of the collection to import + collectionName: "restaurants" + # Couchbase does not have collections; we usually use a type field instead. As there could already be a type field in Mongo, you can specify another fieldName to be used as type + typeField: "type" + # type of the documents that will be imported + type: "restaurant" +``` + +This is the content of the MongoDB sample configuration. To run the import, copy the configuration file and run the importer: + +``` +$ cp ../../../application-mongodb.yml.sample application.yml +$ ./bin/couchbase-java-importer +``` + +Once the import has run you should have one file, named by `successLogFilename` (`succes.out` in the configuration above), that contains the id of every document imported. If something went wrong you should also have a file called `error.out`. + +Every configuration sample contains comments that should help you understand the various import options. 
diff --git a/application-couchdb.yml.sample b/application-couchdb.yml.sample new file mode 100644 index 0000000..b67ce10 --- /dev/null +++ b/application-couchdb.yml.sample @@ -0,0 +1,23 @@ +# Hostnames, comma separated list of Couchbase node IP or hostname +hostnames: localhost,127.0.0.1 +# Bucket name +bucket: default +# Bucket password +password: +# Log to write successfully imported keys +successLogFilename: succes.out +# Log to write unsuccessfully imported keys +errorLogFilename: error.out +# Default RequestCancelledException delay in milliseconds and maximum number of retries +requestCancelledExceptionDelay: 31000 +requestCancelledExceptionRetries: 100 +# Default TemporaryFailureException delay in milliseconds and maximum number of retries +temporaryFailureExceptionDelay: 100 +temporaryFailureExceptionRetries: 100 +# Default upsert timeout in milliseconds +importTimeout: 500 +# Choose between CSV, COUCHDB, MONGODB, JSON_GENERATOR +choosenImporter: COUCHDB +couchdb: + # Download URL + downloadURL: http://127.0.0.1:5984/database_export/_all_docs?include_docs=true \ No newline at end of file diff --git a/application-csv.yml.sample b/application-csv.yml.sample new file mode 100644 index 0000000..cb5111d --- /dev/null +++ b/application-csv.yml.sample @@ -0,0 +1,57 @@ +# Hostnames, comma separated list of Couchbase node IP or hostname +hostnames: localhost,127.0.0.1 +# Bucket name +bucket: default +# Bucket password +password: +# Log to write successfully imported keys +successLogFilename: succes.out +# Log to write unsuccessfully imported keys +errorLogFilename: error.out +# Default RequestCancelledException delay in milliseconds and maximum number of retries +requestCancelledExceptionDelay: 31000 +requestCancelledExceptionRetries: 100 +# Default TemporaryFailureException delay in milliseconds and maximum number of retries +temporaryFailureExceptionDelay: 100 +temporaryFailureExceptionRetries: 100 +# Default upsert timeout in milliseconds +importTimeout: 500 +# Choose between 
CSV, COUCHDB, MONGODB, JSON_GENERATOR +choosenImporter: CSV +csv: + # CSV column separator character + columnSeparator: ';' + # CSV quotes + quoteChar: '' + # Path to the CSV file to import + csvFilePath: /home/couchbase/csvimporter/advocates.csv + # Skip the first line of the CSV for field names + skipFirstLineForNames: true + # Any format usable by the Java SimpleDateFormat Class + dateFormat: EEE MMM dd HH:mm:ss z yyyy + # Language tag used by Java's Locale class + languageTag: FR_FR + # Number of columns to import + totalcolumns: 10 + # Column index to use the column value as id + keyColumIndex: 0 + # The value of this field will be added as key prefix + keyPrefix: "advocate::" + # Give the type of each column: String, Long, Double, Boolean or Date. The list must have exactly as many entries as there are columns in your file + columType: + - STRING + - STRING + - STRING + - STRING + - STRING + - DATE + - LONG + # Choose the name of the fields for each column, mandatory if skipFirstLineForNames is set to true. + columName: + - id + - type + - firstname + - lastname + - location + - creationDate + - count \ No newline at end of file diff --git a/application-mongodb.yml.sample b/application-mongodb.yml.sample index ea4dc19..a8bbb64 100644 --- a/application-mongodb.yml.sample +++ b/application-mongodb.yml.sample @@ -19,8 +19,13 @@ importTimeout: 500 # Choose between CSV, COUCHDB, MONGODB, JSON_GENERATOR choosenImporter: MONGODB mongodb: + # Give a valid connection string to connect to a MongoDB instance connectionString: "mongodb://127.0.0.1:27017/" + # Name of the MongoDB database to connect to dbName: "test" + # Name of the collection to import collectionName: "restaurants" + # Couchbase does not have collections; we usually use a type field instead. As there could already be a type field in Mongo, you can specify another fieldName to be used as type typeField: "type" + # type of the documents that will be imported type: "restaurant"