initial commit

rwynn · Mar 11, 2017 · cb7e7b7 · cb7e7b7
1 parent 8eb6115
commit cb7e7b7
Show file tree

Hide file tree

Showing 3 changed files with 971 additions and 0 deletions.
diff --git a/Makefile b/Makefile
@@ -0,0 +1,44 @@
+# Build settings for cross-compiling the mongofluxd release binaries.
+# Simple (:=) assignment is used throughout so every value, including the
+# $(shell ...) call below, is computed exactly once when the Makefile is read.
+GB := go build
+BUILD_DIR := build
+
+# Environment prefixes that select the Go cross-compilation target.
+ENV_LINUX := env GOOS=linux GOARCH=amd64
+ENV_WIN := env GOOS=windows GOARCH=amd64
+ENV_DARWIN := env GOOS=darwin GOARCH=amd64
+
+# One output directory per platform underneath $(BUILD_DIR).
+OUT_LINUX_DIR := $(BUILD_DIR)/linux-amd64
+OUT_WIN_DIR := $(BUILD_DIR)/windows-amd64
+OUT_DARWIN_DIR := $(BUILD_DIR)/darwin-amd64
+
+# Paths of the binaries written by the build.
+OUT_LINUX := $(OUT_LINUX_DIR)/mongofluxd
+OUT_WIN := $(OUT_WIN_DIR)/mongofluxd.exe
+OUT_DARWIN := $(OUT_DARWIN_DIR)/mongofluxd
+
+# Checksum files stored next to each binary.
+SHA256_LINUX := $(OUT_LINUX_DIR)/sha256.txt
+SHA256_WIN := $(OUT_WIN_DIR)/sha256.txt
+SHA256_DARWIN := $(OUT_DARWIN_DIR)/sha256.txt
+
+MD5_LINUX := $(OUT_LINUX_DIR)/md5.txt
+MD5_WIN := $(OUT_WIN_DIR)/md5.txt
+MD5_DARWIN := $(OUT_DARWIN_DIR)/md5.txt
+
+# Abbreviated hash of the current commit; used in the release archive name.
+GIT_TAG := $(shell git rev-parse --short HEAD)
+
+# Linker flags handed to go build.
+LDFLAGS := -ldflags="-s -w"
+
+TARGET := mongofluxd
+
+# all and clean name actions, not files; declaring them phony keeps a stray
+# file called "all" or "clean" from silently disabling them.
+.PHONY: all clean
+
+# Default goal: build the release binaries, checksums and archive.
+all: $(TARGET)
+
+# Cross-compile for linux, windows and darwin, write sha256/md5 checksum files
+# next to each binary, then zip the build tree into an archive named after the
+# current short commit hash.
+# NOTE: the recipe does not create a file named $(TARGET) (the binaries land
+# under $(BUILD_DIR)), so it re-runs on each invocation as long as no such
+# file exists in this directory.
+$(TARGET): $(TARGET).go
+	# Create the output directories up front so the checksum redirects below
+	# never depend on the compiler having created them.
+	mkdir -p $(OUT_LINUX_DIR) $(OUT_WIN_DIR) $(OUT_DARWIN_DIR)
+	$(ENV_LINUX) $(GB) $(LDFLAGS) -v -o $(OUT_LINUX)
+	$(ENV_WIN) $(GB) $(LDFLAGS) -v -o $(OUT_WIN)
+	$(ENV_DARWIN) $(GB) $(LDFLAGS) -v -o $(OUT_DARWIN)
+	sha256sum $(OUT_LINUX) > $(SHA256_LINUX)
+	sha256sum $(OUT_WIN) > $(SHA256_WIN)
+	sha256sum $(OUT_DARWIN) > $(SHA256_DARWIN)
+	md5sum $(OUT_LINUX) > $(MD5_LINUX)
+	md5sum $(OUT_WIN) > $(MD5_WIN)
+	md5sum $(OUT_DARWIN) > $(MD5_DARWIN)
+	# The archive is written inside the tree being archived; exclude *.zip so
+	# archives left over from earlier commits are not nested into this one.
+	zip -r $(BUILD_DIR)/$(TARGET)-$(GIT_TAG).zip $(BUILD_DIR) -x '$(BUILD_DIR)/*.zip'
+
+# Remove everything the build produced.
+clean:
+	$(RM) -R $(BUILD_DIR)
diff --git a/README.md b/README.md
@@ -1,2 +1,112 @@
 # mongofluxd
 Real time sync from MongoDB into InfluxDB
+
+### Installation
+
+Download the latest [release](https://github.com/rwynn/mongofluxd/releases) or install with go get
+
+	go get -v github.com/rwynn/mongofluxd
+
+### Usage
+
+Since mongofluxd reads the MongoDB oplog, MongoDB must be configured to produce an oplog.
+
+This can be ensured by doing one of the following:
++ Setting up [replica sets](http://docs.mongodb.org/manual/tutorial/deploy-replica-set/)
++ Passing --master to the mongod process
++ Setting the following in /etc/mongod.conf
+
+	```
+	master = true
+	```
+
+Run mongofluxd with the -f option to point to a configuration file.  The configuration format is toml.
+
+A configuration looks like this:
+
+```toml
+influx-url = "http://localhost:8086"
+influx-skip-verify = true
+influx-auto-create-db = true
+#influx-pem-file = "/path/to/cert.pem"
+influx-clients = 10
+
+mongo-url = "localhost"
+# use the default MongoDB port on localhost
+mongo-skip-verify = true
+#mongo-pem-file = "/path/to/cert.pem"
+
+replay = false
+# process all events from the beginning of the oplog
+
+resume = false
+# save the timestamps of processed events for resuming later
+
+resume-name = "mongofluxd"
+# the key to store timestamps under in the collection mongoflux.resume
+
+verbose = false
+# output some information when points are written
+
+direct-reads = true
+# read events directly out of mongodb collections in addition to tailing the oplog
+
+exit-after-direct-reads = true
+# exit the process after direct reads have completed. defaults to false to continuously read events from the oplog
+
+[[measurement]]
+# this measurement will only apply to the collection test in db test
+# measurements are stored in an Influx DB matching the name of the MongoDB database
+namespace = "test.test"
+# fields must be document properties of type int, float, bool, or string
+# nested fields like "e.f" are supported, e.g. { e: { f: 1.5 }}
+fields = ["c", "d"]
+# optionally override the field to take time from.  defaults to the insertion ts at second precision
+# recommended if you need ms precision.  use Mongo's native Date object to get ms precision
+timefield = "t"
+# optionally override the time precision.  defaults to "s" since MongoDB oplog entries are to the second
+# use in conjunction with timefield and native Mongo Date to get ms precision
+precision = "ms"
+
+[[measurement]]
+namespace = "db.products"
+# optional tags must be top level document properties with string values
+tags = ["sku", "category"]
+fields = ["sales", "price"]
+# set the retention policy for this measurement
+retention = "RP1" 
+# override the measurement name which defaults to the name of the MongoDB collection
+measure = "sales"
+```
+
+### Some numbers
+
+Load 100K documents of time series data into MongoDB.
+
+        // sleep for 2ms to ensure t is 2ms apart
+	for (var i=0; i<100000; ++i) { sleep(2); var t = new Date(); db.test.insert({c: 1, d: 5.5, t: t}); }
+
+Run mongofluxd with direct reads on test.test (config contents above)
+
+	time ./mongofluxd -f flux.toml
+
+	real    0m2.167s
+	user    0m2.028s
+	sys     0m0.444s
+
+Verify it all got into InfluxDB
+
+	Connected to http://localhost:8086 version 1.2.0
+	InfluxDB shell version: 1.2.0
+	> use test;
+	Using database test
+	> select count(*) from test;
+	name: test
+	time count_c count_d
+	---- ------- -------
+	0    100000  100000
+
+
+On a VirtualBox VM with 4 virtual cores and 4096 MB of memory, syncing 100K documents from MongoDB to InfluxDB
+took only 2.167 seconds for a throughput of 46,146 points per second.
+