From 7a0f9402e39facd0c48bd1b2dc01d864edb4d387 Mon Sep 17 00:00:00 2001 From: Emily Date: Fri, 15 Jan 2016 17:24:52 +0100 Subject: [PATCH 01/14] Micro benchmark driver tests --- Rakefile | 26 +++++++++ profile/benchmarking.rb | 11 ++++ profile/benchmarking/helper.rb | 59 +++++++++++++++++++ profile/benchmarking/micro.rb | 104 +++++++++++++++++++++++++++++++++ 4 files changed, 200 insertions(+) create mode 100644 profile/benchmarking.rb create mode 100644 profile/benchmarking/helper.rb create mode 100644 profile/benchmarking/micro.rb diff --git a/Rakefile b/Rakefile index 0e9edfa805..dd5afab403 100644 --- a/Rakefile +++ b/Rakefile @@ -41,3 +41,29 @@ namespace :docs do system "yardoc -o #{out} --title mongo-#{Mongo::VERSION}" end end + +require_relative "profile/benchmarking" + +namespace :benchmark do + desc "Run the driver benchmark tests" + + namespace :micro do + desc "Run the driver micro benchmark tests" + task :flat do + puts "MICRO BENCHMARK:: FLAT" + Mongo::Benchmarking::Micro.run(:flat) + end + + task :deep do + puts "MICRO BENCHMARK:: DEEP" + Mongo::Benchmarking::Micro.run(:deep) + end + + task :full do + puts "MICRO BENCHMARK:: FULL" + Mongo::Benchmarking::Micro.run(:full) + end + + task :all => [:flat, :deep, :full ] + end +end \ No newline at end of file diff --git a/profile/benchmarking.rb b/profile/benchmarking.rb new file mode 100644 index 0000000000..e4f889daf7 --- /dev/null +++ b/profile/benchmarking.rb @@ -0,0 +1,11 @@ +module Mongo + module Benchmarking + + CURRENT_PATH = File.expand_path(File.dirname(__FILE__)) + MICRO_TESTS_PATH = "#{CURRENT_PATH}/benchmarking/data/micro/" + + require 'benchmark' + require_relative 'benchmarking/helper' + require_relative 'benchmarking/micro' + end +end diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb new file mode 100644 index 0000000000..e2c144bcb3 --- /dev/null +++ b/profile/benchmarking/helper.rb @@ -0,0 +1,59 @@ +module Mongo + + # Helper functions used by benchmarking tasks + 
module Benchmarking + + extend self + + # Load a json file and represent each document as a Hash. + # + # @example Load a file. + # Benchmarking.load_file(file_name) + # + # @param [ String ] The file name. + # + # @return [ Array ] A list of extended-json documents. + # + # @since 2.2.2 + def load_file(file_name) + File.open(file_name, "r") do |f| + f.each_line.collect do |line| + parse_json(line) + end + end + end + + # Load a json document as a Hash and convert BSON-specific types. + # Replace the _id field as an BSON::ObjectId if it's represented as '$oid'. + # + # @example Parse a json document. + # Benchmarking.parse_json(document) + # + # @param [ Hash ] The json document. + # + # @return [ Hash ] An extended-json document. + # + # @since 2.2.2 + def parse_json(document) + JSON.parse(document).tap do |doc| + if doc['_id'] && doc['_id']['$oid'] + doc['_id'] = BSON::ObjectId.from_string(doc['_id']['$oid']) + end + end + end + + # Get the median of values in a list. + # + # @example Get the median. + # Benchmarking.median(values) + # + # @param [ Array ] The values to get the median of. + # + # @return [ Numeric ] The median of the list. + # + # @since 2.2.2 + def median(values) + values.sort![values.size/2-1] + end + end +end diff --git a/profile/benchmarking/micro.rb b/profile/benchmarking/micro.rb new file mode 100644 index 0000000000..e69a4de222 --- /dev/null +++ b/profile/benchmarking/micro.rb @@ -0,0 +1,104 @@ +module Mongo + module Benchmarking + module Micro + + # The number of repetitions of the test to do before timing. + # + # @return [ Integer ] The number of warmup repetitions. + # + # @since 2.2.2 + WARMUP_REPETITIONS = 10 + + # The number of times to run and time the test. + # + # @return [ Integer ] The number of test repetitions. + # + # @since 2.2.2 + TEST_REPETITIONS = 100 + + extend self + + # Run a micro benchmark test. + # + # @example Run a test. + # Benchmarking::Micro.run(:flat) + # + # @param [ Symbol ] The type of test to run. 
+ # + # @return [ Array ] An array of results. + # + # @since 2.2.2 + def run(type) + file = type.to_s << "_bson.json" + file_path = MICRO_TESTS_PATH + file + ['encode', 'decode'].collect do |method| + result = send(method, file_path, TEST_REPETITIONS) + puts "#{method} : #{result}" + end + end + + # Run an encoding micro benchmark test. + # + # @example Run an encoding test. + # Benchmarking::Micro.encode(file_name, 100) + # + # @param [ String ] The name of the file with data for the test. + # @param [ Integer ] The number of test repetitions. + # + # @return [ Numeric ] The median of the results. + # + # @since 2.2.2 + def encode(file_name, repetitions) + data = Benchmarking.load_file(file_name) + doc = BSON::Document.new(data.first) + + # WARMUP_REPETITIONS.times do + # doc.to_bson + # end + + Benchmarking.median(repetitions.times.collect do + Benchmark.realtime do + 10_000.times do + doc.to_bson + end + end + end) + end + + # Run a decoding micro benchmark test. + # + # @example Run an decoding test. + # Benchmarking::Micro.decode(file_name, 100) + # + # @param [ String ] The name of the file with data for the test. + # @param [ Integer ] The number of test repetitions. + # + # @return [ Numeric ] The median of the results. 
+ # + # @since 2.2.2 + def decode(file_name, repetitions) + data = Benchmarking.load_file(file_name) + buffer = BSON::Document.new(data.first).to_bson + + bytes = buffer.get_bytes(buffer.length) + buffers = Array.new((10_000 * TEST_REPETITIONS) + WARMUP_REPETITIONS) + buffers.fill { BSON::ByteBuffer.new(bytes) } + + # WARMUP_REPETITIONS.times do + # BSON::Document.from_bson(buffers.shift) + # end + + results = repetitions.times.collect do + 10_000.times.collect do + buf = buffers.shift + Benchmark.realtime do + BSON::Document.from_bson(buf) + end + end.reduce(&:+) + end + + Benchmarking.median(results) + end + end + end +end From 98af5431cdbb1edd4c766f03170997e0f8fd76aa Mon Sep 17 00:00:00 2001 From: Emily Date: Mon, 18 Jan 2016 11:21:24 +0100 Subject: [PATCH 02/14] Reset read position on buffer instead of creating new ones --- profile/benchmarking/micro.rb | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/profile/benchmarking/micro.rb b/profile/benchmarking/micro.rb index e69a4de222..b560a86133 100644 --- a/profile/benchmarking/micro.rb +++ b/profile/benchmarking/micro.rb @@ -80,21 +80,17 @@ def decode(file_name, repetitions) data = Benchmarking.load_file(file_name) buffer = BSON::Document.new(data.first).to_bson - bytes = buffer.get_bytes(buffer.length) - buffers = Array.new((10_000 * TEST_REPETITIONS) + WARMUP_REPETITIONS) - buffers.fill { BSON::ByteBuffer.new(bytes) } - # WARMUP_REPETITIONS.times do # BSON::Document.from_bson(buffers.shift) # end results = repetitions.times.collect do - 10_000.times.collect do - buf = buffers.shift - Benchmark.realtime do - BSON::Document.from_bson(buf) + Benchmark.realtime do + 10_000.times do + buffer.reset_read_position + BSON::Document.from_bson(buffer) end - end.reduce(&:+) + end end Benchmarking.median(results) From 485a4bd6bd50096483aaec89c67c3f5d50748408 Mon Sep 17 00:00:00 2001 From: Emily Date: Mon, 18 Jan 2016 11:35:16 +0100 Subject: [PATCH 03/14] Single-document command benchmarking 
--- Rakefile | 12 +++++- profile/benchmarking.rb | 1 + profile/benchmarking/single_doc.rb | 59 ++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 profile/benchmarking/single_doc.rb diff --git a/Rakefile b/Rakefile index dd5afab403..fcc3bdca98 100644 --- a/Rakefile +++ b/Rakefile @@ -66,4 +66,14 @@ namespace :benchmark do task :all => [:flat, :deep, :full ] end -end \ No newline at end of file + + namespace :single_doc do + desc "Run the common driver single-document benchmarking tests" + task :command do + puts "SINGLE-DOC BENCHMARK:: COMMAND" + Mongo::Benchmarking::SingleDoc.run(:command) + end + + + end +end diff --git a/profile/benchmarking.rb b/profile/benchmarking.rb index e4f889daf7..bf5387f114 100644 --- a/profile/benchmarking.rb +++ b/profile/benchmarking.rb @@ -7,5 +7,6 @@ module Benchmarking require 'benchmark' require_relative 'benchmarking/helper' require_relative 'benchmarking/micro' + require_relative 'benchmarking/single_doc' end end diff --git a/profile/benchmarking/single_doc.rb b/profile/benchmarking/single_doc.rb new file mode 100644 index 0000000000..dae775defc --- /dev/null +++ b/profile/benchmarking/single_doc.rb @@ -0,0 +1,59 @@ +module Mongo + module Benchmarking + module SingleDoc + + # The number of repetitions of the test to do before timing. + # + # @return [ Integer ] The number of warmup repetitions. + # + # @since 2.2.2 + WARMUP_REPETITIONS = 10 + + # The number of times to run and time the test. + # + # @return [ Integer ] The number of test repetitions. + # + # @since 2.2.2 + TEST_REPETITIONS = 5 + + extend self + + # Run a micro benchmark test. + # + # @example Run a test. + # Benchmarking::Micro.run(:flat) + # + # @param [ Symbol ] The type of test to run. + # + # @return [ Array ] An array of results. 
+ # + # @since 2.2.2 + def run(type) + Mongo::Logger.logger.level = ::Logger::WARN + puts "#{type} : #{send(type, TEST_REPETITIONS)}" + end + + private + + def client + @client ||= Mongo::Client.new(["localhost:27017"], database: 'perftest') + end + + def collection + @collection ||= client[:corpus] + end + + def command(repetitions) + monitor = client.cluster.servers.first.monitor + results = repetitions.times.collect do + Benchmark.realtime do + 10_000.times do + monitor.connection.ismaster + end + end + end + Benchmarking.median(results) + end + end + end +end From 4b5c02f0451b83d838f9850a9776ad6514f57cf0 Mon Sep 17 00:00:00 2001 From: Emily Date: Mon, 18 Jan 2016 12:03:09 +0100 Subject: [PATCH 04/14] Single document find by id task --- Rakefile | 4 +++ profile/benchmarking.rb | 1 + profile/benchmarking/single_doc.rb | 48 ++++++++++++++++++++++++------ 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/Rakefile b/Rakefile index fcc3bdca98..ce5ff15312 100644 --- a/Rakefile +++ b/Rakefile @@ -74,6 +74,10 @@ namespace :benchmark do Mongo::Benchmarking::SingleDoc.run(:command) end + task :find_one_by_id do + puts "SINGLE_DOC BENCHMARK:: FIND ONE BY ID" + Mongo::Benchmarking::SingleDoc.run(:find_one_by_id) + end end end diff --git a/profile/benchmarking.rb b/profile/benchmarking.rb index bf5387f114..7bb3bfbeeb 100644 --- a/profile/benchmarking.rb +++ b/profile/benchmarking.rb @@ -3,6 +3,7 @@ module Benchmarking CURRENT_PATH = File.expand_path(File.dirname(__FILE__)) MICRO_TESTS_PATH = "#{CURRENT_PATH}/benchmarking/data/micro/" + TWEET_DOCUMENT_PATH = "#{CURRENT_PATH}/benchmarking/data/single-doc/" require 'benchmark' require_relative 'benchmarking/helper' diff --git a/profile/benchmarking/single_doc.rb b/profile/benchmarking/single_doc.rb index dae775defc..c1a664fefa 100644 --- a/profile/benchmarking/single_doc.rb +++ b/profile/benchmarking/single_doc.rb @@ -14,7 +14,14 @@ module SingleDoc # @return [ Integer ] The number of test repetitions. 
# # @since 2.2.2 - TEST_REPETITIONS = 5 + TEST_REPETITIONS = 1 + + # The file containing the single tweet document. + # + # @return [ String ] The file containing the tweet document. + # + # @since 2.2.2 + TWEET_DOCUMENT_FILE = TWEET_DOCUMENT_PATH + 'TWEET.json' extend self @@ -35,14 +42,6 @@ def run(type) private - def client - @client ||= Mongo::Client.new(["localhost:27017"], database: 'perftest') - end - - def collection - @collection ||= client[:corpus] - end - def command(repetitions) monitor = client.cluster.servers.first.monitor results = repetitions.times.collect do @@ -54,6 +53,37 @@ def command(repetitions) end Benchmarking.median(results) end + + def find_one_by_id(repetitions) + client.database.drop + doc = tweet_document + + 10_000.times do |i| + doc[:_id] = i + collection.insert_one(doc) + end + + results = repetitions.times.collect do + Benchmark.realtime do + 10_000.times do |i| + collection.find({ _id: i }, limit: -1).first + end + end + end + Benchmarking.median(results) + end + + def client + @client ||= Mongo::Client.new(["localhost:27017"], database: 'perftest') + end + + def collection + @collection ||= client[:corpus] + end + + def tweet_document + Benchmarking.load_file(TWEET_DOCUMENT_FILE).first + end end end end From 24e0d71b7fe07ecbfc71623671e52a291a70ecaf Mon Sep 17 00:00:00 2001 From: Emily Date: Mon, 18 Jan 2016 12:37:00 +0100 Subject: [PATCH 05/14] Small and Large insert one benchmarks --- Rakefile | 15 ++++++-- profile/benchmarking.rb | 2 +- profile/benchmarking/single_doc.rb | 55 ++++++++++++++++++++++++++---- 3 files changed, 63 insertions(+), 9 deletions(-) diff --git a/Rakefile b/Rakefile index ce5ff15312..265815f8ea 100644 --- a/Rakefile +++ b/Rakefile @@ -74,10 +74,21 @@ namespace :benchmark do Mongo::Benchmarking::SingleDoc.run(:command) end - task :find_one_by_id do + task :find_one do puts "SINGLE_DOC BENCHMARK:: FIND ONE BY ID" - Mongo::Benchmarking::SingleDoc.run(:find_one_by_id) + 
Mongo::Benchmarking::SingleDoc.run(:find_one) end + task :insert_one_small do + puts "SINGLE_DOC BENCHMARK:: INSERT ONE SMALL DOCUMENT" + Mongo::Benchmarking::SingleDoc.run(:insert_one_small) + end + + task :insert_one_large do + puts "SINGLE_DOC BENCHMARK:: INSERT ONE LARGE DOCUMENT" + Mongo::Benchmarking::SingleDoc.run(:insert_one_large) + end + + task :all => [:command, :find_one, :insert_one_small, :insert_one_large ] end end diff --git a/profile/benchmarking.rb b/profile/benchmarking.rb index 7bb3bfbeeb..e84356ace2 100644 --- a/profile/benchmarking.rb +++ b/profile/benchmarking.rb @@ -3,7 +3,7 @@ module Benchmarking CURRENT_PATH = File.expand_path(File.dirname(__FILE__)) MICRO_TESTS_PATH = "#{CURRENT_PATH}/benchmarking/data/micro/" - TWEET_DOCUMENT_PATH = "#{CURRENT_PATH}/benchmarking/data/single-doc/" + SINGLE_DOCUMENTS_PATH = "#{CURRENT_PATH}/benchmarking/data/single-doc/" require 'benchmark' require_relative 'benchmarking/helper' diff --git a/profile/benchmarking/single_doc.rb b/profile/benchmarking/single_doc.rb index c1a664fefa..5b4c6d36bc 100644 --- a/profile/benchmarking/single_doc.rb +++ b/profile/benchmarking/single_doc.rb @@ -14,14 +14,28 @@ module SingleDoc # @return [ Integer ] The number of test repetitions. # # @since 2.2.2 - TEST_REPETITIONS = 1 + TEST_REPETITIONS = 10 # The file containing the single tweet document. # # @return [ String ] The file containing the tweet document. # # @since 2.2.2 - TWEET_DOCUMENT_FILE = TWEET_DOCUMENT_PATH + 'TWEET.json' + TWEET_DOCUMENT_FILE = SINGLE_DOCUMENTS_PATH + 'TWEET.json' + + # The file containing the single small document. + # + # @return [ String ] The file containing the small document. + # + # @since 2.2.2 + SMALL_DOCUMENT_FILE = SINGLE_DOCUMENTS_PATH + 'SMALL_DOC.json' + + # The file containing the single large document. + # + # @return [ String ] The file containing the large document. 
+ # + # @since 2.2.2 + LARGE_DOCUMENT_FILE = SINGLE_DOCUMENTS_PATH + 'LARGE_DOC.json' extend self @@ -37,12 +51,12 @@ module SingleDoc # @since 2.2.2 def run(type) Mongo::Logger.logger.level = ::Logger::WARN - puts "#{type} : #{send(type, TEST_REPETITIONS)}" + puts "#{type} : #{send(type)}" end private - def command(repetitions) + def command(repetitions = TEST_REPETITIONS) monitor = client.cluster.servers.first.monitor results = repetitions.times.collect do Benchmark.realtime do @@ -54,7 +68,7 @@ def command(repetitions) Benchmarking.median(results) end - def find_one_by_id(repetitions) + def find_one(repetitions = TEST_REPETITIONS) client.database.drop doc = tweet_document @@ -73,17 +87,46 @@ def find_one_by_id(repetitions) Benchmarking.median(results) end + def insert_one_large(repetitions = TEST_REPETITIONS) + insert_one(repetitions, 10, large_document) + end + + def insert_one_small(repetitions = TEST_REPETITIONS) + insert_one(repetitions, 10_000, small_document) + end + + def insert_one(repetitions, do_repetitions, doc) + client.database.drop + + results = repetitions.times.collect do + Benchmark.realtime do + do_repetitions.times do + collection.insert_one(doc) + end + end + end + Benchmarking.median(results) + end + def client @client ||= Mongo::Client.new(["localhost:27017"], database: 'perftest') end def collection - @collection ||= client[:corpus] + @collection ||= client[:corpus].tap { |coll| coll.create } end def tweet_document Benchmarking.load_file(TWEET_DOCUMENT_FILE).first end + + def small_document + Benchmarking.load_file(SMALL_DOCUMENT_FILE).first + end + + def large_document + Benchmarking.load_file(LARGE_DOCUMENT_FILE).first + end end end end From 269454752aa5295aaa76ceb0b7a68e7bcb836a9f Mon Sep 17 00:00:00 2001 From: Emily Date: Mon, 18 Jan 2016 17:22:18 +0100 Subject: [PATCH 06/14] Multi Document profile tests and refactoring --- Rakefile | 31 ++++- profile/benchmarking.rb | 98 +++++++++++++++- profile/benchmarking/micro.rb | 73 ++++++------ 
profile/benchmarking/multi_doc.rb | 179 +++++++++++++++++++++++++++++ profile/benchmarking/single_doc.rb | 138 ++++++++++++---------- 5 files changed, 416 insertions(+), 103 deletions(-) create mode 100644 profile/benchmarking/multi_doc.rb diff --git a/Rakefile b/Rakefile index 265815f8ea..72f223450d 100644 --- a/Rakefile +++ b/Rakefile @@ -48,7 +48,7 @@ namespace :benchmark do desc "Run the driver benchmark tests" namespace :micro do - desc "Run the driver micro benchmark tests" + desc "Run the common driver micro benchmarking tests" task :flat do puts "MICRO BENCHMARK:: FLAT" Mongo::Benchmarking::Micro.run(:flat) @@ -91,4 +91,33 @@ namespace :benchmark do task :all => [:command, :find_one, :insert_one_small, :insert_one_large ] end + + namespace :multi_doc do + desc "Run the common driver multi-document benchmarking tests" + task :find_many do + puts "MULTI DOCUMENT BENCHMARK:: FIND MANY" + Mongo::Benchmarking::MultiDoc.run(:find_many) + end + + task :bulk_insert_small do + puts "MULTI DOCUMENT BENCHMARK:: BULK INSERT SMALL" + Mongo::Benchmarking::MultiDoc.run(:bulk_insert_small) + end + + task :bulk_insert_large do + puts "MULTI DOCUMENT BENCHMARK:: BULK INSERT LARGE" + Mongo::Benchmarking::MultiDoc.run(:bulk_insert_large) + end + + task :gridfs_upload do + puts "MULTI DOCUMENT BENCHMARK:: GRIDFS UPLOAD" + Mongo::Benchmarking::MultiDoc.run(:gridfs_upload) + end + + task :gridfs_download do + puts "MULTI DOCUMENT BENCHMARK:: GRIDFS DOWNLOAD" + Mongo::Benchmarking::MultiDoc.run(:gridfs_download) + end + + end end diff --git a/profile/benchmarking.rb b/profile/benchmarking.rb index e84356ace2..a7205a8dfa 100644 --- a/profile/benchmarking.rb +++ b/profile/benchmarking.rb @@ -1,13 +1,99 @@ +# Copyright (C) 2015 MongoDB, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require 'benchmark' +require_relative 'benchmarking/helper' +require_relative 'benchmarking/micro' +require_relative 'benchmarking/single_doc' +require_relative 'benchmarking/multi_doc' + module Mongo + + # Module with all functionality for running driver benchmark tests. + # + # @since 2.2.2 module Benchmarking + extend self + + # The current path. + # + # @return [ String ] The current path. + # + # @since 2.2.2 CURRENT_PATH = File.expand_path(File.dirname(__FILE__)) - MICRO_TESTS_PATH = "#{CURRENT_PATH}/benchmarking/data/micro/" - SINGLE_DOCUMENTS_PATH = "#{CURRENT_PATH}/benchmarking/data/single-doc/" - require 'benchmark' - require_relative 'benchmarking/helper' - require_relative 'benchmarking/micro' - require_relative 'benchmarking/single_doc' + # The path to data files used in Benchmarking tests. + # + # @return [ String ] Path to Benchmarking test files. + # + # @since 2.2.2 + DATA_PATH = "#{CURRENT_PATH}/benchmarking/data/" + + # The file containing the single tweet document. + # + # @return [ String ] The file containing the tweet document. + # + # @since 2.2.2 + TWEET_DOCUMENT_FILE = "#{DATA_PATH}TWEET.json" + + # The file containing the single small document. + # + # @return [ String ] The file containing the small document. + # + # @since 2.2.2 + SMALL_DOCUMENT_FILE = "#{DATA_PATH}SMALL_DOC.json" + + # The file containing the single large document. + # + # @return [ String ] The file containing the large document. + # + # @since 2.2.2 + LARGE_DOCUMENT_FILE = "#{DATA_PATH}LARGE_DOC.json" + + # The file to upload when testing GridFS. 
+ # + # @return [ String ] The file containing the GridFS test data. + # + # @since 2.2.2 + GRIDFS_FILE = "#{DATA_PATH}GRIDFS_LARGE" + + # The default number of test repetitions. + # + # @return [ Integer ] The number of test repetitions. + # + # @since 2.2.2 + TEST_REPETITIONS = 10 + + # The number of default warmup repetitions of the test to do before + # recording times. + # + # @return [ Integer ] The default number of warmup repetitions. + # + # @since 2.2.2 + WARMUP_REPETITIONS = 10 + + def tweet_document + Benchmarking.load_file(TWEET_DOCUMENT_FILE).first + end + + def small_document + Benchmarking.load_file(SMALL_DOCUMENT_FILE).first + end + + def large_document + Benchmarking.load_file(LARGE_DOCUMENT_FILE).first + end end end diff --git a/profile/benchmarking/micro.rb b/profile/benchmarking/micro.rb index b560a86133..04a73e2604 100644 --- a/profile/benchmarking/micro.rb +++ b/profile/benchmarking/micro.rb @@ -1,20 +1,25 @@ +# Copyright (C) 2015 MongoDB, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + module Mongo module Benchmarking - module Micro - # The number of repetitions of the test to do before timing. - # - # @return [ Integer ] The number of warmup repetitions. - # - # @since 2.2.2 - WARMUP_REPETITIONS = 10 - - # The number of times to run and time the test. - # - # @return [ Integer ] The number of test repetitions. 
- # - # @since 2.2.2 - TEST_REPETITIONS = 100 + # These tests focus on BSON encoding and decoding; they are client-side only and + # do not involve any transmission of data to or from the server. + # + # @since 2.2.2 + module Micro extend self @@ -23,55 +28,56 @@ module Micro # @example Run a test. # Benchmarking::Micro.run(:flat) # - # @param [ Symbol ] The type of test to run. + # @param [ Symbol ] type The type of test to run. + # @param [ Integer ] repetitions The number of test repetitions. # - # @return [ Array ] An array of results. + # @return [ Numeric ] The test results. # # @since 2.2.2 - def run(type) - file = type.to_s << "_bson.json" - file_path = MICRO_TESTS_PATH + file - ['encode', 'decode'].collect do |method| - result = send(method, file_path, TEST_REPETITIONS) - puts "#{method} : #{result}" + def run(type, repetitions = Benchmarking::TEST_REPETITIONS) + file_name = type.to_s << "_bson.json" + file_path = Benchmarking::DATA_PATH + file_name + ['encode'].each do |task| + puts "#{task} : #{send(task, file_path, repetitions)}" end end # Run an encoding micro benchmark test. # # @example Run an encoding test. - # Benchmarking::Micro.encode(file_name, 100) + # Benchmarking::Micro.encode(file_name) # - # @param [ String ] The name of the file with data for the test. - # @param [ Integer ] The number of test repetitions. + # @param [ String ] file_name The name of the file with data for the test. + # @param [ Integer ] repetitions The number of test repetitions. # # @return [ Numeric ] The median of the results. 
# # @since 2.2.2 def encode(file_name, repetitions) data = Benchmarking.load_file(file_name) - doc = BSON::Document.new(data.first) + document = BSON::Document.new(data.first) # WARMUP_REPETITIONS.times do # doc.to_bson # end - Benchmarking.median(repetitions.times.collect do + results = repetitions.times.collect do Benchmark.realtime do 10_000.times do - doc.to_bson + document.to_bson end end - end) + end + Benchmarking.median(results) end # Run a decoding micro benchmark test. # # @example Run an decoding test. - # Benchmarking::Micro.decode(file_name, 100) + # Benchmarking::Micro.decode(file_name) # - # @param [ String ] The name of the file with data for the test. - # @param [ Integer ] The number of test repetitions. + # @param [ String ] file_name The name of the file with data for the test. + # @param [ Integer ] repetitions The number of test repetitions. # # @return [ Numeric ] The median of the results. # @@ -87,12 +93,11 @@ def decode(file_name, repetitions) results = repetitions.times.collect do Benchmark.realtime do 10_000.times do - buffer.reset_read_position + buffer.rewind BSON::Document.from_bson(buffer) end end end - Benchmarking.median(results) end end diff --git a/profile/benchmarking/multi_doc.rb b/profile/benchmarking/multi_doc.rb new file mode 100644 index 0000000000..d4ce0de874 --- /dev/null +++ b/profile/benchmarking/multi_doc.rb @@ -0,0 +1,179 @@ +# Copyright (C) 2015 MongoDB, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +module Mongo + module Benchmarking + + # Multi-doc benchmarks focus on multiple-document read and write operations. + # They are designed to give insight into the efficiency of the driver's implementation + # of bulk/batch operations such as bulk writes and cursor reads. + # + # @since 2.2.2 + module MultiDoc + + extend self + + # Run a multi-document benchmark test. + # + # @example Run a test. + # Benchmarking::MultiDoc.run(:find_many) + # + # @param [ Symbol ] type The type of test to run. + # @param [ Integer ] repetitions The number of test repetitions. + # + # @return [ Numeric ] The test results. + # + # @since 2.2.2 + def run(type, repetitions = Benchmarking::TEST_REPETITIONS) + Mongo::Logger.logger.level = ::Logger::WARN + puts "#{type} : #{send(type, repetitions)}" + end + + # Test finding many documents. + # + # @example Test sending a find and exhausting the cursor. + # Benchmarking::MultiDoc.find_many(10) + # + # @param [ Integer ] repetitions The number of test repetitions. + # + # @return [ Numeric ] The median of the results. + # + # @since 2.2.2 + def find_many(repetitions) + client.database.drop + doc = Benchmarking.tweet_document + + 10_000.times do |i| + collection.insert_one(doc) + end + + results = repetitions.times.collect do + Benchmark.realtime do + collection.find.to_a + end + end + client.database.drop + Benchmarking.median(results) + end + + # Test doing a bulk insert of small documents. + # + # @example Test bulk insert of small documents. + # Benchmarking::MultiDoc.bulk_insert_small(10) + # + # @param [ Integer ] repetitions The number of test repetitions. + # + # @return [ Numeric ] The median of the results. + # + # @since 2.2.2 + def bulk_insert_small(repetitions) + bulk_insert(repetitions, [Benchmarking.small_document] * 10_000) + end + + # Test doing a bulk insert of large documents. + # + # @example Test bulk insert of large documents. 
+ # Benchmarking::MultiDoc.bulk_insert_large(10) + # + # @param [ Integer ] repetitions The number of test repetitions. + # + # @return [ Numeric ] The median of the results. + # + # @since 2.2.2 + def bulk_insert_large(repetitions) + bulk_insert(repetitions, [Benchmarking.large_document] * 10) + end + + # Test uploading to GridFS. + # + # @example Test uploading to GridFS. + # Benchmarking::MultiDoc.gridfs_upload(10) + # + # @param [ Integer ] repetitions The number of test repetitions. + # + # @return [ Numeric ] The median of the results. + # + # @since 2.2.2 + def gridfs_upload(repetitions) + client.database.drop + create_collection + fs = client.with(write: { w: 1 }).database.fs(write: { w: 1}) + + s = StringIO.new('a') + fs.upload_from_stream('create-indices.test', s) + + file = File.open(GRIDFS_FILE) + + results = repetitions.times.collect do + file.rewind + Benchmark.realtime do + fs.upload_from_stream('GRIDFS_LARGE', file) + end + end + Benchmarking.median(results) + end + + # Test downloading from GridFS. + # + # @example Test downloading from GridFS. + # Benchmarking::MultiDoc.gridfs_download(10) + # + # @param [ Integer ] repetitions The number of test repetitions. + # + # @return [ Numeric ] The median of the results. 
+ # + # @since 2.2.2 + def gridfs_download(repetitions = Benchmarking::TEST_REPETITIONS) + client.database.drop + create_collection + fs = client.with(write: { w: 1 }).database.fs(write: { w: 1}) + + file_id = fs.upload_from_stream('gridfstest', File.open(GRIDFS_FILE)) + io = StringIO.new + + results = repetitions.times.collect do + io.rewind + Benchmark.realtime do + fs.download_to_stream(file_id, io) + end + end + Benchmarking.median(results) + end + + private + + def bulk_insert(repetitions, docs) + client.database.drop + create_collection + + results = repetitions.times.collect do + Benchmark.realtime do + collection.insert_many(docs) + end + end + client.database.drop + Benchmarking.median(results) + end + + def client + @client ||= Mongo::Client.new(["localhost:27017"], database: 'perftest') + end + + def collection + @collection ||= client[:corpus].tap { |coll| coll.create } + end + alias :create_collection :collection + end + end +end diff --git a/profile/benchmarking/single_doc.rb b/profile/benchmarking/single_doc.rb index 5b4c6d36bc..c1c786b061 100644 --- a/profile/benchmarking/single_doc.rb +++ b/profile/benchmarking/single_doc.rb @@ -1,62 +1,54 @@ +# Copyright (C) 2015 MongoDB, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + module Mongo module Benchmarking - module SingleDoc - - # The number of repetitions of the test to do before timing. - # - # @return [ Integer ] The number of warmup repetitions. 
- # - # @since 2.2.2 - WARMUP_REPETITIONS = 10 - - # The number of times to run and time the test. - # - # @return [ Integer ] The number of test repetitions. - # - # @since 2.2.2 - TEST_REPETITIONS = 10 - - # The file containing the single tweet document. - # - # @return [ String ] The file containing the tweet document. - # - # @since 2.2.2 - TWEET_DOCUMENT_FILE = SINGLE_DOCUMENTS_PATH + 'TWEET.json' - # The file containing the single small document. - # - # @return [ String ] The file containing the small document. - # - # @since 2.2.2 - SMALL_DOCUMENT_FILE = SINGLE_DOCUMENTS_PATH + 'SMALL_DOC.json' - - # The file containing the single large document. - # - # @return [ String ] The file containing the large document. - # - # @since 2.2.2 - LARGE_DOCUMENT_FILE = SINGLE_DOCUMENTS_PATH + 'LARGE_DOC.json' + # Single-doc tests focus on single-document read and write operations. + # They are designed to give insights into the efficiency of the driver's + # implementation of the basic wire protocol. + # + # @since 2.2.2 + module SingleDoc extend self - # Run a micro benchmark test. + # Run a Single Document benchmark test. # # @example Run a test. - # Benchmarking::Micro.run(:flat) + # Benchmarking::SingleDoc.run(:command) # - # @param [ Symbol ] The type of test to run. + # @param [ Symbol ] type The type of test to run. + # @param [ Integer ] repetitions The number of test repetitions. # - # @return [ Array ] An array of results. + # @return [ Numberic ] The test results. # # @since 2.2.2 - def run(type) + def run(type, repetitions = Benchmarking::TEST_REPETITIONS) Mongo::Logger.logger.level = ::Logger::WARN - puts "#{type} : #{send(type)}" + puts "#{type} : #{send(type, repetitions)}" end - private - - def command(repetitions = TEST_REPETITIONS) + # Test sending a command to the server. + # + # @example Test sending an ismaster command. + # Benchmarking::SingleDoc.command(10) + # + # @param [ Integer ] repetitions The number of test repetitions. 
+ # + # @since 2.2.2 + def command(repetitions) monitor = client.cluster.servers.first.monitor results = repetitions.times.collect do Benchmark.realtime do @@ -68,9 +60,19 @@ def command(repetitions = TEST_REPETITIONS) Benchmarking.median(results) end - def find_one(repetitions = TEST_REPETITIONS) + # Test sending find one by id. + # + # @example Test sending a find. + # Benchmarking::SingleDoc.find_one(10) + # + # @param [ Integer ] repetitions The number of test repetitions. + # + # @return [ Numeric ] The median of the results. + # + # @since 2.2.2 + def find_one(repetitions) client.database.drop - doc = tweet_document + doc = Benchmarking.tweet_document 10_000.times do |i| doc[:_id] = i @@ -87,16 +89,39 @@ def find_one(repetitions = TEST_REPETITIONS) Benchmarking.median(results) end - def insert_one_large(repetitions = TEST_REPETITIONS) - insert_one(repetitions, 10, large_document) + # Test inserting a large document. + # + # @example Test inserting a large document. + # Benchmarking::SingleDoc.insert_one_large(10) + # + # @param [ Integer ] repetitions The number of test repetitions. + # + # @return [ Numeric ] The median of the results. + # + # @since 2.2.2 + def insert_one_large(repetitions) + insert_one(repetitions, 10, Benchmarking.large_document) end - def insert_one_small(repetitions = TEST_REPETITIONS) - insert_one(repetitions, 10_000, small_document) + # Test inserting a small document. + # + # @example Test inserting a small document. + # Benchmarking::SingleDoc.insert_one_small(10) + # + # @param [ Integer ] repetitions The number of test repetitions. + # + # @return [ Numeric ] The median of the results. 
+ # + # @since 2.2.2 + def insert_one_small(repetitions) + insert_one(repetitions, 10_000, Benchmarking.small_document) end + private + def insert_one(repetitions, do_repetitions, doc) client.database.drop + create_collection results = repetitions.times.collect do Benchmark.realtime do @@ -115,18 +140,7 @@ def client def collection @collection ||= client[:corpus].tap { |coll| coll.create } end - - def tweet_document - Benchmarking.load_file(TWEET_DOCUMENT_FILE).first - end - - def small_document - Benchmarking.load_file(SMALL_DOCUMENT_FILE).first - end - - def large_document - Benchmarking.load_file(LARGE_DOCUMENT_FILE).first - end + alias :create_collection :collection end end end From 486c02711ee54a07e931e88913a0070dd4090e72 Mon Sep 17 00:00:00 2001 From: Emily Date: Tue, 19 Jan 2016 15:56:12 +0100 Subject: [PATCH 07/14] Freeze Benchmarking constants --- profile/benchmarking.rb | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/profile/benchmarking.rb b/profile/benchmarking.rb index a7205a8dfa..5500cb5366 100644 --- a/profile/benchmarking.rb +++ b/profile/benchmarking.rb @@ -32,49 +32,49 @@ module Benchmarking # @return [ String ] The current path. # # @since 2.2.2 - CURRENT_PATH = File.expand_path(File.dirname(__FILE__)) + CURRENT_PATH = File.expand_path(File.dirname(__FILE__)).freeze # The path to data files used in Benchmarking tests. # # @return [ String ] Path to Benchmarking test files. # # @since 2.2.2 - DATA_PATH = "#{CURRENT_PATH}/benchmarking/data/" + DATA_PATH = "#{CURRENT_PATH}/benchmarking/data/".freeze # The file containing the single tweet document. # # @return [ String ] The file containing the tweet document. # # @since 2.2.2 - TWEET_DOCUMENT_FILE = "#{DATA_PATH}TWEET.json" + TWEET_DOCUMENT_FILE = "#{DATA_PATH}TWEET.json".freeze # The file containing the single small document. # # @return [ String ] The file containing the small document. 
# # @since 2.2.2 - SMALL_DOCUMENT_FILE = "#{DATA_PATH}SMALL_DOC.json" + SMALL_DOCUMENT_FILE = "#{DATA_PATH}SMALL_DOC.json".freeze # The file containing the single large document. # # @return [ String ] The file containing the large document. # # @since 2.2.2 - LARGE_DOCUMENT_FILE = "#{DATA_PATH}LARGE_DOC.json" + LARGE_DOCUMENT_FILE = "#{DATA_PATH}LARGE_DOC.json".freeze # The file to upload when testing GridFS. # # @return [ String ] The file containing the GridFS test data. # # @since 2.2.2 - GRIDFS_FILE = "#{DATA_PATH}GRIDFS_LARGE" + GRIDFS_FILE = "#{DATA_PATH}GRIDFS_LARGE".freeze # The default number of test repetitions. # # @return [ Integer ] The number of test repetitions. # # @since 2.2.2 - TEST_REPETITIONS = 10 + TEST_REPETITIONS = 100.freeze # The number of default warmup repetitions of the test to do before # recording times. @@ -82,7 +82,7 @@ module Benchmarking # @return [ Integer ] The default number of warmup repetitions. # # @since 2.2.2 - WARMUP_REPETITIONS = 10 + WARMUP_REPETITIONS = 10.freeze def tweet_document Benchmarking.load_file(TWEET_DOCUMENT_FILE).first From 96e4d942e2a009d24a89d12119f423bc1450b363 Mon Sep 17 00:00:00 2001 From: Emily Date: Wed, 20 Jan 2016 12:24:40 +0100 Subject: [PATCH 08/14] Correct name of rewind method and split micro benchmarks into en/decoding --- Rakefile | 47 +++++++++++++++++++++++++---------- profile/benchmarking/micro.rb | 9 +++---- 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/Rakefile b/Rakefile index 72f223450d..594c20fa0d 100644 --- a/Rakefile +++ b/Rakefile @@ -49,22 +49,45 @@ namespace :benchmark do namespace :micro do desc "Run the common driver micro benchmarking tests" - task :flat do - puts "MICRO BENCHMARK:: FLAT" - Mongo::Benchmarking::Micro.run(:flat) - end - task :deep do - puts "MICRO BENCHMARK:: DEEP" - Mongo::Benchmarking::Micro.run(:deep) + namespace :flat do + desc "Benchmarking for flat bson documents" + task :encode do + puts "MICRO BENCHMARK:: FLAT:: ENCODE" + 
Mongo::Benchmarking::Micro.run(:flat, :encode) + end + + task :decode do + puts "MICRO BENCHMARK:: FLAT:: DECODE" + Mongo::Benchmarking::Micro.run(:flat, :decode) + end end - task :full do - puts "MICRO BENCHMARK:: FULL" - Mongo::Benchmarking::Micro.run(:full) + namespace :deep do + desc "Benchmarking for deep bson documents" + task :encode do + puts "MICRO BENCHMARK:: DEEP:: ENCODE" + Mongo::Benchmarking::Micro.run(:deep, :encode) + end + + task :decode do + puts "MICRO BENCHMARK:: DEEP:: DECODE" + Mongo::Benchmarking::Micro.run(:deep, :decode) + end end - task :all => [:flat, :deep, :full ] + namespace :full do + desc "Benchmarking for full bson documents" + task :encode do + puts "MICRO BENCHMARK:: FULL:: ENCODE" + Mongo::Benchmarking::Micro.run(:full, :encode) + end + + task :decode do + puts "MICRO BENCHMARK:: FULL:: DECODE" + Mongo::Benchmarking::Micro.run(:full, :decode) + end + end end namespace :single_doc do @@ -88,8 +111,6 @@ namespace :benchmark do puts "SINGLE_DOC BENCHMARK:: INSERT ONE LARGE DOCUMENT" Mongo::Benchmarking::SingleDoc.run(:insert_one_large) end - - task :all => [:command, :find_one, :insert_one_small, :insert_one_large ] end namespace :multi_doc do diff --git a/profile/benchmarking/micro.rb b/profile/benchmarking/micro.rb index 04a73e2604..802bfda74f 100644 --- a/profile/benchmarking/micro.rb +++ b/profile/benchmarking/micro.rb @@ -34,12 +34,11 @@ module Micro # @return [ Numeric ] The test results. # # @since 2.2.2 - def run(type, repetitions = Benchmarking::TEST_REPETITIONS) + def run(type, action, repetitions = Benchmarking::TEST_REPETITIONS) file_name = type.to_s << "_bson.json" + GC.disable file_path = Benchmarking::DATA_PATH + file_name - ['encode'].each do |task| - puts "#{task} : #{send(task, file_path, repetitions)}" - end + puts "#{action} : #{send(action, file_path, repetitions)}" end # Run an encoding micro benchmark test. 
@@ -93,8 +92,8 @@ def decode(file_name, repetitions) results = repetitions.times.collect do Benchmark.realtime do 10_000.times do - buffer.rewind BSON::Document.from_bson(buffer) + buffer.rewind! end end end From f9ff16285205a42514e8b69c1625405db6fce09d Mon Sep 17 00:00:00 2001 From: Emily Date: Fri, 22 Jan 2016 17:44:20 +0100 Subject: [PATCH 09/14] Add the parallel ETL import tasks --- Rakefile | 7 ++ profile/benchmarking.rb | 8 ++ profile/benchmarking/multi_doc.rb | 2 +- profile/benchmarking/parallel.rb | 124 ++++++++++++++++++++++++++++++ 4 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 profile/benchmarking/parallel.rb diff --git a/Rakefile b/Rakefile index 594c20fa0d..7268dc69cb 100644 --- a/Rakefile +++ b/Rakefile @@ -139,6 +139,13 @@ namespace :benchmark do puts "MULTI DOCUMENT BENCHMARK:: GRIDFS DOWNLOAD" Mongo::Benchmarking::MultiDoc.run(:gridfs_download) end + end + namespace :parallel do + desc "Run the common driver paralell ETL benchmarking tests" + task :import do + puts "PARALLEL ETL BENCHMARK:: IMPORT" + Mongo::Benchmarking::Parallel.run(:import) + end end end diff --git a/profile/benchmarking.rb b/profile/benchmarking.rb index 5500cb5366..1a9bbd000c 100644 --- a/profile/benchmarking.rb +++ b/profile/benchmarking.rb @@ -17,6 +17,7 @@ require_relative 'benchmarking/micro' require_relative 'benchmarking/single_doc' require_relative 'benchmarking/multi_doc' +require_relative 'benchmarking/parallel' module Mongo @@ -69,6 +70,13 @@ module Benchmarking # @since 2.2.2 GRIDFS_FILE = "#{DATA_PATH}GRIDFS_LARGE".freeze + # The file path and base name for the LDJSON files. + # + # @return [ String ] The file path and base name for the LDJSON files. + # + # @since 2.2.2 + LDJSON_FILE_BASE = "#{DATA_PATH}LDJSON_MULTI/LDJSON".freeze + # The default number of test repetitions. # # @return [ Integer ] The number of test repetitions. 
diff --git a/profile/benchmarking/multi_doc.rb b/profile/benchmarking/multi_doc.rb index d4ce0de874..bf1436acea 100644 --- a/profile/benchmarking/multi_doc.rb +++ b/profile/benchmarking/multi_doc.rb @@ -167,7 +167,7 @@ def bulk_insert(repetitions, docs) end def client - @client ||= Mongo::Client.new(["localhost:27017"], database: 'perftest') + @client ||= Mongo::Client.new(["localhost:27017"], database: 'perftest', monitoring: false) end def collection diff --git a/profile/benchmarking/parallel.rb b/profile/benchmarking/parallel.rb new file mode 100644 index 0000000000..76d7a1434a --- /dev/null +++ b/profile/benchmarking/parallel.rb @@ -0,0 +1,124 @@ +# Copyright (C) 2015 MongoDB, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#require 'ruby-prof' + +module Mongo + module Benchmarking + + # Parallel tests simulate ETL operations from disk to database or vice-versa. + # They are designed to be implemented using a language's preferred approach to + # concurrency and thus stress how drivers handle concurrency. + # These intentionally involve overhead above and beyond the driver itself to + # simulate the sort of "real-world" pressures that a drivers would be under + # during concurrent operation. + # + # @since 2.2.2 + module Parallel + + extend self + + # Run a parallel benchmark test. + # + # @example Run a test. + # Benchmarking::Parallel.run(:import) + # + # @param [ Symbol ] type The type of test to run. + # + # @return [ Numeric ] The test results. 
+ # + # @since 2.2.2 + def run(type) + Mongo::Logger.logger.level = ::Logger::WARN + type = type.to_s + '_jruby' if BSON::Environment.jruby? + puts "#{type} : #{send(type)}" + end + + # Test concurrently importing documents from a set of files. + # Using JRuby. + # + # @example Testing concurrently importing files using JRuby. + # Benchmarking::Parallel.import_jruby + # + # @return [ Numeric ] The test result. + # + # @since 2.2.2 + def import_jruby + #require 'jrjackson' + client.database.drop + create_collection + files = [*1..100].collect { |i| "#{LDJSON_FILE_BASE}#{i.to_s.rjust(3, "0")}.txt" } + + threads = [] + result = Benchmark.realtime do + 4.times do |i| + threads << Thread.new do + 25.times do |j| + docs = File.open(files[10 * i + j]).collect { |document| JSON.parse(document) } + #docs = File.open(files[10 * i + j]).collect { |document| JrJackson::Json.load(document) } + collection.insert_many(docs) + end + end + end + threads.collect { |t| t.join } + end + client.database.drop + result + end + + # Test concurrently importing documents from a set of files. + # + # @example Testing concurrently importing files. + # Benchmarking::Parallel.import + # + # @return [ Numeric ] The test result. 
+ # + # @since 2.2.2 + def import + #require 'yajl' + #parser = Yajl::Parser.new + client.database.drop + create_collection + files = [*1..100].collect { |i| "#{LDJSON_FILE_BASE}#{i.to_s.rjust(3, "0")}.txt" } + + threads = [] + result = Benchmark.realtime do + 4.times do |i| + threads << Thread.new do + 10.times do |j| + docs = File.open(files[10 * i + j]).collect { |document| JSON.parse(document) } + #docs = File.open(files[10 * i + j]).collect { |document| parser.parse(document) } + collection.insert_many(docs) + end + end + end + threads.collect { |t| t.join } + end + client.database.drop + result + end + + private + + def client + @client ||= Mongo::Client.new(["localhost:27017"], database: 'perftest', monitoring: false) + end + + def collection + @collection ||= client[:corpus].tap { |coll| coll.create } + end + alias :create_collection :collection + end + end +end From 4f8ea28ace94180cb2263895600f98488441b19b Mon Sep 17 00:00:00 2001 From: Emily Date: Wed, 27 Jan 2016 13:53:43 +0100 Subject: [PATCH 10/14] Benchmark parallel export --- Rakefile | 5 +++ profile/benchmarking.rb | 7 ++++ profile/benchmarking/parallel.rb | 62 +++++++++++++++++++++++++------- 3 files changed, 61 insertions(+), 13 deletions(-) diff --git a/Rakefile b/Rakefile index 7268dc69cb..d4563ca737 100644 --- a/Rakefile +++ b/Rakefile @@ -147,5 +147,10 @@ namespace :benchmark do puts "PARALLEL ETL BENCHMARK:: IMPORT" Mongo::Benchmarking::Parallel.run(:import) end + + task :export do + puts "PARALLEL ETL BENCHMARK:: EXPORT" + Mongo::Benchmarking::Parallel.run(:export) + end end end diff --git a/profile/benchmarking.rb b/profile/benchmarking.rb index 1a9bbd000c..6dfa5f9d88 100644 --- a/profile/benchmarking.rb +++ b/profile/benchmarking.rb @@ -77,6 +77,13 @@ module Benchmarking # @since 2.2.2 LDJSON_FILE_BASE = "#{DATA_PATH}LDJSON_MULTI/LDJSON".freeze + # The file path and base name for the outputted LDJSON files. 
+ # + # @return [ String ] The file path and base name for the outputted LDJSON files. + # + # @since 2.2.2 + LDJSON_FILE_OUTPUT_BASE = "#{DATA_PATH}LDJSON_MULTI/output/LDJSON".freeze + # The default number of test repetitions. # # @return [ Integer ] The number of test repetitions. diff --git a/profile/benchmarking/parallel.rb b/profile/benchmarking/parallel.rb index 76d7a1434a..a563260ad9 100644 --- a/profile/benchmarking/parallel.rb +++ b/profile/benchmarking/parallel.rb @@ -86,37 +86,73 @@ def import_jruby # # @since 2.2.2 def import - #require 'yajl' - #parser = Yajl::Parser.new + require 'yajl/json_gem' + require 'celluloid' + + Mongo::Collection.send(:include, Celluloid) + client.database.drop create_collection files = [*1..100].collect { |i| "#{LDJSON_FILE_BASE}#{i.to_s.rjust(3, "0")}.txt" } - threads = [] result = Benchmark.realtime do - 4.times do |i| - threads << Thread.new do - 10.times do |j| - docs = File.open(files[10 * i + j]).collect { |document| JSON.parse(document) } - #docs = File.open(files[10 * i + j]).collect { |document| parser.parse(document) } - collection.insert_many(docs) - end - end + Benchmarking::TEST_REPETITIONS.times do |i| + docs = File.open(files[i]).map{ |document| JSON.parse(document) } + collection.async.insert_many(docs) end - threads.collect { |t| t.join } end client.database.drop result end + def export + require 'ruby-prof' + insert_files + files = [*1..Benchmarking::TEST_REPETITIONS].collect do |i| + name = "#{LDJSON_FILE_OUTPUT_BASE}#{i.to_s.rjust(3, "0")}.txt" + File.new(name, 'w') + end + #prof = nil + result = Benchmark.realtime do + Benchmarking::TEST_REPETITIONS.times do |i| + #prof = RubyProf.profile do + files[i].write(collection.find(_id: { '$gte' => (i * 5000), + '$lt' => (i+1) * 5000 }).to_a) + end + #end + end + result + end + private + def insert_files + require 'yajl/json_gem' + require 'celluloid' + + Mongo::Collection.send(:include, Celluloid) + + client.database.drop + create_collection + files = 
[*1..Benchmarking::TEST_REPETITIONS].collect do |i| + "#{LDJSON_FILE_BASE}#{i.to_s.rjust(3, "0")}.txt" + end + + Benchmarking::TEST_REPETITIONS.times do |i| + docs = File.open(files[i]).each_with_index.collect do |document, offset| + JSON.parse(document).merge(_id: i * 5000 + offset) + end + collection.async.insert_many(docs) + end + puts "Imported #{Benchmarking::TEST_REPETITIONS} files, #{collection.count} documents." + end + def client @client ||= Mongo::Client.new(["localhost:27017"], database: 'perftest', monitoring: false) end def collection - @collection ||= client[:corpus].tap { |coll| coll.create } + @collection ||= begin; client[:corpus].tap { |coll| coll.create }; rescue Error::OperationFailure; client[:corpus]; end end alias :create_collection :collection end From ef0228f561a3de030533c3aa16610585720faa84 Mon Sep 17 00:00:00 2001 From: Emily Date: Wed, 27 Jan 2016 14:00:15 +0100 Subject: [PATCH 11/14] Benchmarking tests will be version 2.2.3 --- profile/benchmarking.rb | 22 +++++++++++----------- profile/benchmarking/helper.rb | 6 +++--- profile/benchmarking/micro.rb | 8 ++++---- profile/benchmarking/multi_doc.rb | 14 +++++++------- profile/benchmarking/parallel.rb | 16 ++++++++++++---- profile/benchmarking/single_doc.rb | 12 ++++++------ 6 files changed, 43 insertions(+), 35 deletions(-) diff --git a/profile/benchmarking.rb b/profile/benchmarking.rb index 6dfa5f9d88..f045fe881a 100644 --- a/profile/benchmarking.rb +++ b/profile/benchmarking.rb @@ -23,7 +23,7 @@ module Mongo # Module with all functionality for running driver benchmark tests. # - # @since 2.2.2 + # @since 2.2.3 module Benchmarking extend self @@ -32,63 +32,63 @@ module Benchmarking # # @return [ String ] The current path. # - # @since 2.2.2 + # @since 2.2.3 CURRENT_PATH = File.expand_path(File.dirname(__FILE__)).freeze # The path to data files used in Benchmarking tests. # # @return [ String ] Path to Benchmarking test files. 
# - # @since 2.2.2 + # @since 2.2.3 DATA_PATH = "#{CURRENT_PATH}/benchmarking/data/".freeze # The file containing the single tweet document. # # @return [ String ] The file containing the tweet document. # - # @since 2.2.2 + # @since 2.2.3 TWEET_DOCUMENT_FILE = "#{DATA_PATH}TWEET.json".freeze # The file containing the single small document. # # @return [ String ] The file containing the small document. # - # @since 2.2.2 + # @since 2.2.3 SMALL_DOCUMENT_FILE = "#{DATA_PATH}SMALL_DOC.json".freeze # The file containing the single large document. # # @return [ String ] The file containing the large document. # - # @since 2.2.2 + # @since 2.2.3 LARGE_DOCUMENT_FILE = "#{DATA_PATH}LARGE_DOC.json".freeze # The file to upload when testing GridFS. # # @return [ String ] The file containing the GridFS test data. # - # @since 2.2.2 + # @since 2.2.3 GRIDFS_FILE = "#{DATA_PATH}GRIDFS_LARGE".freeze # The file path and base name for the LDJSON files. # # @return [ String ] The file path and base name for the LDJSON files. # - # @since 2.2.2 + # @since 2.2.3 LDJSON_FILE_BASE = "#{DATA_PATH}LDJSON_MULTI/LDJSON".freeze # The file path and base name for the outputted LDJSON files. # # @return [ String ] The file path and base name for the outputted LDJSON files. # - # @since 2.2.2 + # @since 2.2.3 LDJSON_FILE_OUTPUT_BASE = "#{DATA_PATH}LDJSON_MULTI/output/LDJSON".freeze # The default number of test repetitions. # # @return [ Integer ] The number of test repetitions. # - # @since 2.2.2 + # @since 2.2.3 TEST_REPETITIONS = 100.freeze # The number of default warmup repetitions of the test to do before @@ -96,7 +96,7 @@ module Benchmarking # # @return [ Integer ] The default number of warmup repetitions. 
# - # @since 2.2.2 + # @since 2.2.3 WARMUP_REPETITIONS = 10.freeze def tweet_document diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb index e2c144bcb3..687c28970e 100644 --- a/profile/benchmarking/helper.rb +++ b/profile/benchmarking/helper.rb @@ -14,7 +14,7 @@ module Benchmarking # # @return [ Array ] A list of extended-json documents. # - # @since 2.2.2 + # @since 2.2.3 def load_file(file_name) File.open(file_name, "r") do |f| f.each_line.collect do |line| @@ -33,7 +33,7 @@ def load_file(file_name) # # @return [ Hash ] An extended-json document. # - # @since 2.2.2 + # @since 2.2.3 def parse_json(document) JSON.parse(document).tap do |doc| if doc['_id'] && doc['_id']['$oid'] @@ -51,7 +51,7 @@ def parse_json(document) # # @return [ Numeric ] The median of the list. # - # @since 2.2.2 + # @since 2.2.3 def median(values) values.sort![values.size/2-1] end diff --git a/profile/benchmarking/micro.rb b/profile/benchmarking/micro.rb index 802bfda74f..3197b49341 100644 --- a/profile/benchmarking/micro.rb +++ b/profile/benchmarking/micro.rb @@ -18,7 +18,7 @@ module Benchmarking # These tests focus on BSON encoding and decoding; they are client-side only and # do not involve any transmission of data to or from the server. # - # @since 2.2.2 + # @since 2.2.3 module Micro extend self @@ -33,7 +33,7 @@ module Micro # # @return [ Numeric ] The test results. # - # @since 2.2.2 + # @since 2.2.3 def run(type, action, repetitions = Benchmarking::TEST_REPETITIONS) file_name = type.to_s << "_bson.json" GC.disable @@ -51,7 +51,7 @@ def run(type, action, repetitions = Benchmarking::TEST_REPETITIONS) # # @return [ Numeric ] The median of the results. # - # @since 2.2.2 + # @since 2.2.3 def encode(file_name, repetitions) data = Benchmarking.load_file(file_name) document = BSON::Document.new(data.first) @@ -80,7 +80,7 @@ def encode(file_name, repetitions) # # @return [ Numeric ] The median of the results. 
# - # @since 2.2.2 + # @since 2.2.3 def decode(file_name, repetitions) data = Benchmarking.load_file(file_name) buffer = BSON::Document.new(data.first).to_bson diff --git a/profile/benchmarking/multi_doc.rb b/profile/benchmarking/multi_doc.rb index bf1436acea..d8f44fa1fb 100644 --- a/profile/benchmarking/multi_doc.rb +++ b/profile/benchmarking/multi_doc.rb @@ -19,7 +19,7 @@ module Benchmarking # They are designed to give insight into the efficiency of the driver's implementation # of bulk/batch operations such as bulk writes and cursor reads. # - # @since 2.2.2 + # @since 2.2.3 module MultiDoc extend self @@ -34,7 +34,7 @@ module MultiDoc # # @return [ Numeric ] The test results. # - # @since 2.2.2 + # @since 2.2.3 def run(type, repetitions = Benchmarking::TEST_REPETITIONS) Mongo::Logger.logger.level = ::Logger::WARN puts "#{type} : #{send(type, repetitions)}" @@ -49,7 +49,7 @@ def run(type, repetitions = Benchmarking::TEST_REPETITIONS) # # @return [ Numeric ] The median of the results. # - # @since 2.2.2 + # @since 2.2.3 def find_many(repetitions) client.database.drop doc = Benchmarking.tweet_document @@ -76,7 +76,7 @@ def find_many(repetitions) # # @return [ Numeric ] The median of the results. # - # @since 2.2.2 + # @since 2.2.3 def bulk_insert_small(repetitions) bulk_insert(repetitions, [Benchmarking.small_document] * 10_000) end @@ -90,7 +90,7 @@ def bulk_insert_small(repetitions) # # @return [ Numeric ] The median of the results. # - # @since 2.2.2 + # @since 2.2.3 def bulk_insert_large(repetitions) bulk_insert(repetitions, [Benchmarking.large_document] * 10) end @@ -104,7 +104,7 @@ def bulk_insert_large(repetitions) # # @return [ Numeric ] The median of the results. # - # @since 2.2.2 + # @since 2.2.3 def gridfs_upload(repetitions) client.database.drop create_collection @@ -133,7 +133,7 @@ def gridfs_upload(repetitions) # # @return [ Numeric ] The median of the results. 
# - # @since 2.2.2 + # @since 2.2.3 def gridfs_download(repetitions = Benchmarking::TEST_REPETITIONS) client.database.drop create_collection diff --git a/profile/benchmarking/parallel.rb b/profile/benchmarking/parallel.rb index a563260ad9..ec60a80df4 100644 --- a/profile/benchmarking/parallel.rb +++ b/profile/benchmarking/parallel.rb @@ -24,7 +24,7 @@ module Benchmarking # simulate the sort of "real-world" pressures that a drivers would be under # during concurrent operation. # - # @since 2.2.2 + # @since 2.2.3 module Parallel extend self @@ -38,7 +38,7 @@ module Parallel # # @return [ Numeric ] The test results. # - # @since 2.2.2 + # @since 2.2.3 def run(type) Mongo::Logger.logger.level = ::Logger::WARN type = type.to_s + '_jruby' if BSON::Environment.jruby? @@ -53,7 +53,7 @@ def run(type) # # @return [ Numeric ] The test result. # - # @since 2.2.2 + # @since 2.2.3 def import_jruby #require 'jrjackson' client.database.drop @@ -84,7 +84,7 @@ def import_jruby # # @return [ Numeric ] The test result. # - # @since 2.2.2 + # @since 2.2.3 def import require 'yajl/json_gem' require 'celluloid' @@ -105,6 +105,14 @@ def import result end + # Test concurrently exporting documents from a collection to a set of files. + # + # @example Testing concurrently exporting files. + # Benchmarking::Parallel.export + # + # @return [ Numeric ] The test result. + # + # @since 2.2.3 def export require 'ruby-prof' insert_files diff --git a/profile/benchmarking/single_doc.rb b/profile/benchmarking/single_doc.rb index c1c786b061..e5f3021caa 100644 --- a/profile/benchmarking/single_doc.rb +++ b/profile/benchmarking/single_doc.rb @@ -19,7 +19,7 @@ module Benchmarking # They are designed to give insights into the efficiency of the driver's # implementation of the basic wire protocol. # - # @since 2.2.2 + # @since 2.2.3 module SingleDoc extend self @@ -34,7 +34,7 @@ module SingleDoc # # @return [ Numberic ] The test results.
# - # @since 2.2.2 + # @since 2.2.3 def run(type, repetitions = Benchmarking::TEST_REPETITIONS) Mongo::Logger.logger.level = ::Logger::WARN puts "#{type} : #{send(type, repetitions)}" @@ -47,7 +47,7 @@ def run(type, repetitions = Benchmarking::TEST_REPETITIONS) # # @param [ Integer ] repetitions The number of test repetitions. # - # @since 2.2.2 + # @since 2.2.3 def command(repetitions) monitor = client.cluster.servers.first.monitor results = repetitions.times.collect do @@ -69,7 +69,7 @@ def command(repetitions) # # @return [ Numeric ] The median of the results. # - # @since 2.2.2 + # @since 2.2.3 def find_one(repetitions) client.database.drop doc = Benchmarking.tweet_document @@ -98,7 +98,7 @@ def find_one(repetitions) # # @return [ Numeric ] The median of the results. # - # @since 2.2.2 + # @since 2.2.3 def insert_one_large(repetitions) insert_one(repetitions, 10, Benchmarking.large_document) end @@ -112,7 +112,7 @@ def insert_one_large(repetitions) # # @return [ Numeric ] The median of the results. 
# - # @since 2.2.2 + # @since 2.2.3 def insert_one_small(repetitions) insert_one(repetitions, 10_000, Benchmarking.small_document) end From 62b9e2fb96dc1f1702059aacf3596e6e293b8501 Mon Sep 17 00:00:00 2001 From: Emily Date: Wed, 27 Jan 2016 17:34:40 +0100 Subject: [PATCH 12/14] Benchmarking parallel GRIDFS upload and download --- Rakefile | 10 +++++ profile/benchmarking.rb | 14 ++++++ profile/benchmarking/parallel.rb | 76 +++++++++++++++++++++++++++++++- 3 files changed, 98 insertions(+), 2 deletions(-) diff --git a/Rakefile b/Rakefile index d4563ca737..b0d9b16c58 100644 --- a/Rakefile +++ b/Rakefile @@ -152,5 +152,15 @@ namespace :benchmark do puts "PARALLEL ETL BENCHMARK:: EXPORT" Mongo::Benchmarking::Parallel.run(:export) end + + task :gridfs_upload do + puts "PARALLEL ETL BENCHMARK:: GRIDFS UPLOAD" + Mongo::Benchmarking::Parallel.run(:gridfs_upload) + end + + task :gridfs_download do + puts "PARALLEL ETL BENCHMARK:: GRIDFS DOWNLOAD" + Mongo::Benchmarking::Parallel.run(:gridfs_download) + end end end diff --git a/profile/benchmarking.rb b/profile/benchmarking.rb index f045fe881a..c35b5db442 100644 --- a/profile/benchmarking.rb +++ b/profile/benchmarking.rb @@ -84,6 +84,20 @@ module Benchmarking # @since 2.2.3 LDJSON_FILE_OUTPUT_BASE = "#{DATA_PATH}LDJSON_MULTI/output/LDJSON".freeze + # The file path and base name for the GRIDFS files to upload. + # + # @return [ String ] The file path and base name for the GRIDFS files to upload. + # + # @since 2.2.3 + GRIDFS_MULTI_BASE = "#{DATA_PATH}GRIDFS_MULTI/file".freeze + + # The file path and base name for the outputted GRIDFS downloaded files. + # + # @return [ String ] The file path and base name for the outputted GRIDFS downloaded files. + # + # @since 2.2.3 + GRIDFS_MULTI_OUTPUT_BASE = "#{DATA_PATH}GRIDFS_MULTI/output/file-output".freeze + # The default number of test repetitions. # # @return [ Integer ] The number of test repetitions. 
diff --git a/profile/benchmarking/parallel.rb b/profile/benchmarking/parallel.rb index ec60a80df4..88f2fb09b3 100644 --- a/profile/benchmarking/parallel.rb +++ b/profile/benchmarking/parallel.rb @@ -129,9 +129,81 @@ def export end #end end + client.database.drop result end + # This benchmark tests driver performance uploading files from disk to GridFS. + # + # @example Test uploading files from disk to GridFS. + # Benchmarking::Parallel.gridfs_upload + # + # @return [ Numeric ] The test result. + # + # @since 2.2.3 + def gridfs_upload + n = 50 + client.database.drop + fs = client.database.fs + + files = [*0...n].collect do |i| + name = "#{GRIDFS_MULTI_BASE}#{i}.txt" + { + file: File.open(name, 'r'), + name: File.basename(name) + } + end + + s = StringIO.new('a') + fs.upload_from_stream('create-indices.test', s) + + Benchmark.realtime do + n.times do |i| + fs.upload_from_stream(files[i][:name], files[i][:file]) + end + end + end + alias :gridfs_upload_jruby :gridfs_upload + + + # This benchmark tests driver performance downloading files from GridFS to disk. + # + # @example Test downloading files from GridFS to disk. + # Benchmarking::Parallel.gridfs_download + # + # @return [ Numeric ] The test result. + # + # @since 2.2.3 + def gridfs_download + n_files = 50 + n_threads = BSON::Environment.jruby? ? 
4 : 2 + threads = [] + client.database.drop + fs = client.database.fs + + file_info = [*0...n_files].collect do |i| + name = "#{GRIDFS_MULTI_BASE}#{i}.txt" + { + _id: fs.upload_from_stream(name, File.open(name)), + output_name: "#{GRIDFS_MULTI_OUTPUT_BASE}#{i}.txt" + } + end.freeze + + reps = n_files/n_threads.freeze + Benchmark.realtime do + n_threads.times do |i| + threads << Thread.new do + reps.times do |j| + index = i * reps + j + fs.download_to_stream(file_info[index][:_id], File.open(file_info[index][:output_name], "w")) + end + end + end + threads.collect(&:value) + end + end + alias :gridfs_download_jruby :gridfs_download + private def insert_files @@ -143,11 +215,11 @@ def insert_files client.database.drop create_collection files = [*1..Benchmarking::TEST_REPETITIONS].collect do |i| - "#{LDJSON_FILE_BASE}#{i.to_s.rjust(3, "0")}.txt" + File.open("#{LDJSON_FILE_BASE}#{i.to_s.rjust(3, "0")}.txt") end Benchmarking::TEST_REPETITIONS.times do |i| - docs = File.open(files[i]).each_with_index.collect do |document, offset| + docs = files[i].each_with_index.collect do |document, offset| JSON.parse(document).merge(_id: i * 5000 + offset) end collection.async.insert_many(docs) From 6da3153469cf1df390f494ee6e167274cf87fdc1 Mon Sep 17 00:00:00 2001 From: Emily Date: Tue, 9 Feb 2016 13:12:13 +0100 Subject: [PATCH 13/14] Add documentation and other small changes --- Gemfile | 2 ++ Rakefile | 48 ++++++++++++++++++++++++++------ profile/benchmarking.rb | 18 ++++++------ profile/benchmarking/micro.rb | 2 +- profile/benchmarking/parallel.rb | 2 +- 5 files changed, 53 insertions(+), 19 deletions(-) diff --git a/Gemfile b/Gemfile index d9c06b841c..3fc3bf3c8f 100644 --- a/Gemfile +++ b/Gemfile @@ -10,6 +10,8 @@ group :development, :testing do gem 'rspec', '~> 3.0' gem 'mime-types', '~> 1.25' gem 'httparty' + gem 'yajl-ruby', require: 'yajl', platforms: :mri + gem 'celluloid', platforms: :mri end group :development do diff --git a/Rakefile b/Rakefile index b0d9b16c58..5faa400046 
100644 --- a/Rakefile +++ b/Rakefile @@ -44,19 +44,23 @@ end require_relative "profile/benchmarking" +# Some tasks require data files, available from the drivers team. See the comments above each task for details. namespace :benchmark do - desc "Run the driver benchmark tests" + desc "Run the driver benchmark tests." namespace :micro do desc "Run the common driver micro benchmarking tests" namespace :flat do - desc "Benchmarking for flat bson documents" + desc "Benchmarking for flat bson documents." + + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json. task :encode do puts "MICRO BENCHMARK:: FLAT:: ENCODE" Mongo::Benchmarking::Micro.run(:flat, :encode) end + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json. task :decode do puts "MICRO BENCHMARK:: FLAT:: DECODE" Mongo::Benchmarking::Micro.run(:flat, :decode) @@ -64,12 +68,15 @@ namespace :benchmark do end namespace :deep do - desc "Benchmarking for deep bson documents" + desc "Benchmarking for deep bson documents." + + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called deep_bson.json. task :encode do puts "MICRO BENCHMARK:: DEEP:: ENCODE" Mongo::Benchmarking::Micro.run(:deep, :encode) end + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called deep_bson.json. task :decode do puts "MICRO BENCHMARK:: DEEP:: DECODE" Mongo::Benchmarking::Micro.run(:deep, :decode) @@ -77,12 +84,15 @@ namespace :benchmark do end namespace :full do - desc "Benchmarking for full bson documents" + desc "Benchmarking for full bson documents." + + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called full_bson.json. task :encode do puts "MICRO BENCHMARK:: FULL:: ENCODE" Mongo::Benchmarking::Micro.run(:full, :encode) end + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called full_bson.json.
task :decode do puts "MICRO BENCHMARK:: FULL:: DECODE" Mongo::Benchmarking::Micro.run(:full, :decode) @@ -93,48 +103,57 @@ namespace :benchmark do namespace :single_doc do desc "Run the common driver single-document benchmarking tests" task :command do - puts "SINGLE-DOC BENCHMARK:: COMMAND" + puts "SINGLE DOC BENCHMARK:: COMMAND" Mongo::Benchmarking::SingleDoc.run(:command) end + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called TWEET.json. task :find_one do - puts "SINGLE_DOC BENCHMARK:: FIND ONE BY ID" + puts "SINGLE DOC BENCHMARK:: FIND ONE BY ID" Mongo::Benchmarking::SingleDoc.run(:find_one) end + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called SMALL_DOC.json. task :insert_one_small do - puts "SINGLE_DOC BENCHMARK:: INSERT ONE SMALL DOCUMENT" + puts "SINGLE DOC BENCHMARK:: INSERT ONE SMALL DOCUMENT" Mongo::Benchmarking::SingleDoc.run(:insert_one_small) end + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called LARGE_DOC.json. task :insert_one_large do - puts "SINGLE_DOC BENCHMARK:: INSERT ONE LARGE DOCUMENT" + puts "SINGLE DOC BENCHMARK:: INSERT ONE LARGE DOCUMENT" Mongo::Benchmarking::SingleDoc.run(:insert_one_large) end end namespace :multi_doc do desc "Run the common driver multi-document benchmarking tests" + + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called TWEET.json. task :find_many do puts "MULTI DOCUMENT BENCHMARK:: FIND MANY" Mongo::Benchmarking::MultiDoc.run(:find_many) end + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called SMALL_DOC.json. task :bulk_insert_small do puts "MULTI DOCUMENT BENCHMARK:: BULK INSERT SMALL" Mongo::Benchmarking::MultiDoc.run(:bulk_insert_small) end + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called LARGE_DOC.json. task :bulk_insert_large do puts "MULTI DOCUMENT BENCHMARK:: BULK INSERT LARGE" Mongo::Benchmarking::MultiDoc.run(:bulk_insert_large) end + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called GRIDFS_LARGE. 
task :gridfs_upload do puts "MULTI DOCUMENT BENCHMARK:: GRIDFS UPLOAD" Mongo::Benchmarking::MultiDoc.run(:gridfs_upload) end + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called GRIDFS_LARGE. task :gridfs_download do puts "MULTI DOCUMENT BENCHMARK:: GRIDFS DOWNLOAD" Mongo::Benchmarking::MultiDoc.run(:gridfs_download) @@ -143,21 +162,34 @@ namespace :benchmark do namespace :parallel do desc "Run the common driver paralell ETL benchmarking tests" + + # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called LDJSON_MULTI, + # with the files used in this task. task :import do puts "PARALLEL ETL BENCHMARK:: IMPORT" Mongo::Benchmarking::Parallel.run(:import) end + # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called LDJSON_MULTI, + # with the files used in this task. + # Requirement: Another directory in "#{Mongo::Benchmarking::DATA_PATH}/LDJSON_MULTI" + # called 'output'. task :export do puts "PARALLEL ETL BENCHMARK:: EXPORT" Mongo::Benchmarking::Parallel.run(:export) end + # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called GRIDFS_MULTI, + # with the files used in this task. task :gridfs_upload do puts "PARALLEL ETL BENCHMARK:: GRIDFS UPLOAD" Mongo::Benchmarking::Parallel.run(:gridfs_upload) end + # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called GRIDFS_MULTI, + # with the files used in this task. + # Requirement: Another directory in "#{Mongo::Benchmarking::DATA_PATH}/GRIDFS_MULTI" + # called 'output'. task :gridfs_download do puts "PARALLEL ETL BENCHMARK:: GRIDFS DOWNLOAD" Mongo::Benchmarking::Parallel.run(:gridfs_download) diff --git a/profile/benchmarking.rb b/profile/benchmarking.rb index c35b5db442..cab233ca21 100644 --- a/profile/benchmarking.rb +++ b/profile/benchmarking.rb @@ -40,63 +40,63 @@ module Benchmarking # @return [ String ] Path to Benchmarking test files. 
# # @since 2.2.3 - DATA_PATH = "#{CURRENT_PATH}/benchmarking/data/".freeze + DATA_PATH = [CURRENT_PATH, 'benchmarking', 'data'].join('/').freeze # The file containing the single tweet document. # # @return [ String ] The file containing the tweet document. # # @since 2.2.3 - TWEET_DOCUMENT_FILE = "#{DATA_PATH}TWEET.json".freeze + TWEET_DOCUMENT_FILE = [DATA_PATH, 'TWEET.json'].join('/').freeze # The file containing the single small document. # # @return [ String ] The file containing the small document. # # @since 2.2.3 - SMALL_DOCUMENT_FILE = "#{DATA_PATH}SMALL_DOC.json".freeze + SMALL_DOCUMENT_FILE = [DATA_PATH, 'SMALL_DOC.json'].join('/').freeze # The file containing the single large document. # # @return [ String ] The file containing the large document. # # @since 2.2.3 - LARGE_DOCUMENT_FILE = "#{DATA_PATH}LARGE_DOC.json".freeze + LARGE_DOCUMENT_FILE = [DATA_PATH, 'LARGE_DOC.json'].join('/').freeze # The file to upload when testing GridFS. # # @return [ String ] The file containing the GridFS test data. # # @since 2.2.3 - GRIDFS_FILE = "#{DATA_PATH}GRIDFS_LARGE".freeze + GRIDFS_FILE = [DATA_PATH, 'GRIDFS_LARGE'].join('/').freeze # The file path and base name for the LDJSON files. # # @return [ String ] The file path and base name for the LDJSON files. # # @since 2.2.3 - LDJSON_FILE_BASE = "#{DATA_PATH}LDJSON_MULTI/LDJSON".freeze + LDJSON_FILE_BASE = [DATA_PATH, 'LDJSON_MULTI', 'LDJSON'].join('/').freeze # The file path and base name for the outputted LDJSON files. # # @return [ String ] The file path and base name for the outputted LDJSON files. # # @since 2.2.3 - LDJSON_FILE_OUTPUT_BASE = "#{DATA_PATH}LDJSON_MULTI/output/LDJSON".freeze + LDJSON_FILE_OUTPUT_BASE = [DATA_PATH, 'LDJSON_MULTI', 'output', 'LDJSON'].join('/').freeze # The file path and base name for the GRIDFS files to upload. # # @return [ String ] The file path and base name for the GRIDFS files to upload. 
# # @since 2.2.3 - GRIDFS_MULTI_BASE = "#{DATA_PATH}GRIDFS_MULTI/file".freeze + GRIDFS_MULTI_BASE = [DATA_PATH, 'GRIDFS_MULTI', 'file'].join('/').freeze # The file path and base name for the outputted GRIDFS downloaded files. # # @return [ String ] The file path and base name for the outputted GRIDFS downloaded files. # # @since 2.2.3 - GRIDFS_MULTI_OUTPUT_BASE = "#{DATA_PATH}GRIDFS_MULTI/output/file-output".freeze + GRIDFS_MULTI_OUTPUT_BASE = [DATA_PATH, 'GRIDFS_MULTI', 'output', 'file-output'].join('/').freeze # The default number of test repetitions. # diff --git a/profile/benchmarking/micro.rb b/profile/benchmarking/micro.rb index 3197b49341..bc3e4b4572 100644 --- a/profile/benchmarking/micro.rb +++ b/profile/benchmarking/micro.rb @@ -37,7 +37,7 @@ module Micro def run(type, action, repetitions = Benchmarking::TEST_REPETITIONS) file_name = type.to_s << "_bson.json" GC.disable - file_path = Benchmarking::DATA_PATH + file_name + file_path = [Benchmarking::DATA_PATH, file_name].join('/') puts "#{action} : #{send(action, file_path, repetitions)}" end diff --git a/profile/benchmarking/parallel.rb b/profile/benchmarking/parallel.rb index 88f2fb09b3..a0ec134874 100644 --- a/profile/benchmarking/parallel.rb +++ b/profile/benchmarking/parallel.rb @@ -114,7 +114,7 @@ def import # # @since 2.2.3 def export - require 'ruby-prof' + #require 'ruby-prof' insert_files files = [*1..Benchmarking::TEST_REPETITIONS].collect do |i| name = "#{LDJSON_FILE_OUTPUT_BASE}#{i.to_s.rjust(3, "0")}.txt" From 1a9b6fba2ca8899958c8e5bf0d2f2b4285e47103 Mon Sep 17 00:00:00 2001 From: Emily Date: Tue, 9 Feb 2016 13:16:51 +0100 Subject: [PATCH 14/14] Update client and collection creation in two tasks --- profile/benchmarking/multi_doc.rb | 2 +- profile/benchmarking/single_doc.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/profile/benchmarking/multi_doc.rb b/profile/benchmarking/multi_doc.rb index d8f44fa1fb..1a38d09ef5 100644 --- a/profile/benchmarking/multi_doc.rb +++ 
b/profile/benchmarking/multi_doc.rb @@ -171,7 +171,7 @@ def client end def collection - @collection ||= client[:corpus].tap { |coll| coll.create } + @collection ||= begin; client[:corpus].tap { |coll| coll.create }; rescue Error::OperationFailure; client[:corpus]; end end alias :create_collection :collection end diff --git a/profile/benchmarking/single_doc.rb b/profile/benchmarking/single_doc.rb index e5f3021caa..94dffcd1f6 100644 --- a/profile/benchmarking/single_doc.rb +++ b/profile/benchmarking/single_doc.rb @@ -134,11 +134,11 @@ def insert_one(repetitions, do_repetitions, doc) end def client - @client ||= Mongo::Client.new(["localhost:27017"], database: 'perftest') + @client ||= Mongo::Client.new(["localhost:27017"], database: 'perftest', monitoring: false) end def collection - @collection ||= client[:corpus].tap { |coll| coll.create } + @collection ||= begin; client[:corpus].tap { |coll| coll.create }; rescue Error::OperationFailure; client[:corpus]; end end alias :create_collection :collection end