Skip to content

Commit 94e097b

Browse files
committed
Add the parallel ETL import tasks
1 parent 632c770 commit 94e097b

File tree

4 files changed

+140
-1
lines changed

4 files changed

+140
-1
lines changed

Rakefile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,13 @@ namespace :benchmark do
139139
puts "MULTI DOCUMENT BENCHMARK:: GRIDFS DOWNLOAD"
140140
Mongo::Benchmarking::MultiDoc.run(:gridfs_download)
141141
end
142+
end
142143

144+
# Rake tasks for the parallel (ETL-style) benchmarks.
namespace :parallel do
  desc "Run the common driver parallel ETL benchmarking tests"
  task :import do
    # Print a banner, then delegate to the benchmark module, which prints
    # the measured result itself.
    puts "PARALLEL ETL BENCHMARK:: IMPORT"
    Mongo::Benchmarking::Parallel.run(:import)
  end
end
144151
end

profile/benchmarking.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
require_relative 'benchmarking/micro'
1818
require_relative 'benchmarking/single_doc'
1919
require_relative 'benchmarking/multi_doc'
20+
require_relative 'benchmarking/parallel'
2021

2122
module Mongo
2223

@@ -69,6 +70,13 @@ module Benchmarking
6970
# @since 2.2.2
7071
GRIDFS_FILE = "#{DATA_PATH}GRIDFS_LARGE".freeze
7172

73+
# The file path and base name for the LDJSON files.
74+
#
75+
# @return [ String ] The file path and base name for the LDJSON files.
76+
#
77+
# @since 2.2.2
78+
LDJSON_FILE_BASE = "#{DATA_PATH}LDJSON_MULTI/LDJSON".freeze
79+
7280
# The default number of test repetitions.
7381
#
7482
# @return [ Integer ] The number of test repetitions.

profile/benchmarking/multi_doc.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ def bulk_insert(repetitions, docs)
167167
end
168168

169169
def client
170-
@client ||= Mongo::Client.new(["localhost:27017"], database: 'perftest')
170+
@client ||= Mongo::Client.new(["localhost:27017"], database: 'perftest', monitoring: false)
171171
end
172172

173173
def collection

profile/benchmarking/parallel.rb

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
# Copyright (C) 2015 MongoDB, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
#require 'ruby-prof'
16+
17+
module Mongo
  module Benchmarking

    # Parallel tests simulate ETL operations from disk to database or vice-versa.
    # They are designed to be implemented using a language's preferred approach to
    # concurrency and thus stress how drivers handle concurrency.
    # These intentionally involve overhead above and beyond the driver itself to
    # simulate the sort of "real-world" pressures that a driver would be under
    # during concurrent operation.
    #
    # @since 2.2.2
    module Parallel

      extend self

      # Run a parallel benchmark test and print its result.
      #
      # On JRuby the "<type>_jruby" variant of the test is dispatched instead.
      #
      # @example Run a test.
      #   Benchmarking::Parallel.run(:import)
      #
      # @param [ Symbol ] type The type of test to run.
      #
      # @return [ nil ] The result is printed, not returned.
      #
      # @since 2.2.2
      def run(type)
        Mongo::Logger.logger.level = ::Logger::WARN
        type = type.to_s + '_jruby' if BSON::Environment.jruby?
        puts "#{type} : #{send(type)}"
      end

      # Test concurrently importing documents from a set of files.
      # Using JRuby.
      #
      # Four threads each import 25 distinct files, covering all 100 files.
      #
      # @example Testing concurrently importing files using JRuby.
      #   Benchmarking::Parallel.import_jruby
      #
      # @return [ Numeric ] The test result, in seconds.
      #
      # @since 2.2.2
      def import_jruby
        # NOTE: JrJackson::Json.load was tried as an alternative JSON parser here.
        import_files(4, 25)
      end

      # Test concurrently importing documents from a set of files.
      #
      # Four threads each import 10 distinct files, i.e. the first 40 files.
      #
      # @example Testing concurrently importing files.
      #   Benchmarking::Parallel.import
      #
      # @return [ Numeric ] The test result, in seconds.
      #
      # @since 2.2.2
      def import
        # NOTE: Yajl::Parser was tried as an alternative JSON parser here.
        import_files(4, 10)
      end

      private

      # Import LDJSON files into the collection from several threads and time it.
      #
      # The database is dropped before and after the run. Each thread receives
      # its own contiguous, non-overlapping slice of the file list, so every
      # selected file is imported exactly once.
      #
      # @param [ Integer ] thread_count The number of importing threads.
      # @param [ Integer ] files_per_thread The number of files each thread imports.
      #
      # @return [ Float ] The elapsed wall-clock time of the import, in seconds.
      def import_files(thread_count, files_per_thread)
        client.database.drop
        create_collection
        files = ldjson_files(thread_count * files_per_thread)

        result = Benchmark.realtime do
          workers = files.each_slice(files_per_thread).map do |batch|
            Thread.new do
              batch.each do |path|
                # File.foreach closes the file once all lines have been read.
                docs = File.foreach(path).map { |document| JSON.parse(document) }
                collection.insert_many(docs)
              end
            end
          end
          workers.each(&:join)
        end
        client.database.drop
        result
      end

      # Build the paths of the first +count+ LDJSON files; the files are
      # numbered from 001 with a zero-padded, three digit suffix.
      #
      # @param [ Integer ] count The number of file paths to build.
      #
      # @return [ Array<String> ] The file paths.
      def ldjson_files(count)
        (1..count).map { |i| "#{LDJSON_FILE_BASE}#{i.to_s.rjust(3, "0")}.txt" }
      end

      # The memoized client used by the tests; monitoring is disabled.
      def client
        @client ||= Mongo::Client.new(["localhost:27017"], database: 'perftest', monitoring: false)
      end

      # The memoized :corpus collection; it is created on first access only.
      def collection
        @collection ||= client[:corpus].tap { |coll| coll.create }
      end
      alias :create_collection :collection
    end
  end
end

0 commit comments

Comments
 (0)