Add work/result batching; use blocking sockets when possible.

Fixes #47. Fixes #46.
dbishop committed Mar 28, 2013
1 parent 28ade3c commit e96c86bc058ba6d5d5b21d8190e19d232e15f716
Showing with 223 additions and 114 deletions.
  1. +34 −2 README.rst
  2. +40 −2 bin/ssbench-master
  3. +7 −1 bin/ssbench-worker
  4. +87 −71 ssbench/master.py
  5. +11 −11 ssbench/tests/test_worker.py
  6. +44 −27 ssbench/worker.py
README.rst
@@ -232,9 +232,11 @@ runs a benchmark scenario::
[--os-cacert <ca-certificate>] [--insecure]
[-S STORAGE_URL] [-T TOKEN] [-c COUNT]
[-u COUNT] [-o COUNT] [--workers COUNT]
- [-q] [--profile] [--noop] [-k]
+ [--batch-size COUNT] [-q] [--profile]
+ [--noop] [-k]
+ [--connect-timeout CONNECT_TIMEOUT]
+ [--network-timeout NETWORK_TIMEOUT]
[-s STATS_FILE] [-r] [--pctile PERCENTILE]
-
...
@@ -244,6 +246,7 @@ previously-run benchmark scenario::
$ ssbench-master report-scenario -h
usage: ssbench-master report-scenario [-h] -s STATS_FILE [-f REPORT_FILE]
[--pctile PERCENTILE] [-r RPS_HISTOGRAM]
+ [--profile]
...
@@ -404,6 +407,35 @@ command. Simply use the ``--workers COUNT`` option to ``ssbench-master``::
ssbench-master report-scenario -s /tmp/ssbench-results/Small_test_scenario.2013-02-20.091016.stat
+Scalability and Throughput
+--------------------------
+
+Assuming the Swift cluster being benchmarked is not the bottleneck, the
+scalability of ssbench may be increased by:
+
+- Running up to one ``ssbench-worker`` process per CPU core on any number of
+ benchmarking servers.
+- Increasing the ``--batch-size`` parameter (which defaults to 1) on both the
+ ``ssbench-master`` and ``ssbench-worker`` command-lines. Note that if you
+ are running everything on one server and using the ``--workers`` argument to
+ ``ssbench-master``, the ``--batch-size`` parameter passed to
+ ``ssbench-master`` will be passed on to the automatically-started
+ ``ssbench-worker`` processes.
+- Ensuring, for optimal scalability, that the user-count (concurrency) is
+  greater than and an even multiple of both the batch-size and the number of
+  ``ssbench-worker`` processes.
+
+As a simple example, on my quad-core MacBook Pro, I get around **9,800**
+requests per second with ``--noop`` (see below) using this command-line
+(the default ``--batch-size`` of 1)::
+
+ $ ssbench-master run-scenario ... -u 24 -o 30000 --workers 3 --noop
+
+But with a ``--batch-size`` of 8, I can get around **19,500** requests per second::
+
+ $ ssbench-master run-scenario ... -u 24 -o 30000 --workers 3 --noop --batch-size 8
+
+
HTTPS on OS X
-------------
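The throughput gain described in the new README section comes from sending work and results in groups rather than one message per job. A rough sketch of that chunking idea in plain Python (illustrative only; the actual batching lives in ``ssbench/master.py`` and ``ssbench/worker.py``, whose diffs are not shown in this excerpt)::

    import itertools

    def batch_jobs(jobs, batch_size):
        """Yield lists of up to batch_size jobs from an iterable of jobs.

        Sketch only: the real master/worker batching is in ssbench itself,
        not in this helper.
        """
        it = iter(jobs)
        while True:
            batch = list(itertools.islice(it, batch_size))
            if not batch:
                break
            yield batch

    # With --batch-size 8, the 30,000 no-op jobs from the README example
    # need only 3,750 sends instead of 30,000.
    jobs = ({'type': 'noop', 'serial': i} for i in range(30000))
    assert sum(1 for _ in batch_jobs(jobs, 8)) == 3750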
bin/ssbench-master
@@ -49,6 +49,14 @@ def run_scenario(master, args):
if args.user_count != 'value from scenario' else None
operation_count = int(args.op_count) \
if args.op_count != 'value from scenario' else None
+
+ # Sanity-check batch_size
+ if args.batch_size > user_count:
+        logger.warning('--batch-size %d was > --user-count %d; using '
+                       '--batch-size %d', args.batch_size, user_count,
+                       user_count)
+ args.batch_size = user_count
+
if args.noop:
scenario_class = ScenarioNoop
logging.info('NOTE: --noop was specified; not testing Swift.')
@@ -83,7 +91,8 @@ def run_scenario(master, args):
'ssbench-worker', '--zmq-host', zmq_host,
'--zmq-work-port', str(args.zmq_work_port),
'--zmq-results-port', str(args.zmq_results_port),
- '--concurrency', str(users_per_worker)]
+ '--concurrency', str(users_per_worker),
+ '--batch-size', str(args.batch_size)]
if args.profile:
worker_cmd = worker_cmd + [
'--profile-count',
@@ -112,8 +121,13 @@ def run_scenario(master, args):
storage_url=args.storage_url,
token=args.token, noop=args.noop,
with_profiling=args.profile,
- keep_objects=args.keep_objects)
+ keep_objects=args.keep_objects,
+ batch_size=args.batch_size)
+ logging.debug(' dumping %d results to %r', len(results), args.stats_file)
+ dump_start = time.time()
pickle.dump([scenario, results], args.stats_file)
+ logging.debug(' done dumping results (took %.2fs)',
+ time.time() - dump_start)
finally:
# Make sure any local spawned workers get killed
if worker_count:
@@ -131,11 +145,14 @@ def run_scenario(master, args):
int(os.environ['SUDO_GID']))
if not args.no_default_report:
+ report_start = time.time()
args.stats_file.close()
args.stats_file = stats_file_path
args.report_file = sys.stdout
args.rps_histogram = None
report_scenario(master, args)
+ logging.debug(' scenario report took %.2fs',
+ time.time() - report_start)
else:
args.stats_file.close()
@@ -150,6 +167,11 @@ def run_scenario(master, args):
def report_scenario(master, args):
+ if args.profile:
+ import cProfile
+ prof = cProfile.Profile()
+ prof.enable()
+
if args.stats_file.endswith('.gz'):
args.stats_file = GzipFile(args.stats_file, 'rb')
else:
@@ -163,6 +185,12 @@ def report_scenario(master, args):
# Note: not explicitly closing here in case it's redirected to STDOUT
# (i.e. "-")
+ if args.profile:
+ prof.disable()
+ prof_output_path = '/tmp/report_scenario.%d.prof' % os.getpid()
+ prof.dump_stats(prof_output_path)
+ logging.info('PROFILED report_scenario to %s', prof_output_path)
+
if __name__ == "__main__":
arg_parser = argparse.ArgumentParser(
@@ -316,6 +344,13 @@ if __name__ == "__main__":
'--workers', metavar='COUNT', type=int,
help='Spawn COUNT local ssbench-worker processes just for this '
'run. To use workers on other hosts, they must be started manually.')
+ run_scenario_arg_parser.add_argument(
+ '--batch-size', metavar='COUNT', type=int,
+ default=1,
+ help='Send bench jobs to workers in batches of this size to '
+ 'increase benchmarking throughput; for best results, '
+ 'user-count should be greater than and an even multiple of '
+ 'both batch-size and worker count.')
run_scenario_arg_parser.add_argument(
'-q', '--quiet', action='store_true', default=False,
help='Suppress most output (including progress characters during '
@@ -370,6 +405,9 @@ if __name__ == "__main__":
'-r', '--rps-histogram', type=argparse.FileType('w'),
help='Also write a CSV file with requests completed per second '
'histogram data')
+ report_scenario_arg_parser.add_argument(
+ '--profile', action='store_true', default=False,
+ help='Profile the report generation.')
report_scenario_arg_parser.set_defaults(func=report_scenario)
args = arg_parser.parse_args(sys.argv[1:])
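Both the new README section and the ``--batch-size`` help text above recommend a user-count that is greater than, and an even multiple of, both the batch size and the worker count. A small illustrative helper for picking such a value (a sketch only; ``recommended_user_count`` is not part of ssbench)::

    def recommended_user_count(worker_count, batch_size):
        """Smallest count that is a multiple of both worker_count and
        batch_size, i.e. their least common multiple.  Use it, or a larger
        multiple of it, as the -u value (sketch only)."""
        def gcd(a, b):
            while b:
                a, b = b, a % b
            return a
        return worker_count * batch_size // gcd(worker_count, batch_size)

    # Matches the README example: 3 workers and --batch-size 8 -> -u 24.
    assert recommended_user_count(3, 8) == 24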
bin/ssbench-worker
@@ -40,6 +40,12 @@ if __name__ == "__main__":
'provide.')
arg_parser.add_argument('--retries', default=10, type=int,
help='Maximum number of times to retry a job.')
+ arg_parser.add_argument(
+ '--batch-size', metavar='COUNT', type=int, default=1,
+ help='Send back bench results in batches of this size to '
+ 'increase benchmarking throughput; for best results, '
+ 'this should match the --batch-size specified in the ssbench-master '
+ 'command-line.')
arg_parser.add_argument('-p', '--profile-count', type=int, metavar='COUNT',
default=0,
help='Profile %(metavar)s work jobs, starting '
@@ -56,5 +62,5 @@ if __name__ == "__main__":
worker = Worker(args.zmq_host, args.zmq_work_port, args.zmq_results_port,
args.worker_id, args.retries,
profile_count=args.profile_count,
- concurrency=args.concurrency)
+ concurrency=args.concurrency, batch_size=args.batch_size)
worker.go()
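On the worker side, ``--batch-size`` controls how many results are buffered before being sent back to the master. The ``ssbench/worker.py`` diff is not included in this excerpt, so the following is only a rough sketch of the buffering idea, not the actual implementation; the ``send_batch`` callback stands in for whatever serialization and socket send the worker really performs::

    class ResultBatcher(object):
        """Accumulate per-job results and hand them off in batches.

        Sketch of the result-batching idea only; the real logic lives in
        ssbench/worker.py.
        """

        def __init__(self, send_batch, batch_size=1):
            self.send_batch = send_batch  # stand-in for serialize-and-send
            self.batch_size = batch_size
            self._buffer = []

        def add(self, result):
            self._buffer.append(result)
            if len(self._buffer) >= self.batch_size:
                self.flush()

        def flush(self):
            if self._buffer:
                self.send_batch(self._buffer)
                self._buffer = []

    # Usage: with batch_size=8, send_batch fires once per 8 results instead
    # of once per result, cutting per-message overhead.
    batches = []
    batcher = ResultBatcher(batches.append, batch_size=8)
    for i in range(20):
        batcher.add({'serial': i})
    batcher.flush()  # don't forget the final partial batch
    assert [len(b) for b in batches] == [8, 8, 4]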