From 8d8e86201ab589a3b97f4e2671d8b530e766a09b Mon Sep 17 00:00:00 2001 From: Chris Simpson Date: Thu, 9 Jun 2016 21:17:15 -0400 Subject: [PATCH] Cache Token objects to improve performance. The Token class now contains a cache of tokens, and each token stores its encoded value. In Python 3 this prevents encoding the Token commands (get, set, incr, etc...) repeatedly. There is also a smaller performance improvement by creating fewer objects. A very basic benchmark script was also added. --- benchmarks/basic_operations.py | 197 +++++++++++++++++++++++++++++++++ redis/client.py | 53 ++++----- redis/connection.py | 24 +++- 3 files changed, 245 insertions(+), 29 deletions(-) create mode 100644 benchmarks/basic_operations.py diff --git a/benchmarks/basic_operations.py b/benchmarks/basic_operations.py new file mode 100644 index 0000000000..cb7f3f2c74 --- /dev/null +++ b/benchmarks/basic_operations.py @@ -0,0 +1,197 @@ +from __future__ import print_function +import redis +import time +import sys +from functools import wraps +from argparse import ArgumentParser + +if sys.version_info[0] == 3: + long = int + +def parse_args(): + parser = ArgumentParser() + parser.add_argument('-n', + type=int, + help='Total number of requests (default 100000)', + default=100000) + parser.add_argument('-P', + type=int, + help=('Pipeline requests.' 
+ ' Default 1 (no pipeline).'), + default=1) + parser.add_argument('-s', + type=int, + help='Data size of SET/GET value in bytes (default 2)', + default=2) + + args = parser.parse_args() + return args + +def run(): + args = parse_args() + r = redis.StrictRedis() + r.flushall() + set_str(conn=r, num=args.n, pipeline_size=args.P, data_size=args.s) + set_int(conn=r, num=args.n, pipeline_size=args.P, data_size=args.s) + get_str(conn=r, num=args.n, pipeline_size=args.P, data_size=args.s) + get_int(conn=r, num=args.n, pipeline_size=args.P, data_size=args.s) + incr(conn=r, num=args.n, pipeline_size=args.P, data_size=args.s) + lpush(conn=r, num=args.n, pipeline_size=args.P, data_size=args.s) + lrange_300(conn=r, num=args.n, pipeline_size=args.P, data_size=args.s) + lpop(conn=r, num=args.n, pipeline_size=args.P, data_size=args.s) + hmset(conn=r, num=args.n, pipeline_size=args.P, data_size=args.s) + + +def timer(func): + @wraps(func) + def wrapper(*args, **kwargs): + start = time.clock() + ret = func(*args, **kwargs) + duration = time.clock() - start + if 'num' in kwargs: + count = kwargs['num'] + else: + count = args[1] + print('{0} - {1} Requests'.format(func.__name__, count)) + print('Duration = {}'.format(duration)) + print('Rate = {}'.format(count/duration)) + print('') + return ret + return wrapper + + +@timer +def set_str(conn, num, pipeline_size, data_size): + if pipeline_size > 1: + conn = conn.pipeline() + + format_str = '{:0<%d}' % data_size + set_data = format_str.format('a') + for i in range(num): + conn.set('set_str:%d' % i, set_data) + if pipeline_size > 1 and i % pipeline_size == 0: + conn.execute() + + if pipeline_size > 1: + conn.execute() + + +@timer +def set_int(conn, num, pipeline_size, data_size): + if pipeline_size > 1: + conn = conn.pipeline() + + format_str = '{:0<%d}' % data_size + set_data = int(format_str.format('1')) + for i in range(num): + conn.set('set_int:%d' % i, set_data) + if pipeline_size > 1 and i % pipeline_size == 0: + conn.execute() + 
+ if pipeline_size > 1: + conn.execute() + + +@timer +def get_str(conn, num, pipeline_size, data_size): + if pipeline_size > 1: + conn = conn.pipeline() + + for i in range(num): + conn.get('set_str:%d' % i) + if pipeline_size > 1 and i % pipeline_size == 0: + conn.execute() + + if pipeline_size > 1: + conn.execute() + + +@timer +def get_int(conn, num, pipeline_size, data_size): + if pipeline_size > 1: + conn = conn.pipeline() + + for i in range(num): + conn.get('set_int:%d' % i) + if pipeline_size > 1 and i % pipeline_size == 0: + conn.execute() + + if pipeline_size > 1: + conn.execute() + + +@timer +def incr(conn, num, pipeline_size, *args, **kwargs): + if pipeline_size > 1: + conn = conn.pipeline() + + for i in range(num): + conn.incr('incr_key') + if pipeline_size > 1 and i % pipeline_size == 0: + conn.execute() + + if pipeline_size > 1: + conn.execute() + + +@timer +def lpush(conn, num, pipeline_size, data_size): + if pipeline_size > 1: + conn = conn.pipeline() + + format_str = '{:0<%d}' % data_size + set_data = int(format_str.format('1')) + for i in range(num): + conn.lpush('lpush_key', set_data) + if pipeline_size > 1 and i % pipeline_size == 0: + conn.execute() + + if pipeline_size > 1: + conn.execute() + + +@timer +def lrange_300(conn, num, pipeline_size, data_size): + if pipeline_size > 1: + conn = conn.pipeline() + + for i in range(num): + conn.lrange('lpush_key', i, i+300) + if pipeline_size > 1 and i % pipeline_size == 0: + conn.execute() + + if pipeline_size > 1: + conn.execute() + + +@timer +def lpop(conn, num, pipeline_size, data_size): + if pipeline_size > 1: + conn = conn.pipeline() + for i in range(num): + conn.lpop('lpush_key') + if pipeline_size > 1 and i % pipeline_size == 0: + conn.execute() + if pipeline_size > 1: + conn.execute() + + +@timer +def hmset(conn, num, pipeline_size, data_size): + if pipeline_size > 1: + conn = conn.pipeline() + + set_data = {'str_value': 'string', + 'int_value': 123456, + 'long_value': long(123456), + 
'float_value': 123456.0} + for i in range(num): + conn.hmset('hmset_key', set_data) + if pipeline_size > 1 and i % pipeline_size == 0: + conn.execute() + + if pipeline_size > 1: + conn.execute() + +if __name__ == '__main__': + run() diff --git a/redis/client.py b/redis/client.py index d6bd5c3275..548b79ccee 100755 --- a/redis/client.py +++ b/redis/client.py @@ -787,7 +787,8 @@ def slaveof(self, host=None, port=None): instance is promoted to a master instead. """ if host is None and port is None: - return self.execute_command('SLAVEOF', Token('NO'), Token('ONE')) + return self.execute_command('SLAVEOF', Token.get_token('NO'), + Token.get_token('ONE')) return self.execute_command('SLAVEOF', host, port) def slowlog_get(self, num=None): @@ -1362,10 +1363,10 @@ def sort(self, name, start=None, num=None, by=None, get=None, pieces = [name] if by is not None: - pieces.append(Token('BY')) + pieces.append(Token.get_token('BY')) pieces.append(by) if start is not None and num is not None: - pieces.append(Token('LIMIT')) + pieces.append(Token.get_token('LIMIT')) pieces.append(start) pieces.append(num) if get is not None: @@ -1374,18 +1375,18 @@ def sort(self, name, start=None, num=None, by=None, get=None, # values. We can't just iterate blindly because strings are # iterable. 
if isinstance(get, basestring): - pieces.append(Token('GET')) + pieces.append(Token.get_token('GET')) pieces.append(get) else: for g in get: - pieces.append(Token('GET')) + pieces.append(Token.get_token('GET')) pieces.append(g) if desc: - pieces.append(Token('DESC')) + pieces.append(Token.get_token('DESC')) if alpha: - pieces.append(Token('ALPHA')) + pieces.append(Token.get_token('ALPHA')) if store is not None: - pieces.append(Token('STORE')) + pieces.append(Token.get_token('STORE')) pieces.append(store) if groups: @@ -1409,9 +1410,9 @@ def scan(self, cursor=0, match=None, count=None): """ pieces = [cursor] if match is not None: - pieces.extend([Token('MATCH'), match]) + pieces.extend([Token.get_token('MATCH'), match]) if count is not None: - pieces.extend([Token('COUNT'), count]) + pieces.extend([Token.get_token('COUNT'), count]) return self.execute_command('SCAN', *pieces) def scan_iter(self, match=None, count=None): @@ -1440,9 +1441,9 @@ def sscan(self, name, cursor=0, match=None, count=None): """ pieces = [name, cursor] if match is not None: - pieces.extend([Token('MATCH'), match]) + pieces.extend([Token.get_token('MATCH'), match]) if count is not None: - pieces.extend([Token('COUNT'), count]) + pieces.extend([Token.get_token('COUNT'), count]) return self.execute_command('SSCAN', *pieces) def sscan_iter(self, name, match=None, count=None): @@ -1472,9 +1473,9 @@ def hscan(self, name, cursor=0, match=None, count=None): """ pieces = [name, cursor] if match is not None: - pieces.extend([Token('MATCH'), match]) + pieces.extend([Token.get_token('MATCH'), match]) if count is not None: - pieces.extend([Token('COUNT'), count]) + pieces.extend([Token.get_token('COUNT'), count]) return self.execute_command('HSCAN', *pieces) def hscan_iter(self, name, match=None, count=None): @@ -1507,9 +1508,9 @@ def zscan(self, name, cursor=0, match=None, count=None, """ pieces = [name, cursor] if match is not None: - pieces.extend([Token('MATCH'), match]) + 
pieces.extend([Token.get_token('MATCH'), match]) if count is not None: - pieces.extend([Token('COUNT'), count]) + pieces.extend([Token.get_token('COUNT'), count]) options = {'score_cast_func': score_cast_func} return self.execute_command('ZSCAN', *pieces, **options) @@ -1685,7 +1686,7 @@ def zrange(self, name, start, end, desc=False, withscores=False, score_cast_func) pieces = ['ZRANGE', name, start, end] if withscores: - pieces.append(Token('WITHSCORES')) + pieces.append(Token.get_token('WITHSCORES')) options = { 'withscores': withscores, 'score_cast_func': score_cast_func @@ -1705,7 +1706,7 @@ def zrangebylex(self, name, min, max, start=None, num=None): raise RedisError("``start`` and ``num`` must both be specified") pieces = ['ZRANGEBYLEX', name, min, max] if start is not None and num is not None: - pieces.extend([Token('LIMIT'), start, num]) + pieces.extend([Token.get_token('LIMIT'), start, num]) return self.execute_command(*pieces) def zrevrangebylex(self, name, max, min, start=None, num=None): @@ -1721,7 +1722,7 @@ def zrevrangebylex(self, name, max, min, start=None, num=None): raise RedisError("``start`` and ``num`` must both be specified") pieces = ['ZREVRANGEBYLEX', name, max, min] if start is not None and num is not None: - pieces.extend([Token('LIMIT'), start, num]) + pieces.extend([Token.get_token('LIMIT'), start, num]) return self.execute_command(*pieces) def zrangebyscore(self, name, min, max, start=None, num=None, @@ -1743,9 +1744,9 @@ def zrangebyscore(self, name, min, max, start=None, num=None, raise RedisError("``start`` and ``num`` must both be specified") pieces = ['ZRANGEBYSCORE', name, min, max] if start is not None and num is not None: - pieces.extend([Token('LIMIT'), start, num]) + pieces.extend([Token.get_token('LIMIT'), start, num]) if withscores: - pieces.append(Token('WITHSCORES')) + pieces.append(Token.get_token('WITHSCORES')) options = { 'withscores': withscores, 'score_cast_func': score_cast_func @@ -1803,7 +1804,7 @@ def 
zrevrange(self, name, start, end, withscores=False, """ pieces = ['ZREVRANGE', name, start, end] if withscores: - pieces.append(Token('WITHSCORES')) + pieces.append(Token.get_token('WITHSCORES')) options = { 'withscores': withscores, 'score_cast_func': score_cast_func @@ -1829,9 +1830,9 @@ def zrevrangebyscore(self, name, max, min, start=None, num=None, raise RedisError("``start`` and ``num`` must both be specified") pieces = ['ZREVRANGEBYSCORE', name, max, min] if start is not None and num is not None: - pieces.extend([Token('LIMIT'), start, num]) + pieces.extend([Token.get_token('LIMIT'), start, num]) if withscores: - pieces.append(Token('WITHSCORES')) + pieces.append(Token.get_token('WITHSCORES')) options = { 'withscores': withscores, 'score_cast_func': score_cast_func @@ -1865,10 +1866,10 @@ def _zaggregate(self, command, dest, keys, aggregate=None): weights = None pieces.extend(keys) if weights: - pieces.append(Token('WEIGHTS')) + pieces.append(Token.get_token('WEIGHTS')) pieces.extend(weights) if aggregate: - pieces.append(Token('AGGREGATE')) + pieces.append(Token.get_token('AGGREGATE')) pieces.append(aggregate) return self.execute_command(*pieces) diff --git a/redis/connection.py b/redis/connection.py index f0e28e3c5a..27db42a80d 100755 --- a/redis/connection.py +++ b/redis/connection.py @@ -65,10 +65,27 @@ class Token(object): hard-coded arguments are wrapped in this class so we know not to apply and encoding rules on them. 
""" + + _cache = {} + + @classmethod + def get_token(cls, value): + "Gets a cached token object or creates a new one if not already cached" + + # Use try/except because after running for a short time most tokens + # should already be cached + try: + return cls._cache[value] + except KeyError: + token = Token(value) + cls._cache[value] = token + return token + def __init__(self, value): if isinstance(value, Token): value = value.value self.value = value + self.encoded_value = b(value) def __repr__(self): return self.value @@ -586,7 +603,7 @@ def read_response(self): def encode(self, value): "Return a bytestring representation of the value" if isinstance(value, Token): - return b(value.value) + return value.encoded_value elif isinstance(value, bytes): return value elif isinstance(value, (int, long)): @@ -609,9 +626,10 @@ def pack_command(self, *args): # to prevent them from being encoded. command = args[0] if ' ' in command: - args = tuple([Token(s) for s in command.split(' ')]) + args[1:] + args = tuple([Token.get_token(s) + for s in command.split()]) + args[1:] else: - args = (Token(command),) + args[1:] + args = (Token.get_token(command),) + args[1:] buff = SYM_EMPTY.join( (SYM_STAR, b(str(len(args))), SYM_CRLF))