From 8d8e86201ab589a3b97f4e2671d8b530e766a09b Mon Sep 17 00:00:00 2001 From: Chris Simpson Date: Thu, 9 Jun 2016 21:17:15 -0400 Subject: [PATCH] Cache Token objects to improve performance. The Token class now contains a cache of tokens, and each token stores its encoded value. In Python 3 this prevents encoding the Token commands (get, set, incr, etc...) repeatedly. There is also a smaller performance improvement by creating fewer objects. A very basic benchmark script was also added. --- benchmarks/basic_operations.py | 197 +++++++++++++++++++++++++++++++++ redis/client.py | 53 ++++----- redis/connection.py | 24 +++- 3 files changed, 245 insertions(+), 29 deletions(-) create mode 100644 benchmarks/basic_operations.py diff --git a/benchmarks/basic_operations.py b/benchmarks/basic_operations.py new file mode 100644 index 0000000000..cb7f3f2c74 --- /dev/null +++ b/benchmarks/basic_operations.py @@ -0,0 +1,197 @@ +from __future__ import print_function +import redis +import time +import sys +from functools import wraps +from argparse import ArgumentParser + +if sys.version_info[0] == 3: + long = int + +def parse_args(): + parser = ArgumentParser() + parser.add_argument('-n', + type=int, + help='Total number of requests (default 100000)', + default=100000) + parser.add_argument('-P', + type=int, + help=('Pipeline requests.' 
+ ' Default 1 (no pipeline).'), + default=1) + parser.add_argument('-s', + type=int, + help='Data size of SET/GET value in bytes (default 2)', + default=2) + + args = parser.parse_args() + return args + +def run(): + args = parse_args() + r = redis.StrictRedis() + r.flushall() + set_str(conn=r, num=args.n, pipeline_size=args.P, data_size=args.s) + set_int(conn=r, num=args.n, pipeline_size=args.P, data_size=args.s) + get_str(conn=r, num=args.n, pipeline_size=args.P, data_size=args.s) + get_int(conn=r, num=args.n, pipeline_size=args.P, data_size=args.s) + incr(conn=r, num=args.n, pipeline_size=args.P, data_size=args.s) + lpush(conn=r, num=args.n, pipeline_size=args.P, data_size=args.s) + lrange_300(conn=r, num=args.n, pipeline_size=args.P, data_size=args.s) + lpop(conn=r, num=args.n, pipeline_size=args.P, data_size=args.s) + hmset(conn=r, num=args.n, pipeline_size=args.P, data_size=args.s) + + +def timer(func): + @wraps(func) + def wrapper(*args, **kwargs): + start = time.clock() + ret = func(*args, **kwargs) + duration = time.clock() - start + if 'num' in kwargs: + count = kwargs['num'] + else: + count = args[1] + print('{0} - {1} Requests'.format(func.__name__, count)) + print('Duration = {}'.format(duration)) + print('Rate = {}'.format(count/duration)) + print('') + return ret + return wrapper + + +@timer +def set_str(conn, num, pipeline_size, data_size): + if pipeline_size > 1: + conn = conn.pipeline() + + format_str = '{:0<%d}' % data_size + set_data = format_str.format('a') + for i in range(num): + conn.set('set_str:%d' % i, set_data) + if pipeline_size > 1 and i % pipeline_size == 0: + conn.execute() + + if pipeline_size > 1: + conn.execute() + + +@timer +def set_int(conn, num, pipeline_size, data_size): + if pipeline_size > 1: + conn = conn.pipeline() + + format_str = '{:0<%d}' % data_size + set_data = int(format_str.format('1')) + for i in range(num): + conn.set('set_int:%d' % i, set_data) + if pipeline_size > 1 and i % pipeline_size == 0: + conn.execute() + 
+ if pipeline_size > 1: + conn.execute() + + +@timer +def get_str(conn, num, pipeline_size, data_size): + if pipeline_size > 1: + conn = conn.pipeline() + + for i in range(num): + conn.get('set_str:%d' % i) + if pipeline_size > 1 and i % pipeline_size == 0: + conn.execute() + + if pipeline_size > 1: + conn.execute() + + +@timer +def get_int(conn, num, pipeline_size, data_size): + if pipeline_size > 1: + conn = conn.pipeline() + + for i in range(num): + conn.get('set_int:%d' % i) + if pipeline_size > 1 and i % pipeline_size == 0: + conn.execute() + + if pipeline_size > 1: + conn.execute() + + +@timer +def incr(conn, num, pipeline_size, *args, **kwargs): + if pipeline_size > 1: + conn = conn.pipeline() + + for i in range(num): + conn.incr('incr_key') + if pipeline_size > 1 and i % pipeline_size == 0: + conn.execute() + + if pipeline_size > 1: + conn.execute() + + +@timer +def lpush(conn, num, pipeline_size, data_size): + if pipeline_size > 1: + conn = conn.pipeline() + + format_str = '{:0<%d}' % data_size + set_data = int(format_str.format('1')) + for i in range(num): + conn.lpush('lpush_key', set_data) + if pipeline_size > 1 and i % pipeline_size == 0: + conn.execute() + + if pipeline_size > 1: + conn.execute() + + +@timer +def lrange_300(conn, num, pipeline_size, data_size): + if pipeline_size > 1: + conn = conn.pipeline() + + for i in range(num): + conn.lrange('lpush_key', i, i+300) + if pipeline_size > 1 and i % pipeline_size == 0: + conn.execute() + + if pipeline_size > 1: + conn.execute() + + +@timer +def lpop(conn, num, pipeline_size, data_size): + if pipeline_size > 1: + conn = conn.pipeline() + for i in range(num): + conn.lpop('lpush_key') + if pipeline_size > 1 and i % pipeline_size == 0: + conn.execute() + if pipeline_size > 1: + conn.execute() + + +@timer +def hmset(conn, num, pipeline_size, data_size): + if pipeline_size > 1: + conn = conn.pipeline() + + set_data = {'str_value': 'string', + 'int_value': 123456, + 'long_value': long(123456), + 
'float_value': 123456.0} + for i in range(num): + conn.hmset('hmset_key', set_data) + if pipeline_size > 1 and i % pipeline_size == 0: + conn.execute() + + if pipeline_size > 1: + conn.execute() + +if __name__ == '__main__': + run() diff --git a/redis/client.py b/redis/client.py index d6bd5c3275..548b79ccee 100755 --- a/redis/client.py +++ b/redis/client.py @@ -787,7 +787,8 @@ def slaveof(self, host=None, port=None): instance is promoted to a master instead. """ if host is None and port is None: - return self.execute_command('SLAVEOF', Token('NO'), Token('ONE')) + return self.execute_command('SLAVEOF', Token.get_token('NO'), + Token.get_token('ONE')) return self.execute_command('SLAVEOF', host, port) def slowlog_get(self, num=None): @@ -1362,10 +1363,10 @@ def sort(self, name, start=None, num=None, by=None, get=None, pieces = [name] if by is not None: - pieces.append(Token('BY')) + pieces.append(Token.get_token('BY')) pieces.append(by) if start is not None and num is not None: - pieces.append(Token('LIMIT')) + pieces.append(Token.get_token('LIMIT')) pieces.append(start) pieces.append(num) if get is not None: @@ -1374,18 +1375,18 @@ def sort(self, name, start=None, num=None, by=None, get=None, # values. We can't just iterate blindly because strings are # iterable. 
if isinstance(get, basestring): - pieces.append(Token('GET')) + pieces.append(Token.get_token('GET')) pieces.append(get) else: for g in get: - pieces.append(Token('GET')) + pieces.append(Token.get_token('GET')) pieces.append(g) if desc: - pieces.append(Token('DESC')) + pieces.append(Token.get_token('DESC')) if alpha: - pieces.append(Token('ALPHA')) + pieces.append(Token.get_token('ALPHA')) if store is not None: - pieces.append(Token('STORE')) + pieces.append(Token.get_token('STORE')) pieces.append(store) if groups: @@ -1409,9 +1410,9 @@ def scan(self, cursor=0, match=None, count=None): """ pieces = [cursor] if match is not None: - pieces.extend([Token('MATCH'), match]) + pieces.extend([Token.get_token('MATCH'), match]) if count is not None: - pieces.extend([Token('COUNT'), count]) + pieces.extend([Token.get_token('COUNT'), count]) return self.execute_command('SCAN', *pieces) def scan_iter(self, match=None, count=None): @@ -1440,9 +1441,9 @@ def sscan(self, name, cursor=0, match=None, count=None): """ pieces = [name, cursor] if match is not None: - pieces.extend([Token('MATCH'), match]) + pieces.extend([Token.get_token('MATCH'), match]) if count is not None: - pieces.extend([Token('COUNT'), count]) + pieces.extend([Token.get_token('COUNT'), count]) return self.execute_command('SSCAN', *pieces) def sscan_iter(self, name, match=None, count=None): @@ -1472,9 +1473,9 @@ def hscan(self, name, cursor=0, match=None, count=None): """ pieces = [name, cursor] if match is not None: - pieces.extend([Token('MATCH'), match]) + pieces.extend([Token.get_token('MATCH'), match]) if count is not None: - pieces.extend([Token('COUNT'), count]) + pieces.extend([Token.get_token('COUNT'), count]) return self.execute_command('HSCAN', *pieces) def hscan_iter(self, name, match=None, count=None): @@ -1507,9 +1508,9 @@ def zscan(self, name, cursor=0, match=None, count=None, """ pieces = [name, cursor] if match is not None: - pieces.extend([Token('MATCH'), match]) + 
pieces.extend([Token.get_token('MATCH'), match]) if count is not None: - pieces.extend([Token('COUNT'), count]) + pieces.extend([Token.get_token('COUNT'), count]) options = {'score_cast_func': score_cast_func} return self.execute_command('ZSCAN', *pieces, **options) @@ -1685,7 +1686,7 @@ def zrange(self, name, start, end, desc=False, withscores=False, score_cast_func) pieces = ['ZRANGE', name, start, end] if withscores: - pieces.append(Token('WITHSCORES')) + pieces.append(Token.get_token('WITHSCORES')) options = { 'withscores': withscores, 'score_cast_func': score_cast_func @@ -1705,7 +1706,7 @@ def zrangebylex(self, name, min, max, start=None, num=None): raise RedisError("``start`` and ``num`` must both be specified") pieces = ['ZRANGEBYLEX', name, min, max] if start is not None and num is not None: - pieces.extend([Token('LIMIT'), start, num]) + pieces.extend([Token.get_token('LIMIT'), start, num]) return self.execute_command(*pieces) def zrevrangebylex(self, name, max, min, start=None, num=None): @@ -1721,7 +1722,7 @@ def zrevrangebylex(self, name, max, min, start=None, num=None): raise RedisError("``start`` and ``num`` must both be specified") pieces = ['ZREVRANGEBYLEX', name, max, min] if start is not None and num is not None: - pieces.extend([Token('LIMIT'), start, num]) + pieces.extend([Token.get_token('LIMIT'), start, num]) return self.execute_command(*pieces) def zrangebyscore(self, name, min, max, start=None, num=None, @@ -1743,9 +1744,9 @@ def zrangebyscore(self, name, min, max, start=None, num=None, raise RedisError("``start`` and ``num`` must both be specified") pieces = ['ZRANGEBYSCORE', name, min, max] if start is not None and num is not None: - pieces.extend([Token('LIMIT'), start, num]) + pieces.extend([Token.get_token('LIMIT'), start, num]) if withscores: - pieces.append(Token('WITHSCORES')) + pieces.append(Token.get_token('WITHSCORES')) options = { 'withscores': withscores, 'score_cast_func': score_cast_func @@ -1803,7 +1804,7 @@ def 
zrevrange(self, name, start, end, withscores=False, """ pieces = ['ZREVRANGE', name, start, end] if withscores: - pieces.append(Token('WITHSCORES')) + pieces.append(Token.get_token('WITHSCORES')) options = { 'withscores': withscores, 'score_cast_func': score_cast_func @@ -1829,9 +1830,9 @@ def zrevrangebyscore(self, name, max, min, start=None, num=None, raise RedisError("``start`` and ``num`` must both be specified") pieces = ['ZREVRANGEBYSCORE', name, max, min] if start is not None and num is not None: - pieces.extend([Token('LIMIT'), start, num]) + pieces.extend([Token.get_token('LIMIT'), start, num]) if withscores: - pieces.append(Token('WITHSCORES')) + pieces.append(Token.get_token('WITHSCORES')) options = { 'withscores': withscores, 'score_cast_func': score_cast_func @@ -1865,10 +1866,10 @@ def _zaggregate(self, command, dest, keys, aggregate=None): weights = None pieces.extend(keys) if weights: - pieces.append(Token('WEIGHTS')) + pieces.append(Token.get_token('WEIGHTS')) pieces.extend(weights) if aggregate: - pieces.append(Token('AGGREGATE')) + pieces.append(Token.get_token('AGGREGATE')) pieces.append(aggregate) return self.execute_command(*pieces) diff --git a/redis/connection.py b/redis/connection.py index f0e28e3c5a..27db42a80d 100755 --- a/redis/connection.py +++ b/redis/connection.py @@ -65,10 +65,27 @@ class Token(object): hard-coded arguments are wrapped in this class so we know not to apply and encoding rules on them. 
""" + + _cache = {} + + @classmethod + def get_token(cls, value): + "Gets a cached token object or creates a new one if not already cached" + + # Use try/except because after running for a short time most tokens + # should already be cached + try: + return cls._cache[value] + except KeyError: + token = Token(value) + cls._cache[value] = token + return token + def __init__(self, value): if isinstance(value, Token): value = value.value self.value = value + self.encoded_value = b(value) def __repr__(self): return self.value @@ -586,7 +603,7 @@ def read_response(self): def encode(self, value): "Return a bytestring representation of the value" if isinstance(value, Token): - return b(value.value) + return value.encoded_value elif isinstance(value, bytes): return value elif isinstance(value, (int, long)): @@ -609,9 +626,10 @@ def pack_command(self, *args): # to prevent them from being encoded. command = args[0] if ' ' in command: - args = tuple([Token(s) for s in command.split(' ')]) + args[1:] + args = tuple([Token.get_token(s) + for s in command.split()]) + args[1:] else: - args = (Token(command),) + args[1:] + args = (Token.get_token(command),) + args[1:] buff = SYM_EMPTY.join( (SYM_STAR, b(str(len(args))), SYM_CRLF))