In [1]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
from keras.models import Sequential, Model, load_model
from keras.layers import Input, Activation, GRU, Dense
from sklearn.metrics import mean_squared_error as mse
from sklearn.preprocessing import scale, StandardScaler, RobustScaler
from collections import OrderedDict, defaultdict, Counter
plt.rcParams['figure.figsize'] = [10, 8]

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Traditional Caching Algorithms

### LRU

In [2]:
class LruContentStore():
    def __init__(self, size):
        self.size = size
        self.store = OrderedDict()
        self.hits = 0
        self.misses = 0

    def add(self, item):
        if self.size:
            if(len(self.store) == self.size):
                self.store.popitem(last=False)
            self.store[item] = item

    def get(self, item):
        try:
            cached_item = self.store.pop(item)
            self.store[item] = cached_item
            self.hits += 1
            return cached_item
        except:
            self.misses += 1
            return None
    
    def refresh(self):
        pass

### LFU

In [3]:
class LfuContentStore():
    def __init__(self, size):
        self.size = size
        self.store = {} # {'name', [item, freq]}
        self.hits = 0
        self.misses = 0
    
    def add(self, item):
        if self.size:
            if len(self.store) == self.size:
                min_key = None
                min_freq = None
                for key in self.store.keys():
                    if min_freq == None or self.store[key][1] < min_freq:
                        min_freq = self.store[key][1]
                        min_key = key
                self.store.pop(min_key)
            self.store[item] = [item, 1]

    def get(self, item):
        try:
            cached_item = self.store[item][0]
            self.store[item][1] += 1
            self.hits += 1
            return cached_item
        except:
            self.misses += 1
            return None
    
    def refresh(self):
        pass

### Random

In [4]:
class RandomContentStore():
    def __init__(self, size):
        self.rng = np.random.RandomState(123)
        self.size = size
        self.store = {}
        self.hits = 0
        self.misses = 0

    def add(self, item):
        if self.size:
            if len(self.store) == self.size:
                self.store.pop(self.rng.choice(list(self.store.keys())))
            self.store[item] = item
    
    def get(self, item):
        try:
            cached_item = self.store[item]
            self.hits += 1
            return cached_item
        except:
            self.misses += 1
            return None
    
    def refresh(self):
        pass

### Rolling Moving Average

In [5]:
class RMAContentStore():
    def __init__(self, size):
        self.size = size
        self.store = {}
        self.hits = 0
        self.misses = 0
        self.history = {}
        self.ranking = defaultdict(int)
        self.interval_count = 0
        self.window = 7
        
    def add(self, item):
        if item not in self.history:
            self.history[item] = [0 for _ in range(self.window)]
        self.history[item][self.interval_count % 7] += 1
        if self.size:
            if len(self.store) == self.size:
                min_key, min_rank = self.get_min()
                if min_rank != None and min_rank < self.ranking[item]:
                    self.store.pop(min_key)
                    self.store[item] = item
            else:
                self.store[item] = item
    
    def get_min(self):
        min_key = None
        min_rank = None
        for key in self.store.keys():
            if min_key == None or self.ranking[key] < min_rank:
                min_rank = self.ranking[key]
                min_key = key
        return min_key, min_rank
    
    def refresh(self):
        self.interval_count += 1
        for key in self.ranking.keys():
            if key not in self.history:
                self.history[key] = [0 for _ in range(self.window)]
            self.ranking[key] = sum(self.history[key])/min(self.interval_count, self.window)
        
    def get(self, item):
        try:
            cached_item = self.store[item]
            self.hits += 1
            return cached_item
        except:
            self.misses += 1
            return None

### Exponential Moving Average

In [6]:
class EMAContentStore():
    def __init__(self, size):
        self.size = size
        self.store = {}
        self.hits = 0
        self.misses = 0
        self.history = defaultdict(int)
        self.ranking = defaultdict(int)
        self.alpha = 0.1
        
    def add(self, item):
        self.history[item] += 1
        if self.size:
            if len(self.store) == self.size:
                min_key, min_rank = self.get_min()
                if min_rank != None and min_rank < self.ranking[item]:
                    self.store.pop(min_key)
                    self.store[item] = item
            else:
                self.store[item] = item
    
    def get_min(self):
        min_key = None
        min_rank = None
        for key in self.store.keys():
            if min_key == None or self.ranking[key] < min_rank:
                min_rank = self.ranking[key]
                min_key = key
        return min_key, min_rank
    
    def refresh(self):
        for key in self.ranking.keys():
            self.ranking[key] = self.ranking[key] + self.alpha*(self.history[key]-self.ranking[key])
        self.history = defaultdict(int)
        
    def get(self, item):
        try:
            cached_item = self.store[item]
            self.hits += 1
            return cached_item
        except:
            self.misses += 1
            return None

### One-day Lookback

In [7]:
class ODContentStore():
    def __init__(self, size):
        self.size = size
        self.store = {}
        self.hits = 0
        self.misses = 0
        self.history = defaultdict(int)
        self.ranking = {}
        
    def add(self, item):
        self.history[item] += 1
        if self.size:
            if len(self.store) == self.size:
                min_key, min_rank = self.get_min()
                if min_rank != None and min_rank < self.ranking[item]:
                    self.store.pop(min_key)
                    self.store[item] = item
            else:
                 self.store[item] = item

    def get_min(self):
        min_key = None
        min_rank = None
        for key in self.store.keys():
            if min_key == None or self.ranking[key] < min_rank:
                min_rank = self.ranking[key]
                min_key = key
        return min_key, min_rank
                    
    def refresh(self):
        self.ranking = self.history.copy()
        self.history = defaultdict(int)
        
    def get(self, item):
        try:
            cached_item = self.store[item]
            self.hits += 1
            return cached_item
        except:
            self.misses += 1
            return None

## Machine Learning Models for Caching

### Simple GRU

In [8]:
class GRUContentStore():
    def __init__(self, size):
        self.model = load_model('simple_gru.h5') # load pretrained model
        self.size = size
        self.store = {}
        self.hits = 0
        self.misses = 0
        self.history = {}
        self.ranking = defaultdict(int)
        self.interval_count = 0
        self.window = 7
        
    def add(self, item):
        if item not in self.history:
            self.history[item] = np.zeros(self.window)
        self.history[item, self.interval_count % 7] += 1
        if self.size:
            if len(self.store) == self.size:
                min_key, min_rank = self.get_min()
                if min_rank != None and min_rank < self.ranking[item]:
                    self.store.pop(min_key)
                    self.store[item] = item
            else:
                self.store[item] = item
    
    def get_min(self):
        min_key = None
        min_rank = None
        for key in self.store.keys():
            if min_key == None or self.ranking[key] < min_rank:
                min_rank = self.ranking[key]
                min_key = key
        return min_key, min_rank
    
    def refresh(self):
        self.interval_count += 1
        for key in self.ranking.keys():
            if key not in self.history:
                self.history[key] = np.zeros(self.window)
            self.ranking[key] = self.model.predict(self.history[key])
        
    def get(self, item):
        try:
            cached_item = self.store[item]
            self.hits += 1
            return cached_item
        except:
            self.misses += 1
            return None

### Simple LSTM

### Init Content Stores

In [10]:
test_set = np.load('test_set.npy')

In [12]:
cache_size = int(0.01 * test_set.shape[0])
lru = LruContentStore(cache_size)
lfu  = LfuContentStore(cache_size)
rand = RandomContentStore(cache_size)
rma = RMAContentStore(cache_size)
ema = EMAContentStore(cache_size)
od = ODContentStore(cache_size)
gru = GRUContentStore(cache_size)
cses = [lru, lfu, rand, rma, ema, od, gru]

InvalidArgumentError: No OpKernel was registered to support Op 'CudnnRNN' with these attrs.  Registered devices: [CPU,XLA_CPU], Registered kernels:
  <no registered kernels>

	 [[Node: cu_dnnlstm_1/CudnnRNN = CudnnRNN[T=DT_FLOAT, direction="unidirectional", dropout=0, input_mode="linear_input", is_training=true, rnn_mode="lstm", seed=87654321, seed2=0](cu_dnnlstm_1/transpose, cu_dnnlstm_1/ExpandDims_1, cu_dnnlstm_1/ExpandDims_2, cu_dnnlstm_1/concat_1)]]

Caused by op 'cu_dnnlstm_1/CudnnRNN', defined at:
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/tornado/platform/asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/asyncio/base_events.py", line 421, in run_forever
    self._run_once()
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/asyncio/base_events.py", line 1425, in _run_once
    handle._run()
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/asyncio/events.py", line 127, in _run
    self._callback(*self._args)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/tornado/platform/asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-12-eecd875fb94c>", line 8, in <module>
    gru = GRUContentStore(cache_size)
  File "<ipython-input-8-9c20c767125c>", line 3, in __init__
    self.model = load_model('simple_gru.h5') # load pretrained model
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/keras/engine/saving.py", line 419, in load_model
    model = _deserialize_model(f, custom_objects, compile)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/keras/engine/saving.py", line 225, in _deserialize_model
    model = model_from_config(model_config, custom_objects=custom_objects)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/keras/engine/saving.py", line 458, in model_from_config
    return deserialize(config, custom_objects=custom_objects)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/keras/layers/__init__.py", line 55, in deserialize
    printable_module_name='layer')
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/keras/utils/generic_utils.py", line 145, in deserialize_keras_object
    list(custom_objects.items())))
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/keras/engine/sequential.py", line 301, in from_config
    model.add(layer)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/keras/engine/sequential.py", line 165, in add
    layer(x)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/keras/layers/recurrent.py", line 532, in __call__
    return super(RNN, self).__call__(inputs, **kwargs)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/keras/engine/base_layer.py", line 457, in __call__
    output = self.call(inputs, **kwargs)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/keras/layers/cudnn_recurrent.py", line 90, in call
    output, states = self._process_batch(inputs, initial_state)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/keras/layers/cudnn_recurrent.py", line 517, in _process_batch
    is_training=True)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py", line 1544, in __call__
    input_data, input_h, input_c, params, is_training=is_training)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py", line 1435, in __call__
    seed=self._seed)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py", line 922, in _cudnn_rnn
    outputs, output_h, output_c, _ = gen_cudnn_rnn_ops.cudnn_rnn(**args)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/ops/gen_cudnn_rnn_ops.py", line 115, in cudnn_rnn
    is_training=is_training, name=name)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/util/deprecation.py", line 454, in new_func
    return func(*args, **kwargs)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 3155, in create_op
    op_def=op_def)
  File "/home/tavish/miniconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1717, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): No OpKernel was registered to support Op 'CudnnRNN' with these attrs.  Registered devices: [CPU,XLA_CPU], Registered kernels:
  <no registered kernels>

	 [[Node: cu_dnnlstm_1/CudnnRNN = CudnnRNN[T=DT_FLOAT, direction="unidirectional", dropout=0, input_mode="linear_input", is_training=true, rnn_mode="lstm", seed=87654321, seed2=0](cu_dnnlstm_1/transpose, cu_dnnlstm_1/ExpandDims_1, cu_dnnlstm_1/ExpandDims_2, cu_dnnlstm_1/concat_1)]]


### Initialize Statistical Baselines with Training Set

In [134]:
train_set = np.load('train_set.npy')

In [135]:
# initialize OD with training data
yesterday = train_set[:, -1, 0]
for i, val in enumerate(yesterday):
    od.ranking[i] = val

In [136]:
# initialize RMA with training data
last_week = train_set[:, -7:, 0]
last_week_avg = np.mean(last_week, axis=1)
# set rankings
for i, val in enumerate(last_week_avg):
    rma.ranking[i] = val
# set history
for item in range(last_week.shape[0]):
    rma.history[item] = last_week[item, :, :]

In [None]:
# initialize EMA with training data
history = train_set[:, :, 0]
for i in range(history.shape[0]):
    ema.ranking[i] = history[i, 0]
for i in range(1, history.shape[1]):
    recent_history = history[:, i]
    for j in range(history.shape[0]): 
        ema.ranking[j] = ema.ranking[j] + ema.alpha*(ema.ranking[j]-recent_history[j])

In [None]:
# initialize Simple GRU with training data
last_week = train_set[:, -7:, 0]
for item in range(last_week.shape[0]):
    gru.history[item] = last_week[item, :, :]
    gru.ranking[item] = gru.model.predict(gru.history[item])

### Run Tests with Test Set

In [139]:
seed = 123
for cs in cses:
    np.random.seed(123)
    for i in range(test_set.shape[1]):
        daily_reqs = test_set[:, i, 0]
        flat_daily_reqs = np.repeat(np.arange(test_set.shape[0], dtype='float'), 
                                    daily_reqs.astype('int'), 
                                    axis=0)
        np.random.shuffle(flat_daily_reqs)
        for req in flat_daily_reqs:
            if cs.get(req) == None:
                cs.add(req)
        cs.refresh()
    print(cs.hits/(cs.hits+cs.misses))

0.3692767690443395
0.3555356080100963
0.3380503293187446
0.4154067185485977
0.3485472449641437
0.23434120473035538
