In [53]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from keras.models import Model
from keras.layers import Input, LSTM, Dense
from keras.optimizers import Adam

import seaborn as sns
sns.set()

# Data 

In [54]:
data = pd.read_csv('D:/IITM/DeepCache/syntheticDataset_O50.csv')
data.head()

Unnamed: 0,object_ID,request_time
0,5,2.961472
1,25,3.274127
2,2,3.785475
3,2,4.455687
4,4,5.288994


In [55]:
data = np.array(data)
req = data[:,0]
time = data[:,1]

batch_size = 20000 # 10% of training data

In [56]:
encoder_input_data = req[:200000].reshape(-1,batch_size,1) # total batches = 10

decoder_target_data = []
for i in range(1,11):
    temp = []
    for j in range(10): # to predict next 10 req for every batch
        temp.append(req[(batch_size*i) + j])
    decoder_target_data.append(temp)    

decoder_target_data = np.array(decoder_target_data).reshape(10,10,1)

print(encoder_input_data.shape,decoder_target_data.shape)

(10, 20000, 1) (10, 10, 1)


In [57]:
decoder_input_data = np.zeros(decoder_target_data.shape)
decoder_input_data[1:,:,0] = decoder_target_data[:-1,:,0]
decoder_input_data[0,0,0] = encoder_input_data[-1,0,0]
decoder_input_data.shape

(10, 10, 1)

In [58]:
decoder_target_data

array([[[ 2.],
        [14.],
        [ 1.],
        [ 3.],
        [13.],
        [ 4.],
        [ 2.],
        [ 1.],
        [14.],
        [ 1.]],

       [[ 3.],
        [38.],
        [16.],
        [ 3.],
        [ 9.],
        [16.],
        [ 1.],
        [ 1.],
        [ 5.],
        [37.]],

       [[43.],
        [19.],
        [46.],
        [38.],
        [38.],
        [39.],
        [ 7.],
        [38.],
        [38.],
        [43.]],

       [[45.],
        [ 7.],
        [22.],
        [45.],
        [21.],
        [49.],
        [ 7.],
        [49.],
        [ 7.],
        [49.]],

       [[49.],
        [39.],
        [43.],
        [43.],
        [ 7.],
        [39.],
        [19.],
        [45.],
        [ 7.],
        [39.]],

       [[10.],
        [10.],
        [39.],
        [30.],
        [21.],
        [19.],
        [ 1.],
        [12.],
        [41.],
        [ 9.]],

       [[45.],
        [30.],
        [ 9.],
        [ 3.],
        [16.],
        [28.]

In [59]:
decoder_input_data

array([[[37.],
        [ 0.],
        [ 0.],
        [ 0.],
        [ 0.],
        [ 0.],
        [ 0.],
        [ 0.],
        [ 0.],
        [ 0.]],

       [[ 2.],
        [14.],
        [ 1.],
        [ 3.],
        [13.],
        [ 4.],
        [ 2.],
        [ 1.],
        [14.],
        [ 1.]],

       [[ 3.],
        [38.],
        [16.],
        [ 3.],
        [ 9.],
        [16.],
        [ 1.],
        [ 1.],
        [ 5.],
        [37.]],

       [[43.],
        [19.],
        [46.],
        [38.],
        [38.],
        [39.],
        [ 7.],
        [38.],
        [38.],
        [43.]],

       [[45.],
        [ 7.],
        [22.],
        [45.],
        [21.],
        [49.],
        [ 7.],
        [49.],
        [ 7.],
        [49.]],

       [[49.],
        [39.],
        [43.],
        [43.],
        [ 7.],
        [39.],
        [19.],
        [45.],
        [ 7.],
        [39.]],

       [[10.],
        [10.],
        [39.],
        [30.],
        [21.],
        [19.]

# Model

In [60]:
latent_dim = 128 # LSTM hidden units
dropout = 0

In [61]:
encoder_inputs = Input(shape=(None, 1)) 
encoder = LSTM(latent_dim, dropout=dropout, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
encoder_states = [state_h, state_c]

In [62]:
decoder_inputs = Input(shape=(None, 1)) 
decoder_lstm = LSTM(latent_dim, dropout=dropout, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                     initial_state=encoder_states)


In [63]:
decoder_dense = Dense(1) # 1 continuous output at each timestep
decoder_outputs = decoder_dense(decoder_outputs)


In [64]:
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

In [65]:
model.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_9 (InputLayer)            (None, None, 1)      0                                            
__________________________________________________________________________________________________
input_10 (InputLayer)           (None, None, 1)      0                                            
__________________________________________________________________________________________________
lstm_3 (LSTM)                   [(None, 128), (None, 66560       input_9[0][0]                    
__________________________________________________________________________________________________
lstm_4 (LSTM)                   [(None, None, 128),  66560       input_10[0][0]                   
                                                                 lstm_3[0][1]                     
          

# Compile The Model

In [66]:
model.compile(Adam(), loss='mean_absolute_error')
history = model.fit([encoder_input_data, decoder_input_data], decoder_target_data,batch_size=batch_size,epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


# Testing The Model 

In [67]:
encoder_model = Model(encoder_inputs, encoder_states)
pred_steps = 10
# A modified version of the decoding stage that takes in predicted target inputs
# and encoded state vectors, returning predicted target outputs and decoder state vectors.
# We need to hang onto these state vectors to run the next step of the inference loop.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]

decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                      [decoder_outputs] + decoder_states)

def decode_sequence(input_seq):
    
    # Encode the input as state vectors.
    states_value = encoder_model.predict(input_seq)

    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1, 1, 1))
    
    # Populate the first target sequence with end of encoding series pageviews
    target_seq[0, 0, 0] = input_seq[0, -1, 0]

    # Sampling loop for a batch of sequences - we will fill decoded_seq with predictions
    # (to simplify, here we assume a batch of size 1).

    decoded_seq = np.zeros((1,pred_steps,1))
    for i in range(pred_steps):
        
        output, h, c = decoder_model.predict([target_seq] + states_value)
        
        decoded_seq[0,i,0] = output[0,0,0]

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1, 1, 1))
        target_seq[0, 0, 0] = output[0,0,0]

        # Update states
        states_value = [h, c]

    return decoded_seq

In [80]:
sample = req[200000:220000].reshape(1,20000,1)


In [81]:
prediction = decode_sequence(sample)
prediction

array([[[ 8.43196487],
        [ 9.91250706],
        [10.57084465],
        [10.75850582],
        [10.79717255],
        [10.78395462],
        [10.73087311],
        [10.66273499],
        [10.59674835],
        [10.53757477]]])

In [82]:
req[220001:220010]

array([17., 35., 50.,  8., 12.,  8., 13., 34., 50.])

In [89]:
# to find the popularity of predicted req
def fre(id):
    freq = 0
    for i in range(220001,len(req)):
        if req[i] == id:
            freq += 1
    return freq              

In [90]:
fre(8)

4085

In [91]:
fre(9)

706

In [None]:
frq(10)

In [92]:
fre(11)

996

In [None]:
freq = []
for i in range(len(req)):
    freq.append(fre(req[i]))
    

In [None]:
freq = np.array(freq)

In [None]:
print(np.min(freq),np.max(freq))