In [1]:
from random import randint
from numpy import array
import numpy as np
from keras.models import Model
from keras.layers import Input
from keras.layers import LSTM
from keras.layers import Dense
import matplotlib.pyplot as plt
from keras.optimizers import Adam

%matplotlib inline

Using TensorFlow backend.


DATA

In [0]:
# Read the raw request trace into one string `d` (at most 100,000,000
# characters). errors='ignore' silently drops bytes that cannot be decoded.
data = []  # placeholder; rebound to the token list by a later cell
with open('/content/drive/My Drive/request_seq.txt' , errors='ignore' ) as fin:
    fin.seek(0)  # a freshly opened file is already positioned at offset 0
    d = fin.read(100000000) 

In [3]:
# The data is one continuous string; records are separated by '\n'.
# Each record contains 3 items: request number, request ID and size of the requested file.
d[:100]

'1 778030675 13539\n2 3484637698 17635\n3 1130267045 152\n4 3423744231 616\n5 203613471 121\n6 1342928892 '

In [4]:
# Split the continuous string on whitespace into a flat list of tokens:
# request number, request ID, file size, request number, ...
data = d.split()
data[:10]

['1',
 '778030675',
 '13539',
 '2',
 '3484637698',
 '17635',
 '3',
 '1130267045',
 '152',
 '4']

In [5]:
# The raw string has been tokenised into `data`, so drop it; then show how
# many tokens there are.
del d
len(data)

12722261

In [0]:
# We only need the request ID: the second token of every
# (request number, request ID, file size) triple.
MAX_REQUESTS = 2005000  # keep at most this many requests
# data[1], data[4], data[7], ... are the request IDs. int() parses each decimal
# string directly, and the slice stops as soon as MAX_REQUESTS IDs are taken
# instead of walking the whole token list.
req = [int(token) for token in data[1:1 + 3*MAX_REQUESTS:3]]
# Scratch name from the original loop; kept bound because a later cleanup
# cell runs `del r`.
r = []

In [7]:
# Convert the list of request IDs into a 1-D NumPy array and show its shape.
req = np.array(req)
req.shape

(2005000,)

In [8]:
# Find the range of the request IDs.
# NOTE(review): `min` and `max` shadow the Python built-ins from here on.
# A later cell reads both names, so they are left unchanged.
min = np.min(req)
max = np.max(req)
min,max

(2301, 4294966263)

In [0]:
# Reduce the number of distinct IDs: scale every raw request ID down to a
# coarse integer bucket so that only about `id` distinct values remain.
# NOTE: `id` shadows the Python built-in; later cells read it, so the name is kept.
id = 50
factor = (max - min)/(id-1)
# Vectorised form of int(req[i] / factor) for every element (truncation toward
# zero). .tolist() keeps req1 a plain list of Python ints, as before.
# NOTE(review): the raw ID is divided without first subtracting `min`, so the
# largest bucket stays at id-1 only while `min` is small compared to `factor`.
req1 = (np.asarray(req) / factor).astype(int).tolist()

In [0]:
# For the bipartite network: distribute the requests among `users` (5) users.
# Each entry of req_user is one group of `users` consecutive requests; any
# incomplete group at the end of req1 is dropped.
users = 5
req_user = []
for start in range(0, int(len(req1)/users)*users, users):
  temp = list(req1[start:start + users])
  req_user.append(temp)

In [11]:
# Transpose so that axis 0 is the user: row u holds every `users`-th request
# starting at offset u (requests are dealt round-robin across the users).
req_user = np.array(np.transpose(req_user))
req_user.shape

(5, 401000)

In [0]:
# Drop intermediates that are no longer needed. Every name listed must still
# be bound at this point, otherwise `del` raises NameError.
del req, req1, temp, data, r

In [13]:
# Per-user histogram of the first 1000 requests: count[u, b] is the number of
# user u's first 1000 requests that fall into ID bucket b.
count = np.zeros((users,id), dtype = 'float')
for u in range(users):
  count[u,:] = np.bincount(req_user[u,:1000].astype(int), minlength=id)
count.shape

(5, 50)

In [0]:
# Pre-allocate the feature tensor (float32): for every user, one length-`id`
# vector per sliding-window position; there are len(req_user[0])-1000 positions.
prob = np.zeros((users,len(req_user[0])-1000,id), dtype = 'f4')

In [0]:
# Slide a 1000-request window along each user's sequence, one request at a
# time, and store the window's bucket histogram divided by 1000.
# `count` enters this cell holding the histogram of requests [0, 1000).
# NOTE: this cell mutates `count` in place, so running it twice without first
# re-running the cell that initialises `count` produces wrong values.
for i in range(users):
  for j in range(len(req_user[0])-1000):
    count[i,int(req_user[i,j+1000])] += 1  # request entering the window
    count[i,int(req_user[i,j])] -= 1       # request leaving the window
    prob[i,j,:] = count[i,:]/1000          # window now covers requests [j+1, j+1001)

In [16]:
# Sanity check: (users, window positions, ID buckets).
prob.shape

(5, 400000, 50)

In [0]:
# `prob` has been filled, so the running histogram can be dropped.
del count

In [18]:
# Cut the first 300000 window positions of every user into 20 consecutive
# blocks of `batch_size` steps. Block b of user u becomes sample u*20 + b.
# NOTE: despite its name, `batch_size` is used here as the encoder sequence length.
batch_size = 15000
encoder_input_data = prob[:,:300000,:].reshape(20*users,batch_size,id) # total samples per users = 20
encoder_input_data.shape

(100, 15000, 50)

In [19]:
# Targets: for every encoder block, the k window positions that immediately
# follow it (block b ends at position batch_size*b, b = 1..20).
k = 10 # Next sequence to be predicted
# Stack the 20 target slices on a new axis 1 so the array is laid out as
# (users, blocks, k, id). Reshaping that layout puts block b of user u at
# sample index u*20 + b, the same order the encoder input reshape produces.
# (Building the array as (blocks, k, users, id) and reshaping it directly
# would interleave the user axis with the time axis.)
decoder_target_data = np.stack(
    [prob[:, batch_size*b:batch_size*b + k, :] for b in range(1, 21)],
    axis=1,
)
decoder_target_data = decoder_target_data.reshape(20*users,k,id)
decoder_target_data.shape

(100, 10, 50)

In [20]:
# Teacher forcing: the decoder input is the target sequence shifted one step
# to the right along the TIME axis (axis 1), for every sample.
decoder_input_data = np.zeros(decoder_target_data.shape)
decoder_input_data[:,1:,:] = decoder_target_data[:,:-1,:]
# The first decoder step of each sample is seeded with the last encoder time
# step of that same sample, which is also how decode_sequence seeds inference.
decoder_input_data[:,0,:] = encoder_input_data[:,-1,:]
decoder_input_data.shape

(100, 10, 50)

MODEL


In [0]:
latent_dim = 128 # LSTM hidden units
dropout = 0.1 # value passed as `dropout=` to both LSTM layers

In [0]:
# Encoder: an LSTM that reads a variable-length sequence of 50-dim vectors.
# return_state=True makes it also return its final hidden and cell state.
encoder_inputs = Input(shape=(None, 50)) 
encoder = LSTM(latent_dim, dropout=dropout, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
# Only the two final states are passed on to the decoder.
encoder_states = [state_h, state_c]

In [0]:
# Decoder: an LSTM over the decoder input sequence, initialised with the
# encoder's final states. return_sequences=True yields one output per step.
# The returned states are discarded here; the layer object `decoder_lstm` is
# kept so that it can be called again when the inference model is built.
decoder_inputs = Input(shape=(None, 50)) 
decoder_lstm = LSTM(latent_dim, dropout=dropout, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                     initial_state=encoder_states)

In [0]:
# Map every decoder step to 50 outputs through a softmax.
# NOTE(review): the Dense layer object is not bound to a name of its own, so
# later cells cannot refer to it by name.
decoder_outputs = Dense(50, activation='softmax')(decoder_outputs)

In [25]:
# Training model: (encoder sequence, decoder input sequence) -> per-step softmax output.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, 50)     0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, None, 50)     0                                            
__________________________________________________________________________________________________
lstm_1 (LSTM)                   [(None, 128), (None, 91648       input_1[0][0]                    
__________________________________________________________________________________________________
lstm_2 (LSTM)                   [(None, None, 128),  91648       input_2[0][0]                    
                                                                 lstm_1[0][1]               

In [26]:
# Train for 30 epochs with Adam (default arguments) on the mean squared error
# between the softmax output and the target vectors.
# NOTE(review): `batch_size` (15000) was defined as the encoder sequence
# length. It is reused here as the mini-batch size, which is larger than the
# 100 training samples — confirm this is intended.
model.compile(Adam(), loss='mean_squared_error')
history = model.fit([encoder_input_data, decoder_input_data], decoder_target_data,batch_size=batch_size,epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [0]:
# Inference models. Both are built from the layers that were trained as part
# of `model`, so they share its weights.

# Encoder: input sequence -> final [hidden, cell] states.
encoder_model = Model(encoder_inputs, encoder_states)
pred_steps = 10  # default number of steps decode_sequence predicts

# Decoder: one step at a time, with the LSTM states fed in explicitly.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]

# Reuse the Dense layer that was trained inside `model`. Creating a new
# Dense(...) here would give a randomly initialised, untrained output layer,
# so the inference predictions would ignore what training learned.
decoder_dense = next(layer for layer in reversed(model.layers) if isinstance(layer, Dense))
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                      [decoder_outputs] + decoder_states)

In [0]:
def decode_sequence(input_seq, steps=None):
    """Autoregressively predict the steps that follow ``input_seq``.

    The input is encoded into LSTM states with the module-level
    ``encoder_model``. The decoder (``decoder_model``) is then run one step
    at a time: it is seeded with the last time step of ``input_seq`` and
    afterwards fed its own previous output.

    Parameters
    ----------
    input_seq : array of shape (1, timesteps, n_features)
        A single encoder sequence (batch of size 1).
    steps : int, optional
        Number of steps to predict. Defaults to the module-level
        ``pred_steps``.

    Returns
    -------
    numpy.ndarray of shape (1, steps, n_features)
        The predicted vectors, one per decoded step.
    """
    input_seq = np.asarray(input_seq)
    n_steps = pred_steps if steps is None else steps
    # Take the feature width from the input instead of hard-coding it.
    n_features = input_seq.shape[-1]

    # Encode the input as state vectors [h, c].
    states_value = list(encoder_model.predict(input_seq))

    # Length-1 decoder input, seeded with the last encoder time step.
    target_seq = np.zeros((1, 1, n_features))
    target_seq[0, 0, :] = input_seq[0, -1, :]

    decoded_seq = np.zeros((1, n_steps, n_features))
    for i in range(n_steps):
        output, h, c = decoder_model.predict([target_seq] + states_value)
        decoded_seq[0, i, :] = output[0, 0, :]

        # The prediction becomes the next decoder input.
        target_seq = np.zeros((1, 1, n_features))
        target_seq[0, 0, :] = output[0, 0, :]

        # Carry the LSTM states over to the next step.
        states_value = [h, c]

    return decoded_seq

TESTING

In [29]:
# Reminder of the feature tensor's shape before slicing test windows from it.
prob.shape

(5, 400000, 50)

In [30]:
# Scratch arithmetic.
(400000-10000)/1000

390.0

In [31]:
# Smoke test: decode from the first 15000 window positions of user 1.
p = decode_sequence(prob[1,:15000,:].reshape(1,15000,50))
p.shape

(1, 10, 50)

In [0]:
# Predict from 1000 different encoder windows per user.
n_windows = 1000
# Spacing between window starts, chosen so that the last window plus the k
# steps that follow it still lie inside `prob`.
stride = (prob.shape[1] - batch_size - k) // n_windows
prediction = []
for i in range(users):
  print(i)
  temp = []
  for j in range(n_windows):
    # The window start must advance with j; an offset that depends only on
    # the user index would decode the same window n_windows times.
    start = j*stride
    temp.append(decode_sequence(prob[i,start:start + batch_size,:].reshape(1,batch_size,id)))
  prediction.append(temp)

0


In [0]:
# Collect the predictions into one array of shape (users, 1000, k, id); the
# reshape drops the singleton batch axis that decode_sequence returns.
prediction = np.array(prediction).reshape(users,1000,k,id)
prediction.shape