In [0]:
from random import randint
from numpy import array
import numpy as np
from numpy import random
from keras.models import Model
from scipy.optimize import linprog
from keras.layers import Input
from keras.layers import LSTM
from keras.layers import Dense
import matplotlib.pyplot as plt
from keras.optimizers import Adam

%matplotlib inline

DATA

In [0]:
# Location of the raw request trace (presumably a Google Drive mount inside Colab - adjust as needed).
REQUEST_LOG_PATH = '/content/drive/My Drive/request_seq.txt'
# Upper bound on how many characters of the trace are read into memory.
# NOTE: a hard cap can cut the final record in half; later cells only use a prefix of the tokens.
MAX_CHARS = 100000000

# Undecodable bytes are skipped (errors='ignore'). A freshly opened file is
# already positioned at offset 0, so no explicit seek is needed.
with open(REQUEST_LOG_PATH, errors='ignore') as fin:
    d = fin.read(MAX_CHARS)

In [48]:
# The data is one continuous sequence; segments are separated by '\n'.
# Each segment contains 3 items: request number, request ID and size of the requested file.
d[:100]

'1 778030675 13539\n2 3484637698 17635\n3 1130267045 152\n4 3423744231 616\n5 203613471 121\n6 1342928892 '

In [49]:
# Split the continuous sequence into whitespace-separated tokens
# (three consecutive tokens per request).
data = d.split()
data[:10]

['1',
 '778030675',
 '13539',
 '2',
 '3484637698',
 '17635',
 '3',
 '1130267045',
 '152',
 '4']

In [50]:
# The raw text is no longer needed once it has been tokenised; show the token count.
del d
len(data)

12722261

In [51]:
# Every record is three tokens (request number, request ID, file size), so the
# request IDs sit at token positions 1, 4, 7, ...  Only the request IDs are kept.
MAX_REQUESTS = 2005000  # number of requests used in the rest of the notebook

req = []
# Slice out exactly the ID tokens instead of scanning every token, and stop at
# the cap. `r` is intentionally a plain loop variable (not a comprehension) so
# the name still exists for the later `del ... r` clean-up cell.
for r in data[1:3 * MAX_REQUESTS:3]:
    # int() parses the decimal string directly; no manual digit arithmetic needed.
    req.append(int(r))

req = np.array(req)
req.shape

(2005000,)

In [52]:
# Number of distinct request IDs among the selected requests.
total_id = len(np.unique(req))
print(total_id)

763307


In [53]:
#To find the range of request id
min = np.min(req)
max = np.max(req)
min,max

(2301, 4294966263)

In [0]:
# Reduce the number of distinct IDs by bucketing the raw IDs into `id` classes.
# NOTE(review): `id` shadows the Python builtin; it is read by many later cells.
id = 100
# Width of one bucket, chosen so that (max - min) spans id-1 buckets.
factor = (max - min)/(id-1)
# Vectorised equivalent of int(req[i] / factor) for every request (truncation
# towards zero); kept as a plain list of Python ints like before.
# NOTE(review): `min` is not subtracted before dividing, so the largest bucket
# index is max/factor rather than exactly id-1 - fine while min << max.
req1 = (req / factor).astype(int).tolist()

In [0]:
# For the bipartite network: deal the request stream out to `users` users.
# Each entry of req_user holds `users` consecutive requests (one per user);
# a trailing incomplete group is dropped.
users = 5
req_user = []
for start in range(0, users * (len(req1) // users), users):
  temp = req1[start:start + users]
  req_user.append(temp)

In [56]:
# Turn the list of per-step groups into an array with one row per user.
# NOTE: this cell rebinds `req_user` to its own transpose, so running it a
# second time flips the axes back - run it exactly once.
req_user = np.array(np.transpose(req_user))
req_user.shape

(5, 401000)

In [0]:
# Drop intermediates that are no longer needed (every name listed must still exist).
del req, req1, temp, data, r

In [58]:
# Per-user histogram of bucketed IDs over the first 1000 requests.
count = np.zeros((users, id), dtype='float')
for user_hist, user_trace in zip(count, req_user):
  # user_hist is a view into `count`; add.at accumulates repeated IDs correctly.
  np.add.at(user_hist, user_trace[:1000].astype(int), 1)
count.shape

(5, 100)

In [0]:
# One float32 vector of length `id` per user and per window position;
# filled in by the sliding-window cell below.
prob = np.zeros((users,len(req_user[0])-1000,id), dtype = 'f4')

In [0]:
# Slide a 1000-request window along each user's trace and store the empirical
# ID distribution at every position. `count` must hold the histogram of the
# first 1000 requests on entry; after step t it covers requests t+1 .. t+1000.
window = 1000
n_steps = len(req_user[0]) - window
for u in range(users):
  hist = count[u]      # view: updates are written through to `count`
  trace = req_user[u]
  for t in range(n_steps):
    hist[int(trace[t + window])] += 1   # request entering the window
    hist[int(trace[t])] -= 1            # request leaving the window
    prob[u, t, :] = hist / window

In [0]:
# The running histogram is only needed while `prob` is being filled.
del count

In [62]:
# `batch_size` is the number of time steps in one encoder sequence.
# The first 300000 positions of every user are cut into 20 consecutive
# sequences; because of the C-order reshape, sample index = user*20 + chunk
# (all chunks of user 0 first, then user 1, ...).
batch_size = 15000
encoder_input_data = prob[:,:300000,:].reshape(20*users,batch_size,id) # total samples per users = 20
encoder_input_data.shape

(100, 15000, 100)

In [63]:
K = 10 # Number of future steps to be predicted after each encoder sequence
n_chunks = 20 # encoder sequences per user (must match the encoder input cell)

# For user u and chunk b (encoder steps [b*batch_size, (b+1)*batch_size)) the
# target is the K steps that immediately follow the chunk.
# Stacking along axis=1 gives (users, n_chunks, K, id); the reshape then yields
# sample index = user*n_chunks + chunk, the same ordering the encoder inputs
# get from their reshape. (Building the array as (chunk, step, user, id) and
# reshaping it would mix users, steps and chunks within one sample.)
decoder_target_data = np.stack(
    [prob[:, batch_size*i : batch_size*i + K, :] for i in range(1, n_chunks + 1)],
    axis=1,
)

decoder_target_data = decoder_target_data.reshape(n_chunks*users, K, id)
decoder_target_data.shape

(100, 10, 100)

In [64]:
# Teacher forcing: the decoder input at step t is the target at step t-1 of the
# SAME sample, and step 0 is seeded with the last encoder step of that sample.
# This mirrors decode_sequence(), which seeds with input_seq[:, -1, :] and then
# feeds each prediction back in. The shift is therefore along the time axis
# (axis 1), not along the sample axis.
decoder_input_data = np.zeros(decoder_target_data.shape)
decoder_input_data[:, 0, :] = encoder_input_data[:, -1, :]
decoder_input_data[:, 1:, :] = decoder_target_data[:, :-1, :]
decoder_input_data.shape

(100, 10, 100)

MODEL

In [0]:
latent_dim = 128 # LSTM hidden units
dropout = 0.1 # passed as the `dropout` argument of both LSTM layers

In [66]:
# Encoder: consumes a variable-length sequence of `id`-dimensional vectors and
# keeps only its final hidden/cell state.
encoder_inputs = Input(shape=(None, id)) 
encoder = LSTM(latent_dim, dropout=dropout, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
encoder_states = [state_h, state_c]

# Decoder: starts from the encoder state and returns an output at every step.
decoder_inputs = Input(shape=(None, id)) 
decoder_lstm = LSTM(latent_dim, dropout=dropout, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                     initial_state=encoder_states)

# Softmax over the `id` classes at every decoder step.
# NOTE(review): this Dense layer is not bound to a name; any inference graph
# that should share its trained weights has to fetch it from `model.layers`.
decoder_outputs = Dense(id, activation='softmax')(decoder_outputs)

# Training model: (encoder sequence, decoder input sequence) -> decoder output sequence.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            (None, None, 100)    0                                            
__________________________________________________________________________________________________
input_6 (InputLayer)            (None, None, 100)    0                                            
__________________________________________________________________________________________________
lstm_3 (LSTM)                   [(None, 128), (None, 117248      input_5[0][0]                    
__________________________________________________________________________________________________
lstm_4 (LSTM)                   [(None, None, 128),  117248      input_6[0][0]                    
                                                                 lstm_3[0][1]               

In [67]:
# Train with Adam on the mean squared error between predicted and target distributions.
# NOTE(review): `batch_size` (15000) was defined as the encoder sequence length
# and is reused here as the mini-batch size; with the 100 training samples
# shown above that presumably means one gradient step per epoch - confirm this is intended.
model.compile(Adam(), loss='mean_squared_error')
history = model.fit([encoder_input_data, decoder_input_data], decoder_target_data,batch_size=batch_size,epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


FUTURE REQUEST PREDICTION

In [0]:
# Inference encoder: sequence -> [h, c] state, sharing the trained encoder layers.
encoder_model = Model(encoder_inputs, encoder_states)
pred_steps = K

# The inference decoder receives its state explicitly so it can be stepped one
# time step at a time.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]

# Reuse the output layer that was trained as part of `model`. Instantiating a
# new Dense(...) here would give the inference decoder randomly initialised,
# untrained output weights.
decoder_dense = next(layer for layer in model.layers if isinstance(layer, Dense))
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                      [decoder_outputs] + decoder_states)

In [0]:
def decode_sequence(input_seq):
    """Predict the next `pred_steps` distributions that follow `input_seq`.

    Parameters
    ----------
    input_seq : numpy array of shape (batch, timesteps, id)
        Encoder input sequence(s). Any batch size is accepted; a batch of
        size 1 behaves exactly as before.

    Returns
    -------
    numpy array of shape (batch, pred_steps, id)
        The decoder output for each predicted step.

    Relies on the module-level `encoder_model`, `decoder_model`, `pred_steps`
    and `id`.
    """
    # Encode the input as state vectors.
    states_value = encoder_model.predict(input_seq)

    # Seed the decoder with the last time step of the encoder input
    # (kept 3-D: one time step per batch element).
    target_seq = input_seq[:, -1:, :]

    decoded_seq = np.zeros((input_seq.shape[0], pred_steps, id))
    for i in range(pred_steps):
        output, h, c = decoder_model.predict([target_seq] + states_value)

        decoded_seq[:, i, :] = output[:, 0, :]

        # Feed the prediction back in as the next decoder input (length-1 sequence).
        target_seq = output[:, :1, :]

        # Carry the recurrent state over to the next step.
        states_value = [h, c]

    return decoded_seq

In [70]:
# Stride between the start positions of consecutive prediction windows, chosen
# so that 1000 windows of length `batch_size` fit into one user's `prob` rows.
n = int((len(prob[0]) - batch_size)/1000)
n

385

In [71]:
# For every user, decode 1000 windows of length `batch_size`, window j starting at j*n.
prediction = []
for i in range(users):
  print(i)  # progress: index of the user being processed
  temp = [
      decode_sequence(prob[i, j*n : j*n + batch_size, :].reshape(1, batch_size, id))
      for j in range(1000)
  ]
  prediction.append(temp)

0
1
2
3
4


In [72]:
# Drop the singleton batch axis returned by decode_sequence:
# (users, windows, predicted step, id).
prediction = np.array(prediction).reshape(users,1000,K,id)
prediction.shape

(5, 1000, 10, 100)

GENERATION OF FAKE REQUESTS

In [73]:
# For every predicted step keep the IDs whose score is strictly greater than
# the (TOP_N+1)-th largest score, i.e. at most the TOP_N most likely IDs, listed
# in increasing ID order. Each window is padded with ID 0 up to
# TOP_N * (number of predicted steps) entries so the result is rectangular.
TOP_N = 5
n_windows, n_pred_steps = prediction.shape[1], prediction.shape[2]
pad_len = TOP_N * n_pred_steps

fake_req = []
for i in range(users):
  t = []
  for j in range(n_windows):
    temp = []
    for k in range(n_pred_steps):
      scores = prediction[i, j, k]
      # The threshold is computed once per step instead of re-sorting for every ID.
      threshold = np.sort(scores)[-(TOP_N + 1)]
      temp.extend(np.flatnonzero(scores > threshold).tolist())
    temp.extend([0] * (pad_len - len(temp)))
    t.append(temp)
  fake_req.append(t)
fake_req = np.array(fake_req) 
fake_req.shape

(5, 1000, 50)

In [0]:
# Build, per user, a trace that interleaves real requests with the fake ones:
# the first `batch_size` real requests, the fake block of window 0, then for
# each window i a run of `n` real requests followed (for i > 0) by the fake
# block of window i.
# NOTE(review): the real runs restart at index 10000 although `batch_size`
# (15000) real requests were already emitted, so requests 10000..14999 appear
# twice - confirm whether 10000 should be `batch_size`.
generated_req = []
for l in range(users):
  request = list(req_user[l, :batch_size])
  request.extend(fake_req[l, 0, :50])
  for i in range(1000):
    start = 10000 + n*i
    request.extend(req_user[l, start:start + n])
    if i > 0:
      request.extend(fake_req[l, i, :50])
  generated_req.append(request)

In [75]:
# Convert the per-user lists into one array (rows = users).
generated_req = np.array(generated_req)
generated_req.shape

(5, 450000)

In [76]:
# Shape of the original per-user traces, for comparison with generated_req above.
req_user.shape

(5, 401000)

In [0]:
# Persist both traces; np.save appends '.npy', so this writes req.npy and
# gen_req.npy in the current working directory.
np.save('req',req_user)
np.save('gen_req',generated_req)