<a href="https://colab.research.google.com/github/sumitdua10/Seq-Networks/blob/master/Seq_to_Seq_Teacher_Forcing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import files

uploaded = files.upload()

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))

Saving book2.txt to book2.txt
User uploaded file "book2.txt" with length 20298 bytes


In [2]:
import pandas as pd
import numpy as np

df = pd.read_csv(fn, delimiter = '\t')
print(df.shape)
print(df[:5])


(797, 4)
           X  Date  Month  Year
0  18/9/1970    18      9  1970
1  15/7/1968    15      7  1968
2  23/5/1963    23      5  1963
3  27/7/2007    27      7  2007
4  16/6/1995    16      6  1995


In [3]:
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Dropout, LSTM, GRU, Input, Embedding
import random as rn
import tensorflow as tf

#Code to get reproducible results
np.random.seed(42)
rn.seed(12345)

# Force TensorFlow to use single thread.
# Multiple threads are a potential source of non-reproducible results.
# For further details, see: https://stackoverflow.com/questions/42022950/

session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                              inter_op_parallelism_threads=1)

from keras import backend as K
tf.set_random_seed(1234)

sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)


Using TensorFlow backend.


In [4]:
data = open(fn, 'r').read()
data= data.lower()
chars = list(set(data))
#print(data[1:50])

data_size, VOCAB_LEN = len(data), len(chars)
print('There are %d total characters and %d unique characters(vocab length)in your data.' % (data_size, VOCAB_LEN))

#2. Global variables
MAX_INPUT_SEQ_LEN = len(max(df['X']))
OUTPUT_SIZE = 4 # one for year, one for month and one for date
RNN_SIZE = 96
m = df.shape[0]

print("Max Length ", MAX_INPUT_SEQ_LEN)

#3. Create a dictionary & vocab size
char_to_ix = { ch:i for i,ch in enumerate(sorted(chars)) }
ix_to_char = { i:ch for i,ch in enumerate(sorted(chars)) }
print(ix_to_char)
print(char_to_ix)


There are 19501 total characters and 40 unique characters(vocab length)in your data.
Max Length  29
{0: '\t', 1: '\n', 2: ' ', 3: ',', 4: '-', 5: '/', 6: '0', 7: '1', 8: '2', 9: '3', 10: '4', 11: '5', 12: '6', 13: '7', 14: '8', 15: '9', 16: ':', 17: 'a', 18: 'b', 19: 'c', 20: 'd', 21: 'e', 22: 'f', 23: 'g', 24: 'h', 25: 'i', 26: 'j', 27: 'l', 28: 'm', 29: 'n', 30: 'o', 31: 'p', 32: 'r', 33: 's', 34: 't', 35: 'u', 36: 'v', 37: 'w', 38: 'x', 39: 'y'}
{'\t': 0, '\n': 1, ' ': 2, ',': 3, '-': 4, '/': 5, '0': 6, '1': 7, '2': 8, '3': 9, '4': 10, '5': 11, '6': 12, '7': 13, '8': 14, '9': 15, ':': 16, 'a': 17, 'b': 18, 'c': 19, 'd': 20, 'e': 21, 'f': 22, 'g': 23, 'h': 24, 'i': 25, 'j': 26, 'l': 27, 'm': 28, 'n': 29, 'o': 30, 'p': 31, 'r': 32, 's': 33, 't': 34, 'u': 35, 'v': 36, 'w': 37, 'x': 38, 'y': 39}


In [0]:


#5. Create on hot encoding of size x = m X SEQ_L X VOCAB_LEN and y = m X OUTPUT_SIZE * VOCAB_LEN

x = np.zeros(shape = (m, MAX_INPUT_SEQ_LEN, VOCAB_LEN))
y = np.zeros(shape=(m,OUTPUT_SIZE, VOCAB_LEN))

#decoder input. This will be used as Teacher enforcing during training. 
# It will be y shifted by one character so that y[0] goes as input to 2nd decoder cell and so on. First input to decoder cell will be 0
# and output h,c from encoder LSTM will be used as initial states for first cell of decoder model.

de_x  = np.zeros(shape=(m,OUTPUT_SIZE, VOCAB_LEN))

df['X'] = df['X'].str.lower()
for i in range(m):
    for j,k in enumerate(df['X'][i]):
      try:
        #print(k)
        x[i][j][char_to_ix[k]] = 1
      except:
        print("i {} j {} k {}".format( i, j, k))
        
    for a,b in enumerate(str(df['Year'][i])):
      try:
        
        y[i][a][char_to_ix[b]] = 1
        
        
        if a<OUTPUT_SIZE-1:
          de_x[i][a+1][char_to_ix[b]] = 1
        
      except:
        print("i {} a {} b {}".format( i, a, b))
   

In [0]:
#Define the layers and global variables


activation_size=96
activation_funct = 'tanh'
# a layer instance is callable on a tensor, and returns a tensor
def seq_layer(ret_seq,  inputs):
  return LSTM(activation_size,  activation=activation_funct, return_state=True, return_sequences = ret_seq)(inputs)



In [116]:
from keras import optimizers

en_inputs = Input(shape=(MAX_INPUT_SEQ_LEN, VOCAB_LEN))

#encoder model will return only final state. so final tensor will represent the input text which will be further decoded.
o,h,c = seq_layer(ret_seq=False, inputs = en_inputs)

en_states = [h,c]

#decoder inputs. it should have the seq length of output tensor.
de_inputs = Input(shape = (OUTPUT_SIZE, VOCAB_LEN))

#decoder LSTM will have return seq True and no. of cells matching with the output length.
o = LSTM(activation_size,return_sequences = True)(inputs =de_inputs, initial_state = [h,c])

print(o)

#final dense layer of softmax to predict the character among available characters of size VOCAB_LEN
o = Dense(VOCAB_LEN, activation='softmax')(o)
print(o)


model = Model(inputs=[en_inputs, de_inputs], outputs=o)

print(model.summary())

sgd = optimizers.Adam(lr=0.01)
model.compile(optimizer=sgd,
              loss='categorical_crossentropy',
              metrics=['accuracy'])


Tensor("lstm_20/transpose_1:0", shape=(?, ?, 96), dtype=float32)
Tensor("dense_15/truediv:0", shape=(?, 4, 40), dtype=float32)
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_52 (InputLayer)           (None, 29, 40)       0                                            
__________________________________________________________________________________________________
input_53 (InputLayer)           (None, 4, 40)        0                                            
__________________________________________________________________________________________________
lstm_19 (LSTM)                  [(None, 96), (None,  52608       input_52[0][0]                   
__________________________________________________________________________________________________
lstm_20 (LSTM)                  (None, 4, 96)        52608       input_53[0][0]  

In [122]:

model.fit([x,de_x], y, epochs= 3, validation_split = 0.25)  
#model.fit(x, y, epochs= 30, validation_split = 0.25) 


Train on 597 samples, validate on 200 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f50a6931358>

Prediction 

In [130]:
pred_l = ['23-Dec-1955', '24 Dec 1960', 'December 15 2003', 'January of 15 90', '1995 1 Apr', 
          '15/01/2000', '19 May 1999', '01-02-90', '2 October 1993', '25 05 83']

pred_l = [x.lower() for x in pred_l]

#print(pred_l)
pred_m = len(pred_l)
print("TOtal size of prediction list: ", pred_m)

pred_x = np.zeros(shape = (pred_m, MAX_INPUT_SEQ_LEN, VOCAB_LEN))

for i in range(len(pred_l)):
    for j,k in enumerate(pred_l[i]):
      try:
        pred_x[i][j][char_to_ix[k]] = 1
      except:
        print("i {} j {} k {}".format( i, j, k))
        
 

TOtal size of prediction list:  10


Let's define the prediction modelling

Prediction will have two Modes - Encoder model and decoder model.

Encoder model will predict the final tensor representing the input text.
Predicted tensor from Encoder will be used as input for decoder model. 

First cell of decoder will have final states from encoder model and input as zero or <SOS> token. The prediction from first cell is our predicted character. The h,s from this cell will be used input to subsequent cell till desired length or <EOS> token is reached.

In [131]:
#Define the prediction encoder model. This model will have the same weights as of training encoder model
encoder_model = Model(en_inputs, en_states)
print(encoder_model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_52 (InputLayer)        (None, 29, 40)            0         
_________________________________________________________________
lstm_19 (LSTM)               [(None, 96), (None, 96),  52608     
Total params: 52,608
Trainable params: 52,608
Non-trainable params: 0
_________________________________________________________________
None


In [132]:
#Tt turns out prediction encoder model's  layer of weights is same as of training model first layer of weights.
#Keras does it automatically as it's using same inputs

w = model.get_weights()
en_w = encoder_model.get_weights()
print("Training Model Weights:")
for i in range(len(w)):
  print(w[i].shape)

print("\nPrediction Model Weights:")
for i in range(len(en_w)):
  print(en_w[i].shape)

print("\nTraining Model Sample Weights from first Tensor:")
print(w[0][0][:4])
  
print("\nPrediction Model Sample Weights from first Tensor:")
print(en_w[0][0][:4])

Training Model Weights:
(40, 384)
(96, 384)
(384,)
(40, 384)
(96, 384)
(384,)
(96, 40)
(40,)

Prediction Model Weights:
(40, 384)
(96, 384)
(384,)

Training Model Sample Weights from first Tensor:
[-0.07492594 -0.0961068   0.06139428  0.03893624]

Prediction Model Sample Weights from first Tensor:
[-0.07492594 -0.0961068   0.06139428  0.03893624]


In [133]:
decoder_inputs = Input(shape=(1,VOCAB_LEN,))
decoder_state_input_h = Input(shape=(activation_size,))
decoder_state_input_c = Input(shape=(activation_size,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

decoder_outputs, state_h, state_c = LSTM (activation_size, return_state = True, return_sequences=False)(
    decoder_inputs, initial_state=decoder_states_inputs)

decoder_states = [state_h, state_c]
decoder_outputs = Dense(VOCAB_LEN, activation='softmax')(decoder_outputs)

decoder_model = Model(
    [decoder_inputs, decoder_state_input_h, decoder_state_input_c],# decoder_states_inputs],
    [decoder_outputs, state_h, state_c])#decoder_states])

print(decoder_model.summary())
  


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_57 (InputLayer)           (None, 1, 40)        0                                            
__________________________________________________________________________________________________
input_58 (InputLayer)           (None, 96)           0                                            
__________________________________________________________________________________________________
input_59 (InputLayer)           (None, 96)           0                                            
__________________________________________________________________________________________________
lstm_22 (LSTM)                  [(None, 96), (None,  52608       input_57[0][0]                   
                                                                 input_58[0][0]                   
          

In [134]:
#Tt turns out prediction decoders model's layers of weights is same as of training model's decoder layer of weights.
#Keras does it automatically as it's using same inputs

w = model.get_weights()
de_w = decoder_model.get_weights()
print("Training Model Weights:")
for i in range(len(w)):
  print("Layer ",i, ": ", w[i].shape)


print("\nPrediction Model Weights:")
for i in range(len(de_w)):
  print("Layer ",i, ": ", de_w[i].shape)

print("\nTraining Model Sample Weights from last Tensor:")
print("Layer last", ": ", w[-1][:4])
  
print("\nPrediction Model Sample Weights from last Tensor:")
print("Layer last", ": ", de_w[-1][:4])
decoder_model.set_weights(w[3:])

print("\nTraining Model Sample Weights from last Tensor:")
print("Layer last", ": ", w[-1][:4])

de_w = decoder_model.get_weights()
print("\nPrediction Model Sample Weights from last Tensor:")
print("Layer last", ": ", de_w[-1][:4])


Training Model Weights:
Layer  0 :  (40, 384)
Layer  1 :  (96, 384)
Layer  2 :  (384,)
Layer  3 :  (40, 384)
Layer  4 :  (96, 384)
Layer  5 :  (384,)
Layer  6 :  (96, 40)
Layer  7 :  (40,)

Prediction Model Weights:
Layer  0 :  (40, 384)
Layer  1 :  (96, 384)
Layer  2 :  (384,)
Layer  3 :  (96, 40)
Layer  4 :  (40,)

Training Model Sample Weights from last Tensor:
Layer last :  [-0.22116935 -0.2755494  -0.22704618 -0.2136446 ]

Prediction Model Sample Weights from last Tensor:
Layer last :  [0. 0. 0. 0.]

Training Model Sample Weights from last Tensor:
Layer last :  [-0.22116935 -0.2755494  -0.22704618 -0.2136446 ]

Prediction Model Sample Weights from last Tensor:
Layer last :  [-0.22116935 -0.2755494  -0.22704618 -0.2136446 ]


In [135]:
#Final state prediction from encoder prediction model

#encoder_model and get final state values
states = encoder_model.predict(pred_x)

#We deinfed output of prediction encoder model as en_states = [h,c] hence we get output as list of h & c
pred_h = states[0]
pred_c = states[1]

pred_de_x  = np.zeros(shape=(OUTPUT_SIZE, pred_m,1, VOCAB_LEN))

for i in range(OUTPUT_SIZE):
  
  de_o,de_h,de_c = decoder_model.predict([pred_de_x[0], pred_h, pred_c])#+states)
  
  #print(de_o.shape)
  
  pred_h = de_h
  pred_c = de_c
  
  if i<OUTPUT_SIZE-1:
    pred_de_x[i+1,:,0,:] = de_o
  
  pos = np.argmax(de_o, axis=1)
  #print(pos)
  out = [ix_to_char[i] for i in pos]
  print(out)


['1', '1', '2', '2', '1', '2', '1', '2', '1', '2']
['9', '9', '0', '0', '9', '0', '9', '0', '9', '0']
['9', '6', '0', '1', '9', '0', '9', '0', '2', '0']
['9', '2', '3', '9', '5', '0', '9', '3', '3', '0']
