In [1]:
# Run this cell to mount your Google Drive.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


### Load pre-processed data for training

In [2]:
import pickle
import numpy as np
from keras.models import Model
from keras.layers import Input, Embedding, CuDNNLSTM, Dense, TimeDistributed, LSTM

Using TensorFlow backend.


In [0]:
# loading X and Y
with open('/content/drive/My Drive/Colab Notebooks/chatbot_seq2seq/data/x_and_y.pkl', 'rb') as f:
    X_encoder, X_decoder, y = pickle.load(f)

In [0]:
# loading vocab_embeddings
with open('/content/drive/My Drive/Colab Notebooks/chatbot_seq2seq/data/embedding_weights.pkl', 'rb') as f:
    embedding_weights = pickle.load(f)

#### Now the structure of our model will be like this:
##### 1. Encoder LSTM will take the input (Embedded) and after processing, it will pass on the cell state to the decoder
##### 2. Decoder LSTM will take initial state from cell state of encoder and inputs will be the Expected output just 1 word behind so basically the decoder lstm will predict the next word in the output sequence
##### 3. A dense layer i.e a regular Feedforward NN will then predict the words occuring in the expected sentence in one-hot encoded form i.e the labels will be "1" where the word from the vocabulary is present in the given sentence and the predicted output will be in softmax probability form so basically prediction by -ve log loss

### Building the Seq2Seq Model

In [0]:
max_len = 20
vocab_size = 15000
embedding_dim = 300
hidden_dim = 300
# obtained in vocab_embedding
number_of_samples = 221616
# train : val = 93.75 %
number_of_train_samples = 221616
number_of_val_samples = 0

In [0]:
epochs = 50
batch_size = 1026

In [7]:
# Embedding Layer
embedding_layer = Embedding(
    input_dim=vocab_size, 
    output_dim=embedding_dim,
    input_length=max_len,
    weights=[embedding_weights],
    trainable=False
)

W0627 04:40:46.456438 140022985574272 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.



In [8]:
# Encoder
encoder_inputs = Input(batch_shape=(batch_size, max_len,), dtype='int32')
encoder_embedding = embedding_layer(encoder_inputs)
encoder_LSTM = CuDNNLSTM(hidden_dim, return_state=True, stateful=True)
encoder_outputs, state_h, state_c = encoder_LSTM(encoder_embedding)

W0627 04:40:46.513562 140022985574272 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0627 04:40:46.527290 140022985574272 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0627 04:40:46.538114 140022985574272 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0627 04:40:46.538940 140022985574272 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:181: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.



In [0]:
# Decoder
decoder_inputs = Input(shape=(max_len,), dtype='int32')
decoder_embedding = embedding_layer(decoder_inputs)
decoder_LSTM = CuDNNLSTM(hidden_dim, return_state=True, return_sequences=True)
decoder_outputs, _, _ = decoder_LSTM(decoder_embedding, initial_state=[state_h, state_c])

In [0]:
# Output
outputs = TimeDistributed(Dense(vocab_size, activation='softmax'))(decoder_outputs)
model = Model([encoder_inputs, decoder_inputs], outputs)

In [11]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 20)           0                                            
__________________________________________________________________________________________________
input_1 (InputLayer)            (1026, 20)           0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         multiple             4500000     input_1[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________________________________________
cu_dnnlstm_1 (CuDNNLSTM)        [(1026, 300), (1026, 722400      embedding_1[0][0]                
__________

In [12]:
model.compile(optimizer='sgd', loss ='categorical_crossentropy', metrics = ['accuracy'])

W0627 04:40:53.202728 140022985574272 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.



### Training the Model

#### Preparing train and val generator for training in batches

##### Making Y in one-hot encoded form first
##### output is (number of sequences, max_len, vocab_size) i.e for each sentence at each position in maxlen what is the one hot encoding of the word present?

In [0]:
def train_generator(batch_size):
    number_of_batches = int(number_of_train_samples / batch_size)
    Y = np.zeros(shape=(batch_size, max_len, vocab_size), dtype="float32")
    while True:
        for count in range(number_of_batches):
            for i, sequences in enumerate(y[count * batch_size: count * batch_size + batch_size]):
                for j, sequence in enumerate(sequences):
                    Y[i][j][sequence] = 1
            yield ([
                X_encoder[count * batch_size: count * batch_size + batch_size],
                X_decoder[count * batch_size: count * batch_size + batch_size]], 
                Y
            )
            Y.fill(0)

In [0]:
def validation_generator(val_size):
    if val_size == 0:
        return None
    validation_index = number_of_samples - val_size
    Y = np.zeros(shape=(val_size, max_len, vocab_size), dtype="float32")
    for i, sequences in enumerate(y[validation_index:]):
        for j, sequence in enumerate(sequences):
            Y[i][j][sequence] = 1
            
    return ([X_encoder[validation_index:], X_decoder[validation_index:]], Y)

In [0]:
train_gen = train_generator(batch_size)   # we have 513 batches of 405 samples (513 x 405 => 207765 + 13851 => 221616)
val_gen = validation_generator(number_of_val_samples)

# history is used for plotting
history = model.fit_generator(
    generator=train_gen,
    steps_per_epoch=int(number_of_train_samples/batch_size),
    epochs=epochs,
    validation_data=val_gen,
    shuffle=False
)

W0627 04:40:53.364301 140022985574272 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
 41/216 [====>.........................] - ETA: 2:51 - loss: 2.2088 - acc: 0.7750

### Save Model

In [0]:
# Save model Architecture

# save as JSON
json_string = model.to_json()
open('model_architecture.json', 'w').write(json_string)

In [0]:
## Save the whole model

model.save('model.h5')

In [0]:
# Save model weights

model.save_weights('model_weights.h5')

In [0]:
# model.predict()