## Loading the libraries

In [1]:
import math
import os
import pickle
import urllib
import urllib.request
import zipfile

import numpy as np
import pandas as pd
from keras import Sequential
from keras.layers import Dense, Embedding, LSTM
from keras.models import model_from_json
from keras.utils import to_categorical

Using TensorFlow backend.


## Loading GloVe Embedding Dictionaries

In [2]:
# Fetch and unpack the GloVe 6B vectors. This is skipped entirely once the
# cached embedding matrix ("embedding.pickle") exists, because the raw vectors
# are only needed to build that matrix.
if not os.path.exists("embedding.pickle"):
    glove_zip = "glove.6B.zip"

    if not os.path.exists(glove_zip):
        # Download under a temporary name and rename on success, so that an
        # interrupted transfer is never mistaken for a complete archive.
        partial_zip = glove_zip + ".part"
        urllib.request.urlretrieve("https://nlp.stanford.edu/data/glove.6B.zip",
                                   partial_zip)
        os.replace(partial_zip, glove_zip)

    # Extraction is always repeated; it simply overwrites any earlier output.
    with zipfile.ZipFile(glove_zip, "r") as zip_ref:
        zip_ref.extractall("glove")

In [3]:
# Parse the 100-dimensional GloVe text file into a {word: vector} lookup.
# Skipped when the cached embedding matrix already exists, because the lookup
# is only used to build that matrix.
if not os.path.exists("embedding.pickle"):
    embeddings_index = {}
    with open('./glove/glove.6B.100d.txt', encoding="utf8") as glove_file:
        for record in glove_file:
            # The first space-separated field is the word, the rest are its
            # vector components.
            token, *components = record.split(" ")
            embeddings_index[token] = np.asarray(components, dtype='float32')
    print('Loaded %s word vectors.' % len(embeddings_index))

## Reading the Tokenizer

In [4]:
# Restore the previously fitted tokenizer from disk.
# NOTE: unpickling executes arbitrary code; only load pickles you created yourself.
with open("tokenizer.pickle", "rb") as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

# One row more than the number of known words; presumably word indices start
# at 1 and index 0 is reserved (e.g. for padding) -- TODO confirm.
vocab_size = 1 + len(tokenizer.word_index)

## Creating the embedding matrix

In [5]:
# Obtain the (vocab_size x 100) embedding matrix: reuse the cached copy when
# present, otherwise build it from the GloVe lookup and cache it.
if os.path.exists("embedding.pickle"):
    with open("embedding.pickle", "rb") as cache_file:
        embedding_matrix = pickle.load(cache_file)
else:
    # Row k holds the vector of the word whose tokenizer index is k; words
    # missing from GloVe keep an all-zero row.
    embedding_matrix = np.zeros((vocab_size, 100))
    for token, row in tokenizer.word_index.items():
        vector = embeddings_index.get(token)
        if vector is None:
            continue
        embedding_matrix[row] = vector

    with open("embedding.pickle", "wb") as cache_file:
        pickle.dump(embedding_matrix, cache_file)

## Filtering the data

In [6]:
# Build the filtered dataset and its metadata once. Both artifacts are needed
# downstream, so they are regenerated when either one is missing (a run that
# died between the two writes would otherwise leave the notebook stuck).
if not (os.path.exists("x_y_filtered.csv")
        and os.path.exists("x_y_filtered_meta.pickle")):
    data = pd.read_csv("x_y_data.csv")

    # Keep only labels that occur with more than 10 non-null x1 values.
    ys = data.groupby('y')['x1'].count().reset_index()
    ys = ys[ys.x1 > 10]
    data = data[data.y.isin(ys.y)]

    # Number of output classes (largest label + 1) and number of rows kept;
    # the batch generator needs the row count to know when to wrap around.
    y_vocab = int(data.y.max() + 1)
    total_size = len(data)

    data.to_csv("x_y_filtered.csv", header=False, index=False)
    with open("x_y_filtered_meta.pickle", 'wb') as file:
        pickle.dump((y_vocab, total_size), file)

    # Release the frames; later cells stream the CSV from disk instead.
    del ys
    del data

In [7]:
# Read back the (y_vocab, total_size) pair stored alongside the filtered CSV.
with open("x_y_filtered_meta.pickle", "rb") as meta_file:
    filtered_meta = pickle.load(meta_file)

y_vocab, total_size = filtered_meta

## Creating the model

In [8]:
def load_model(file_path):
    """Rebuild a Keras model from ``<file_path>.json`` and ``<file_path>.h5``.

    Parameters
    ----------
    file_path : str
        Checkpoint path without extension; ``.json`` (architecture) and
        ``.h5`` (weights) are appended to it.

    Returns
    -------
    The model returned by ``model_from_json`` with its weights loaded.
    The model is not compiled here.
    """
    # `with` guarantees the handle is closed even if reading fails.
    with open(file_path + ".json", 'r') as json_file:
        loaded_model_json = json_file.read()

    loaded_model = model_from_json(loaded_model_json)
    loaded_model.load_weights(file_path + ".h5")

    print(file_path + " loaded.")

    return loaded_model

In [9]:
# Checkpoints are stored as Keras_Model/model_<10*k>_epochs.{json,h5}.
file_path = "Keras_Model/"+"model_"

# Count the consecutive checkpoints already on disk. `i` is intentionally left
# at notebook level: the training cell reads it as the number of rounds saved
# so far. A while-loop is used so the count is also right when 100 or more
# checkpoints exist.
i = 0
while os.path.exists(file_path + str(10*(i+1)) + "_epochs" + ".json"):
    i += 1

if i == 0:
    # No checkpoint yet: build the network from scratch.
    model = Sequential()
    # Frozen embedding layer initialised from embedding_matrix; inputs are
    # sequences of 4 token indices.
    model.add(Embedding(vocab_size, 100, weights=[embedding_matrix], input_length=4, trainable=False))
    # Four stacked LSTMs; only the last one collapses the sequence dimension.
    model.add(LSTM(1024,return_sequences=True))
    model.add(LSTM(1024,return_sequences=True))
    model.add(LSTM(1024,return_sequences=True))
    model.add(LSTM(1024))
    # Dense stack narrowing towards the classifier.
    model.add(Dense(1024, activation='relu'))
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(64, activation='relu'))
    # One softmax unit per class.
    model.add(Dense(y_vocab, activation='softmax'))
    print("Model created.")
else:
    # Resume from the most recent checkpoint.
    file_path = file_path + str(10*i) + "_epochs"
    model = load_model(file_path)

# summary() prints the table itself and returns None, so it is not wrapped in
# print() (which would emit a stray "None").
model.summary()

# Checkpoints hold only architecture and weights, so a resumed model is
# compiled again here with a fresh optimizer.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

Model created.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 4, 100)            63645600  
_________________________________________________________________
lstm_1 (LSTM)                (None, 4, 1024)           4608000   
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 1024)           8392704   
_________________________________________________________________
lstm_3 (LSTM)                (None, 4, 1024)           8392704   
_________________________________________________________________
lstm_4 (LSTM)                (None, 1024)              8392704   
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              1049600   
_________________________________________________________________
dense_2 (Dense)              (None, 512)               524800

## Creating the batch generator for training

In [10]:
def generator(batch_size = 512):
    """Yield ``(X, y)`` training batches from ``x_y_filtered.csv`` forever.

    The file is streamed in chunks of ``batch_size`` rows. Once the first
    ``total_size`` rows (or the end of the file) have been served, reading
    restarts from the first row. The last batch of a pass may hold fewer than
    ``batch_size`` rows.

    Parameters
    ----------
    batch_size : int
        Maximum number of rows per batch; must be positive.

    Yields
    ------
    tuple
        ``X`` -- all columns except the last, as an array;
        ``y`` -- the last column passed through ``to_categorical`` with
        ``num_classes=y_vocab``.

    Raises
    ------
    ValueError
        If ``batch_size`` or the notebook-level ``total_size`` is not
        positive (either would otherwise produce empty batches forever).
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be positive, got %r" % (batch_size,))
    if total_size <= 0:
        raise ValueError("total_size must be positive, got %r" % (total_size,))

    while True:
        # Stream the file once per pass. Re-opening it and skipping `start`
        # rows for every batch makes a full pass quadratic in the file length.
        reader = pd.read_csv("x_y_filtered.csv", header=None, chunksize=batch_size)
        served = 0
        try:
            for chunk in reader:
                # Never serve more than total_size rows per pass.
                chunk = chunk.iloc[:total_size - served]
                served += len(chunk)

                X = chunk.iloc[:, :-1].values
                y = to_categorical(chunk.iloc[:, -1].values, num_classes=y_vocab)
                yield X, y

                if served >= total_size:
                    break
        finally:
            # Also runs when the consumer abandons the generator mid-pass.
            reader.close()

## Training the model

In [11]:
def save_model(model,model_name):
    """Write a checkpoint of ``model`` under the ``Keras_Model/`` directory.

    Two files are produced: ``Keras_Model/<model_name>.h5`` with the weights
    and ``Keras_Model/<model_name>.json`` with the architecture.

    Parameters
    ----------
    model
        Object providing ``to_json()`` and ``save_weights(path)``.
    model_name : str
        Base file name, without directory or extension.
    """
    # The directory does not exist on a fresh checkout; create it on demand.
    os.makedirs("Keras_Model", exist_ok=True)

    # The resume logic detects a checkpoint by its .json file, so write the
    # weights first: an interrupted save then never looks complete.
    model.save_weights("Keras_Model/"+model_name+".h5")

    model_json = model.to_json()
    with open("Keras_Model/"+model_name+".json", "w") as json_file:
        json_file.write(model_json)

    print("Keras_Model/"+model_name + " saved to disk.")

In [None]:
# Train in rounds of 10 epochs and checkpoint after every round.
batch_size = 128
steps = 1000   # batches drawn from the generator per epoch
rounds = 10    # rounds to run in this cell execution

# `i` is the number of checkpoints found when the model was built or resumed.
model_saved = i
for round_index in range(model_saved, model_saved + rounds):
    # NOTE(review): a fresh generator restarts at the first row every round;
    # if total_size exceeds 10 * steps * batch_size the tail of the data is
    # never seen -- confirm this is intended.
    model.fit_generator(generator(batch_size), steps_per_epoch=steps, epochs=10)
    save_model(model,"model_"+str(10*(round_index+1))+"_epochs")

    # Keep `i` equal to the number of checkpoints on disk, so re-running this
    # cell continues with the next label instead of overwriting the latest
    # checkpoint under a wrong epoch count.
    i = round_index + 1

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Keras_Model/model_10_epochs saved to disk.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Keras_Model/model_20_epochs saved to disk.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Keras_Model/model_30_epochs saved to disk.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Keras_Model/model_40_epochs saved to disk.
Epoch 1/10
Epoch 2/10