# 1 - Introduction

This notebook trains four different models that use the pretrained 50-dimensional word embeddings, and then sets up the same four architectures with the 300-dimensional embeddings.

## 1.1 Load Packages and Global Variables

In [1]:
%matplotlib inline
import os
import numpy as np
import nltk
from sklearn.externals import joblib

In [2]:
# Switch the working directory to the project root so that `src` can be
# imported and the relative data/ and models/ paths used below resolve.
# The move is guarded: if the current directory already contains `src`,
# it is treated as the project root, so re-running this cell does not climb
# one more directory level each time.
if os.path.isdir(os.path.join(os.getcwd(), "src")):
    PROJECT_DIR = os.getcwd()
else:
    # abspath normalises away the trailing ".." component.
    PROJECT_DIR = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
os.chdir(PROJECT_DIR)

In [3]:
import src.neural_networks as nn

Using TensorFlow backend.


## 1.2 - Load the Data

Load the train and validation neural network data sets if they are present, otherwise raise an exception.

In [4]:
# Load the train and validation sequence data sets.
try:
    train = joblib.load('data/processed/train_nn.pkl')
    valid = joblib.load('data/processed/valid_nn.pkl')
except FileNotFoundError as err:
    # The pickles are produced by an earlier notebook. Re-raise as a
    # FileNotFoundError (rather than a bare Exception) and chain the original
    # error explicitly so it is reported as the direct cause.
    raise FileNotFoundError("Files not found. Run Notebook 4.1") from err

In [9]:
# Load the embedding matrix with 50 dimensions.
try:
    embedding_matrix50 = joblib.load('data/interim/embeddings50.pkl')
except FileNotFoundError as err:
    # The pickle is produced by an earlier notebook. Re-raise as a
    # FileNotFoundError (rather than a bare Exception) and chain the original
    # error explicitly so it is reported as the direct cause.
    raise FileNotFoundError("Files not found. Run Notebook 4.1") from err

In [10]:
# Load the embedding matrix with 300 dimensions.
try:
    embedding_matrix300 = joblib.load('data/interim/embeddings300.pkl')
except FileNotFoundError as err:
    # The pickle is produced by an earlier notebook. Re-raise as a
    # FileNotFoundError (rather than a bare Exception) and chain the original
    # error explicitly so it is reported as the direct cause.
    raise FileNotFoundError("Files not found. Run Notebook 4.1") from err

# 2 - Run 50d models

In [16]:
# Keyword arguments shared by every 50d nn.create_model call below:
# the 50-dimensional embedding matrix and the hidden size.
model_args = dict(
    embedding_matrix=embedding_matrix50,
    n_hidden=250,
)

## 2.1 - LSTM + Averaged Final

In [7]:
# Build the LSTM variant with the averaged final layer (dense_final=False)
# and print its layer summary.
model = nn.create_model(rnn_type="LSTM", dense_final=False, **model_args)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
joke_seq (InputLayer)        (None, 300)               0         
_________________________________________________________________
embedding (Embedding)        (None, 300, 50)           1369800   
_________________________________________________________________
mask_paddings (Masking)      (None, 300, 50)           0         
_________________________________________________________________
drop_words (SpatialDropout1D (None, 300, 50)           0         
_________________________________________________________________
masking_1 (Masking)          (None, 300, 50)           0         
_________________________________________________________________
rnn (LSTM)                   (None, 250)               301000    
_________________________________________________________________
avg_pred (GlobalAverage)     (None, 1)                 0         
Total para

In [None]:
# Keyword arguments reused by the nn.run_model calls: the model plus the
# (sequences, labels) pairs for the train and validation sets.
# NOTE: "model" stores the model object that exists when this cell runs;
# rebinding the `model` variable in a later cell does not update this dict.
run_args = dict(
    model=model,
    train=(train["seqs"], train["labels"]),
    valid=(valid["seqs"], valid["labels"]),
)

In [8]:
# Run the LSTM + averaged-final model with the shared run arguments.
nn.run_model(
    out_path="models/nn_50d_lstm_avg.hdf5",
    **run_args
)

Train on 171945 samples, validate on 24564 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000

KeyboardInterrupt: 

## 2.2 - GRU + Averaged Final

The second model uses a GRU instead of an LSTM, but keeps the averaged final layer.

In [17]:
# Build the GRU variant with the averaged final layer (dense_final=False)
# and print its layer summary.
model = nn.create_model(rnn_type="GRU", dense_final=False, **model_args)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
joke_seq (InputLayer)        (None, 300)               0         
_________________________________________________________________
embedding (Embedding)        (None, 300, 50)           1369800   
_________________________________________________________________
mask_paddings (Masking)      (None, 300, 50)           0         
_________________________________________________________________
drop_words (SpatialDropout1D (None, 300, 50)           0         
_________________________________________________________________
masking_4 (Masking)          (None, 300, 50)           0         
_________________________________________________________________
rnn (GRU)                    (None, 250)               225750    
_________________________________________________________________
avg_pred (GlobalAverage)     (None, 1)                 0         
Total para

In [18]:
# run_args was built once and still holds the model object that existed at
# that time. Override its "model" entry so the model currently bound to
# `model` (the GRU + averaged-final one) is the one that is run and saved.
nn.run_model(out_path="models/nn_50d_gru_avg.hdf5",
             **dict(run_args, model=model))

Train on 171945 samples, validate on 24564 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
 34000/171945 [====>.........................] - ETA: 1:36 - loss: 0.6509 - acc: 0.6081

KeyboardInterrupt: 

## 2.3 - LSTM + Dense Final

The third model uses an LSTM again, but now has a dense final layer.

In [19]:
# Build the LSTM variant with a dense final layer (dense_final=True)
# and print its layer summary.
model = nn.create_model(rnn_type="LSTM", dense_final=True, **model_args)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
joke_seq (InputLayer)        (None, 300)               0         
_________________________________________________________________
embedding (Embedding)        (None, 300, 50)           1369800   
_________________________________________________________________
mask_paddings (Masking)      (None, 300, 50)           0         
_________________________________________________________________
drop_words (SpatialDropout1D (None, 300, 50)           0         
_________________________________________________________________
masking_5 (Masking)          (None, 300, 50)           0         
_________________________________________________________________
rnn (LSTM)                   (None, 250)               301000    
_________________________________________________________________
dense_drop (Dropout)         (None, 250)               0         
__________

In [20]:
# run_args was built once and still holds the model object that existed at
# that time. Override its "model" entry so the model currently bound to
# `model` (the LSTM + dense-final one) is the one that is run and saved.
nn.run_model(out_path="models/nn_50d_lstm_dense.hdf5",
             **dict(run_args, model=model))

Train on 171945 samples, validate on 24564 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000

KeyboardInterrupt: 

## 2.4 - GRU + Dense Final

The fourth and final (50-dimensional) model uses a GRU again, but now has a dense final layer.

In [21]:
# Build the GRU variant with a dense final layer (dense_final=True)
# and print its layer summary.
model = nn.create_model(rnn_type="GRU", dense_final=True, **model_args)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
joke_seq (InputLayer)        (None, 300)               0         
_________________________________________________________________
embedding (Embedding)        (None, 300, 50)           1369800   
_________________________________________________________________
mask_paddings (Masking)      (None, 300, 50)           0         
_________________________________________________________________
drop_words (SpatialDropout1D (None, 300, 50)           0         
_________________________________________________________________
masking_6 (Masking)          (None, 300, 50)           0         
_________________________________________________________________
rnn (GRU)                    (None, 250)               225750    
_________________________________________________________________
dense_drop (Dropout)         (None, 250)               0         
__________

In [22]:
# run_args was built once and still holds the model object that existed at
# that time. Override its "model" entry so the model currently bound to
# `model` (the GRU + dense-final one) is the one that is run and saved.
nn.run_model(out_path="models/nn_50d_gru_dense.hdf5",
             **dict(run_args, model=model))

Train on 171945 samples, validate on 24564 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000

KeyboardInterrupt: 

# 3 - Run 300d models

This section runs the neural network models that take the 300-dimensional word embeddings as inputs.

In [23]:
# Keyword arguments shared by every 300d nn.create_model call below:
# the 300-dimensional embedding matrix and the hidden size.
model_args = dict(
    embedding_matrix=embedding_matrix300,
    n_hidden=150,
)

## 3.1 - LSTM + Averaged Final

In [None]:
# Build the LSTM variant with the averaged final layer (dense_final=False)
# and print its layer summary.
model = nn.create_model(rnn_type="LSTM", dense_final=False, **model_args)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
joke_seq (InputLayer)        (None, 300)               0         
_________________________________________________________________
embedding (Embedding)        (None, 300, 300)          8218800   
_________________________________________________________________
mask_paddings (Masking)      (None, 300, 300)          0         
_________________________________________________________________
drop_words (SpatialDropout1D (None, 300, 300)          0         
_________________________________________________________________
masking_7 (Masking)          (None, 300, 300)          0         
_________________________________________________________________
rnn (LSTM)                   (None, 150)               270600    
_________________________________________________________________
avg_pred (GlobalAverage)     (None, 1)                 0         
Total para

In [None]:
# run_args was built once and still holds the model object that existed at
# that time. Override its "model" entry so the model currently bound to
# `model` (the 300d LSTM + averaged-final one) is the one that is run and saved.
nn.run_model(out_path="models/nn_300d_lstm_avg.hdf5",
             **dict(run_args, model=model))

Train on 171945 samples, validate on 24564 samples
Epoch 1/1000
 36000/171945 [=====>........................] - ETA: 7:00 - loss: 0.6549 - acc: 0.6051

## 3.2 - GRU + Averaged Final

In [None]:
# Build the GRU variant with the averaged final layer (dense_final=False)
# and print its layer summary.
model = nn.create_model(rnn_type="GRU", dense_final=False, **model_args)
model.summary()

In [None]:
# run_args was built once and still holds the model object that existed at
# that time. Override its "model" entry so the model currently bound to
# `model` (the 300d GRU + averaged-final one) is the one that is run and saved.
nn.run_model(out_path="models/nn_300d_gru_avg.hdf5",
             **dict(run_args, model=model))

## 3.3 - LSTM + Dense Final

In [None]:
# Build the LSTM variant with a dense final layer (dense_final=True)
# and print its layer summary.
model = nn.create_model(rnn_type="LSTM", dense_final=True, **model_args)
model.summary()

In [None]:
# run_args was built once and still holds the model object that existed at
# that time. Override its "model" entry so the model currently bound to
# `model` (the 300d LSTM + dense-final one) is the one that is run and saved.
nn.run_model(out_path="models/nn_300d_lstm_dense.hdf5",
             **dict(run_args, model=model))

## 3.4 - GRU + Dense Final

In [None]:
# Build the GRU variant with a dense final layer (dense_final=True)
# and print its layer summary.
model = nn.create_model(rnn_type="GRU", dense_final=True, **model_args)
model.summary()

In [None]:
# run_args was built once and still holds the model object that existed at
# that time. Override its "model" entry so the model currently bound to
# `model` (the 300d GRU + dense-final one) is the one that is run and saved.
nn.run_model(out_path="models/nn_300d_gru_dense.hdf5",
             **dict(run_args, model=model))