#  Resources Used

- https://www.youtube.com/watch?v=lV09_8432VA - Optimizing with TensorBoard - Deep Learning w/ Python, TensorFlow & Keras p.5

# Imports

In [1]:
# -------------- Modelling Packages --------------
# For modeling
from keras.models import Model
from keras.layers import Concatenate, Input, Dense
from keras.layers.recurrent import LSTM

# Callback Functions
from keras.callbacks import TensorBoard, ModelCheckpoint

# For Timestamping Models
import time

# -------------- General Packages --------------
# Data Manipulation
import pandas as pd
import numpy as np

# For Saving Files
import pickle
import os

Using TensorFlow backend.


# Loading The Dataset

In [2]:
# Given the split dataset directory, return the train/test split
def load_dataset(split_data_dir):
    """Load the pickled train/test split stored under ``split_data_dir``.

    The four files ``X_train.pickle``, ``X_test.pickle``, ``y_train.pickle``
    and ``y_test.pickle`` are read in that order. Each file name is appended
    to ``split_data_dir`` by plain string concatenation, so the argument must
    already end with a path separator (e.g. ``'./split_data/'``).

    Args:
        split_data_dir: Path prefix (directory with trailing separator) that
            contains the four pickle files.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)`` of unpickled objects.

    Raises:
        OSError: If one of the files cannot be opened (e.g. it is missing).
    """
    def _load(file_name):
        # NOTE(security): pickle.load can execute arbitrary code while
        # deserialising; only point this at files you created yourself.
        # The `with` block closes the handle even if unpickling fails.
        with open(split_data_dir + file_name, 'rb') as pickle_in:
            return pickle.load(pickle_in)

    X_train = _load('X_train.pickle')
    X_test = _load('X_test.pickle')
    y_train = _load('y_train.pickle')
    y_test = _load('y_test.pickle')

    return X_train, X_test, y_train, y_test
    
# Location of the pickled split; the trailing slash is required because
# load_dataset appends the file names by string concatenation.
split_data_dir = './split_data/'
(X_train, X_test,
 y_train, y_test) = load_dataset(split_data_dir)

# Parameters

In [3]:
# Tokenizer values: length at which each input sequence is cut/split
SENTENCE_SIZE = 20

# Widths of the LSTM and Dense layers
LSTM_SIZE = 500
DENSE_SIZE = 20

# Checkpoint location, file extension and the name identifying this run
MODEL_DIR = './models/'
EXTENSION = '.hdf5'
NAME = 'FN-{}S-{}LSTM-{}D-{}'.format(
    SENTENCE_SIZE,
    LSTM_SIZE,
    DENSE_SIZE,
    time.time(),  # timestamp so that every run gets its own name
)

# TensorBoard event files for this run are written to logs/<NAME>
log_dir = os.path.join("logs", NAME)

# Compile parameters (`activation` is presumably meant for the output layer)
metrics = ['accuracy']
loss = 'sparse_categorical_crossentropy'
optimizer = 'RMSProp'
activation = 'softmax'

# Fitting parameters
batch_size = 128
epochs = 100

# Callbacks
# TensorBoard logging; inspect from the command line with:
#   tensorboard --logdir logs/
tensorboard = TensorBoard(log_dir=log_dir)

# Checkpoint to MODEL_DIR/NAME.hdf5, overwriting the file only when the
# monitored validation accuracy improves (save_best_only=True).
checkpointer = ModelCheckpoint(
    filepath=MODEL_DIR + NAME + EXTENSION,
    monitor='val_accuracy',
    mode='auto',
    save_best_only=True,
    verbose=1,
)

callbacks = [tensorboard, checkpointer]

# Model Structure

In [4]:
# Two-branch network: each title goes through its own LSTM + Dense stack,
# the two branch outputs are concatenated and classified by a final Dense layer.

# Width of the output layer, i.e. the number of target classes.
# Presumably the labels are the integer ids 0..2 (sparse categorical loss) - confirm.
NUM_CLASSES = 3

# FIRST MODEL: TITLE1_EN
first_input = Input(shape=(SENTENCE_SIZE, 1))
first_LSTM = LSTM(LSTM_SIZE)(first_input)
first_dense = Dense(DENSE_SIZE)(first_LSTM)

# SECOND MODEL: TITLE2_EN
second_input = Input(shape=(SENTENCE_SIZE, 1))
second_LSTM = LSTM(LSTM_SIZE)(second_input)
second_dense = Dense(DENSE_SIZE)(second_LSTM)

# MERGE MODEL
# Join the two branch representations along the feature axis.
merged = Concatenate(axis=1)([first_dense, second_dense])
# NOTE(review): the intermediate Dense layers are created without an
# activation argument; consider whether a non-linearity is intended.
merged_dense = Dense(DENSE_SIZE)(merged)
# Use the `activation` chosen in the Parameters cell instead of a hard-coded
# string, so that changing the parameter actually changes the model.
output_layer = Dense(NUM_CLASSES, activation=activation)(merged_dense)

model = Model(inputs=[first_input, second_input], outputs=output_layer)
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Training

In [5]:
# The checkpoint callback writes into MODEL_DIR, so make sure it exists first.
os.makedirs(os.path.dirname(MODEL_DIR), exist_ok=True)


def _split_titles(features):
    """Split `features` along axis 1 at SENTENCE_SIZE into the two model inputs."""
    return [features[:, :SENTENCE_SIZE], features[:, SENTENCE_SIZE:]]


# Training the model: the first SENTENCE_SIZE steps feed the first input,
# the remaining steps feed the second input; the test split is used for validation.
model.fit(
    x=_split_titles(X_train),
    y=y_train,
    validation_data=(_split_titles(X_test), y_test),
    batch_size=batch_size,
    epochs=epochs,
    callbacks=callbacks,
)

Train on 14667 samples, validate on 5052 samples
Epoch 1/100
  256/14667 [..............................] - ETA: 1:34 - loss: 3.1722 - accuracy: 0.3594




Epoch 00001: val_accuracy improved from -inf to 0.37332, saving model to ./models/FN-20S-500LSTM-20D-1572523243.466474.hdf5
Epoch 2/100

Epoch 00002: val_accuracy improved from 0.37332 to 0.45705, saving model to ./models/FN-20S-500LSTM-20D-1572523243.466474.hdf5
Epoch 3/100

Epoch 00003: val_accuracy did not improve from 0.45705
Epoch 4/100

Epoch 00004: val_accuracy improved from 0.45705 to 0.45863, saving model to ./models/FN-20S-500LSTM-20D-1572523243.466474.hdf5
Epoch 5/100

Epoch 00005: val_accuracy improved from 0.45863 to 0.47842, saving model to ./models/FN-20S-500LSTM-20D-1572523243.466474.hdf5
Epoch 6/100

Epoch 00006: val_accuracy did not improve from 0.47842
Epoch 7/100

Epoch 00007: val_accuracy improved from 0.47842 to 0.47981, saving model to ./models/FN-20S-500LSTM-20D-1572523243.466474.hdf5
Epoch 8/100

Epoch 00008: val_accuracy improved from 0.47981 to 0.49406, saving model to ./models/FN-20S-500LSTM-20D-1572523243.466474.hdf5
Epoch 9/100

Epoch 00009: val_accuracy 

KeyboardInterrupt: 

# Predicting

In [None]:
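# TODO: untested prediction sketch - `X` and `n` are not defined in this notebook,
# and the second slice probably needs to be `[n:]` (second title) - confirm before use.
#prediction = model.predict([X.iloc[0][:n].reshape(1,20,1),X.loc[0][:n].reshape(1,20,1)])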