In [151]:
import pickle
import pandas as pd
import numpy as np
from importlib import reload
from helpers import constants; reload(constants)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from helpers.helper_functions import LossAndErrorPrintingCallback
from tensorflow.keras.callbacks import CSVLogger
import datetime
import os

In [152]:
# Label selection and data locations, all read from helpers.constants.
select_label = constants.SELECT_LABEL
intermediate_path, model_data_path, sample_data_path = (
    constants.ITM_DATA_DIR,
    constants.PRCD_DATA_DIR,
    constants.SAMPLE_DATA_DIR,
)

# Text limits: max_len caps the number of words used per post; max_word_no is
# the number of unique words kept (i.e. the row count of the embedding matrix).
max_len, max_word_no = constants.MAX_SEQUENCE_LENGTH, constants.MAX_NUM_WORDS

In [121]:
# Load data
def _load_splits(path):
    """Unpickle the object stored at ``path`` and return it.

    The file is opened inside a ``with`` block so the handle is closed as soon
    as the payload has been read, instead of being left open for the lifetime
    of the kernel.

    NOTE: ``pickle.load`` can execute arbitrary code while deserializing; only
    point this at files produced by a trusted source.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Each pickle is unpacked into exactly four objects:
# train data, train labels, test data, test labels.
data_train, labels_train, data_test, labels_test = _load_splits(model_data_path)
data_train_sample, labels_train_sample, data_test_sample, labels_test_sample = _load_splits(sample_data_path)

In [171]:
# Class weights and training hyper-parameters.

# One total per label column (labels_train summed over axis 0).
class_totals = np.sum(labels_train, axis=0)
# weight = largest column total / this column's total, so the column with the
# largest total gets weight 1.0 and columns with smaller totals get more.
# NOTE(review): weights are derived from labels_train, while the fit cell
# trains on labels_train_sample -- confirm this is intended.
weight_vec = list(np.max(class_totals) / class_totals)
class_weight = dict(enumerate(weight_vec))

learning_rate, no_epoch = 0.01, 20
dropout_rate, batch_size = 0.2, 512

In [172]:
# Network: embedding -> global average pooling -> two Dense+Dropout stages
# -> 2-unit softmax output. Layers are listed in the order they are stacked.
model = keras.Sequential([
    keras.layers.Embedding(input_dim=max_word_no, output_dim=64, input_length=max_len),
    keras.layers.GlobalAveragePooling1D(),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(dropout_rate),
    keras.layers.Dense(16, activation='relu'),
    keras.layers.Dropout(dropout_rate),
    keras.layers.Dense(2, activation='softmax'),
])

In [173]:
# Adam optimizer with the notebook's learning rate.
# `learning_rate` is the supported keyword (`lr` is only a deprecated alias),
# and the explicit `decay=0.0` is dropped: it merely restated the default and
# newer Keras optimizers reject the `decay` argument outright.
adam = Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
# NOTE(review): binary_crossentropy is paired with a 2-unit softmax output;
# presumably the labels are one-hot with two columns -- confirm, and consider
# whether categorical_crossentropy is the intended loss.
model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])

In [174]:
# Callback function
# Per-epoch metrics are written to a CSV file whose name encodes the start
# time (month/day/hour/minute/second) and the main hyper-parameters.
log_dir = constants.LOG_DIR
# Create the log directory up front so the logger does not fail on a missing path.
os.makedirs(log_dir, exist_ok=True)
current_time = datetime.datetime.now().strftime('%m%d%H%M%S')
csv_log_path = os.path.join(
    log_dir,
    '{}_csvlogger_lr{}_e{}_do{}.csv'.format(current_time, learning_rate, no_epoch, dropout_rate),
)
csvlogger = CSVLogger(filename=csv_log_path)

In [175]:
# Estimate model
# Fit on the sample training split. validation_split=0.1 reserves a tenth of
# the supplied data for validation (the recorded run shows 20236 training and
# 2249 validation samples); per-epoch metrics go to the CSV logger.
fit_options = dict(
    validation_split=0.1,
    epochs=no_epoch,
    batch_size=batch_size,
    shuffle=True,
    class_weight=class_weight,
    callbacks=[csvlogger],
    verbose=1,
)
model_history = model.fit(data_train_sample, labels_train_sample, **fit_options)

Train on 20236 samples, validate on 2249 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [176]:
# Score the fitted model on the test portion of the sample data; the returned
# values line up with model.metrics_names.
results = model.evaluate(
    x=data_test_sample,
    y=labels_test_sample,
    batch_size=128,
    verbose=1,
)



In [177]:
# Print one "<metric>: <value>" line per evaluation metric, three decimals each.
metric_line = "%s: %.3f"
for name, value in zip(model.metrics_names, results):
    print(metric_line % (name, value))

loss: 1.968
accuracy: 0.627


In [178]:
# Load the TensorBoard notebook extension.
# NOTE(review): the recorded run of this cell raised
# "ModuleNotFoundError: No module named 'tensorboard.notebook'"; presumably the
# installed tensorboard package lacks notebook support -- TODO confirm the
# installed version before relying on this cell.
%load_ext tensorboard

ModuleNotFoundError: No module named 'tensorboard.notebook'