In [9]:
import pickle
import pandas as pd
import numpy as np
from importlib import reload
from helpers import constants; reload(constants)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from helpers.helper_functions import LossAndErrorPrintingCallback
from tensorflow.keras.callbacks import CSVLogger
from datetime import datetime
import os
from tensorboard.plugins.hparams import api as hp

In [10]:
select_label = constants.SELECT_LABEL
intermediate_path = constants.ITM_DATA_DIR
model_data_path = constants.PRCD_DATA_DIR
sample_data_path = constants.SAMPLE_DATA_DIR
max_len = constants.MAX_SEQUENCE_LENGTH # max number of words in a post to use
max_word_no = constants.MAX_NUM_WORDS # how many unique words to use (i.e num rows in embedding vector)

In [11]:
#Load data
data_train, labels_train, data_test, labels_test = pickle.load(open(model_data_path,'rb'))
data_train_sample, labels_train_sample, data_test_sample, labels_test_sample = pickle.load(open(sample_data_path,'rb'))

In [12]:
X_train, X_test, y_train, y_test = data_train_sample, data_test_sample, labels_train_sample, labels_test_sample

In [13]:
# Initialize parameters and hyper-parameters
weight_vec = list(np.max(np.sum(y_train, axis=0))/np.sum(y_train, axis=0))
class_weight = {i: weight_vec[i] for i in range(y_train.shape[1])}
no_epoch = 20

In [5]:
log_dir = os.path.join('..','log_gender_self_nn')
hparam_dir=os.path.join('..','log_gender_self_nn','hparam_tuning')

In [6]:
HP_LR = hp.HParam('learning_rate', hp.Discrete([0.0001, 0.001, 0.01])
HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([16, 32]))
HP_DROPOUT = hp.HParam('dropout', hp.Discrete([0.1, 0.2, 0.3])
HP_BATCH = hp.HParam('batch_size', hp.Discrete([128, 256, 512]))

METRIC_ACCURACY = 'accuracy'

with tf.summary.create_file_writer(hparam_dir).as_default():
    hp.hparams_config(
        hparams=[HP_LR, HP_NUM_UNITS, HP_DROPOUT, HP_BATCH],
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
      )

In [7]:
def train_test_model(hparams, log_dir):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Embedding(input_dim = max_word_no,output_dim = 64, input_length=max_len,trainable=True),
        tf.keras.layers.GlobalAveragePooling1D(),
        tf.keras.layers.Dense(64, activation=tf.nn.relu),
        tf.keras.layers.Dropout(hparams[HP_DROPOUT]),
        tf.keras.layers.Dense(hparams[HP_NUM_UNITS], activation=tf.nn.relu),
        tf.keras.layers.Dropout(hparams[HP_DROPOUT]),
        tf.keras.layers.Dense(y_train.shape[1], activation=tf.nn.softmax)])
    
    adam = Adam(lr=hparams[HP_LR], beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])
    
    model.fit(X_train, y_train, validation_split=0.1, epochs=no_epoch, batch_size=hparams[HP_BATCH],
             shuffle=True, class_weight=class_weight, verbose=1,
              callbacks = [tf.keras.callbacks.TensorBoard(log_dir, histogram_freq=1)]
             )
    _, accuracy = model.evaluate(X_test, y_test)
    return accuracy

In [8]:
session_num = 0

for num_units in HP_NUM_UNITS.domain.values:
    for dropout_rate in HP_DROPOUT.domain.values:
        for learning_rate in HP_LR.domain.values:
            for batch_size in HP_BATCH.domain.values:
                hparams = {
                    HP_NUM_UNITS: num_units,
                    HP_DROPOUT: dropout_rate,
                    HP_LR: learning_rate,
                    HP_BATCH: batch_size
                }

                run_name = "run-%d" % session_num
                print('--- Starting trial: %s' % run_name)
                print({h.name: hparams[h] for h in hparams})
                logdir = hparam_dir + '\\nu_' + str(hparams[HP_NUM_UNITS]) + '_dp_' + str(hparams[HP_DROPOUT]) + '_lr_' + str(hparams[HP_LR]) + '_bz_' + str(hparams[HP_BATCH])
                with tf.summary.create_file_writer(logdir + '\\metrics').as_default():
                    hp.hparams(hparams) # record the values used in this trial
                    accuracy = train_test_model(hparams, logdir)
                    tf.summary.scalar(METRIC_ACCURACY, accuracy, step=1)
                session_num += 1

--- Starting trial: run-0
{'num_units': 16, 'dropout': 0.1, 'learning_rate': 0.0001, 'batch_size': 128}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-1
{'num_units': 16, 'dropout': 0.1, 'learning_rate': 0.0001, 'batch_size': 256}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-2
{'num_units': 16, 'dropout': 0.1, 'learning_rate': 0.0001, 'batch_size': 512}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-3
{'num_units': 16, 'dropout': 0.1, 'learning_rate': 0.01, 'batch_size': 128}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-4
{'num_units': 16, 'dropout': 0.1, 'learning_rate': 0.01, 'batch_size': 256}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-5
{'num_units': 16, 'dropout': 0.1, 'learning_rate': 0.01, 'batch_size': 512}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-6
{'num_units': 16, 'dropout': 0.2, 'learning_rate': 0.0001, 'batch_size': 128}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-7
{'num_units': 16, 'dropout': 0.2, 'learning_rate': 0.0001, 'batch_size': 256}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-8
{'num_units': 16, 'dropout': 0.2, 'learning_rate': 0.0001, 'batch_size': 512}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-9
{'num_units': 16, 'dropout': 0.2, 'learning_rate': 0.01, 'batch_size': 128}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-10
{'num_units': 16, 'dropout': 0.2, 'learning_rate': 0.01, 'batch_size': 256}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-11
{'num_units': 16, 'dropout': 0.2, 'learning_rate': 0.01, 'batch_size': 512}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-12
{'num_units': 32, 'dropout': 0.1, 'learning_rate': 0.0001, 'batch_size': 128}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-13
{'num_units': 32, 'dropout': 0.1, 'learning_rate': 0.0001, 'batch_size': 256}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-14
{'num_units': 32, 'dropout': 0.1, 'learning_rate': 0.0001, 'batch_size': 512}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-15
{'num_units': 32, 'dropout': 0.1, 'learning_rate': 0.01, 'batch_size': 128}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-16
{'num_units': 32, 'dropout': 0.1, 'learning_rate': 0.01, 'batch_size': 256}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-17
{'num_units': 32, 'dropout': 0.1, 'learning_rate': 0.01, 'batch_size': 512}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-18
{'num_units': 32, 'dropout': 0.2, 'learning_rate': 0.0001, 'batch_size': 128}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-19
{'num_units': 32, 'dropout': 0.2, 'learning_rate': 0.0001, 'batch_size': 256}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-20
{'num_units': 32, 'dropout': 0.2, 'learning_rate': 0.0001, 'batch_size': 512}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-21
{'num_units': 32, 'dropout': 0.2, 'learning_rate': 0.01, 'batch_size': 128}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-22
{'num_units': 32, 'dropout': 0.2, 'learning_rate': 0.01, 'batch_size': 256}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


--- Starting trial: run-23
{'num_units': 32, 'dropout': 0.2, 'learning_rate': 0.01, 'batch_size': 512}
Train on 40480 samples, validate on 4498 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
