# Final Multi Model Building

In [1]:
# Quiet TensorFlow's native (C++) log output. This cell runs ahead of the
# cell that imports tensorflow, so the variable is already set at import time.
import os
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '3'})

In [2]:
import pandas as pd
import tensorflow as tf
import keras
from keras import layers
from model_helper_functions import create_fit_and_save_model
from data_pipeline import prep_data, get_train_val_test_data
from itertools import product
import re

# Import correct optimizer
import platform
if platform.system() == "Darwin" and platform.processor() == "arm":
    from keras.optimizers.legacy import RMSprop, Adam
else:
    from keras.optimizers import RMSprop, Adam



In [3]:
# Remove TF logging warnings: raise the threshold of TensorFlow's Python-side
# logger to ERROR so that lower-severity messages from that logger are dropped.
# (The C++ side is handled separately through TF_CPP_MIN_LOG_LEVEL in the first cell.)
tf.get_logger().setLevel('ERROR')

In [4]:
# Single source of truth for the random seed used by this notebook, so the
# two seeding calls below can never drift apart.
SEED = 15

# Seed TensorFlow directly, then go through Keras' helper as well.
# NOTE(review): keras.utils.set_random_seed is documented to seed Python,
# NumPy and the backend in one call, which may make the explicit TensorFlow
# call redundant - both are kept, in the original order, to preserve behaviour.
tf.random.set_seed(SEED)
keras.utils.set_random_seed(SEED)

In [5]:
# Location of the raw data, relative to this notebook.
data_dir = '../data'
# prep_data / get_train_val_test_data come from the project-local data_pipeline
# module; their internals are not visible from this notebook.
df = prep_data(data_dir)
# use_half_data=False: presumably trains on the full dataset rather than a
# subsample - TODO confirm in data_pipeline. The cell output below reports the
# resulting split proportions.
train_df, val_df, test_df = get_train_val_test_data(df, use_half_data=False)

Training set: 70%, Validation set: 22.5%, Test set: 7.5%


---

## Hyperparameter Grid

In [6]:
# Hyperparameter grid. Each list below is one axis of the grid; the next cell
# takes their Cartesian product. Only vocab_sizes and
# embed_sizes_with_pretrained_model have more than one value (4 x 4 = 16 runs);
# every other axis is pinned to a single choice.
vocab_sizes = [2000, 4000, 6000, 8000]
# Each entry pairs an embedding size with a pretrained-embedding identifier.
# NOTE(review): the identifiers look like gensim-downloader model names, and the
# first element appears to be the vector dimension - confirm in model_helper_functions.
embed_sizes_with_pretrained_model = [
    [200,'glove-twitter-200'],
    [300,'word2vec-google-news-300'],
    [300,'glove-wiki-gigaword-300'],
    [300,'fasttext-wiki-news-subwords-300']
]
batch_sizes = [32]
bidirectional_options = [True]
# Layer classes (not instances); the consumer is expected to instantiate them.
rnn_layers = [layers.GRU]
# One inner list per configuration; presumably one entry per stacked RNN layer - TODO confirm.
rnn_units_configs = [
    [8],
]
# One inner list per configuration; presumably one entry per dense layer - TODO confirm.
dense_units_configs = [
    [1],
]
activations = ['sigmoid']
final_dropouts = [0.5]
# Optimizer classes (not instances); Adam is whichever variant the platform
# check in the import cell selected.
optimizers = [Adam]

In [7]:
# Expand the grid into every parameter combination. The argument order fixes
# the position of each value inside a combination tuple, and the progress
# tracker's column order mirrors it.
all_combinations = [
    *product(
        vocab_sizes,
        embed_sizes_with_pretrained_model,
        batch_sizes,
        bidirectional_options,
        rnn_layers,
        rnn_units_configs,
        dense_units_configs,
        activations,
        final_dropouts,
        optimizers,
    )
]
print(f'There are {len(all_combinations)} parameter combinations to run.')

There are 16 parameter combinations to run.


In [8]:
# Keep track of model fitting in order to resume at a later time if needed.
# The tracker is a CSV with one row per parameter combination plus a boolean
# 'finished' column; row i describes all_combinations[i].
progress_file = './model_checkpoints_final/model_building_progress.csv'
if os.path.exists(progress_file):
    # Resume: reuse the 'finished' flags written by an earlier run.
    progress = pd.read_csv(progress_file)
    # The fit loop addresses rows by position, so a tracker produced for a
    # different grid would silently mark the wrong models as done (or raise a
    # KeyError part-way through). Fail early with an actionable message instead.
    if len(progress) != len(all_combinations):
        raise ValueError(
            f'{progress_file} has {len(progress)} rows but the current grid has '
            f'{len(all_combinations)} combinations; delete or move the stale progress file.'
        )
else:
    progress = pd.DataFrame.from_records(all_combinations, columns=['vocab_size', 'embed_size_with_pretrained_model', 
                                                                    'batch_size', 'bidirectional', 'rnn_layer', 'rnn_units', 
                                                                    'dense_units', 'activation', 'final_dropout', 'optimizer'])
    progress.insert(0, 'model', range(len(all_combinations)))
    progress['finished'] = False
    # Store the layer / optimizer classes by their short name rather than
    # their full repr (e.g. "<class '...GRU'>" -> "GRU").
    class_names = '(GRU|LSTM|Adam|RMSprop)'
    progress['rnn_layer'] = progress['rnn_layer'].apply(lambda x: re.sub(f'.*{class_names}.*','\\1',str(x)))
    progress['optimizer'] = progress['optimizer'].apply(lambda x: re.sub(f'.*{class_names}.*','\\1',str(x)))
    
    # On a fresh checkout the checkpoint directory may not exist yet, and
    # to_csv does not create missing parent directories.
    os.makedirs(os.path.dirname(progress_file), exist_ok=True)
    progress.to_csv(progress_file, index=False)
    # Read the file back, presumably so that a fresh run and a resumed run
    # both work with the same CSV-parsed representation.
    progress = pd.read_csv(progress_file)

---

## Fit Models

In [9]:
# Epoch count passed to create_fit_and_save_model for every combination in the grid.
EPOCHS = 15

In [10]:
# Fit every combination in the grid. Combinations already flagged as finished
# in the progress tracker are skipped, so re-running this cell resumes an
# interrupted run instead of starting over.
for i, params in enumerate(all_combinations):
    if progress.at[i, 'finished']:
        print(f'Model {i} has already been fitted.')
    else:
        try:
            f1_score = create_fit_and_save_model(f'model{i}',train_df, val_df, test_df, EPOCHS, params, final_fitting=True)
            # Persist the flag immediately so a later crash or interrupt does
            # not lose the record of this completed model.
            progress.at[i, 'finished'] = True
            progress.to_csv(progress_file, index=False)
            print(f'Model {i} finished with test f1_score of {f1_score:.4f}')
        except Exception as exc:
            # Best-effort: one failing configuration must not stop the rest of
            # the grid. Catching Exception (rather than a bare except) lets
            # KeyboardInterrupt / SystemExit propagate so the run can still be
            # stopped, and the cause is reported instead of being discarded.
            print(f'ERROR fitting model {i}: {type(exc).__name__}: {exc}')

Model 0 finished with test f1_score of 0.7478


Model 1 finished with test f1_score of 0.7797


Model 2 finished with test f1_score of 0.7756


Model 3 finished with test f1_score of 0.7733


Model 4 finished with test f1_score of 0.7555


Model 5 finished with test f1_score of 0.8133


Model 6 finished with test f1_score of 0.7783


Model 7 finished with test f1_score of 0.7703


Model 8 finished with test f1_score of 0.7556


Model 9 finished with test f1_score of 0.7860


Model 10 finished with test f1_score of 0.7802


Model 11 finished with test f1_score of 0.7820


Model 12 finished with test f1_score of 0.7467


Model 13 finished with test f1_score of 0.7965


Model 14 finished with test f1_score of 0.7759


Model 15 finished with test f1_score of 0.7720


---