In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell executes, so edits to local packages (e.g. those reached via
# the sys.path tweak below) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
from pathlib import Path
sys.path.append('..')

import pickle
import json

# Script params

In [3]:
# Fixed sequence length: passed as `maxlen` to pad_sequences and as
# `input_length` to the Embedding layer further down.
max_len: int = 100
# Second argument of the Embedding layer, i.e. the size of each token vector.
embedding_dim: int = 32

# Load train and test data

In [4]:
from keras.utils import pad_sequences

2023-08-13 00:22:59.121867: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
# NOTE(review): machine-specific absolute location, kept so this cell still finds
# the files on the original machine. Point DATA_DIR at your own copy (ideally a
# path relative to the project) to run the notebook elsewhere.
DATA_DIR = Path('/Users/lukaszmaczewski/Documents/Learning/DeepNeuralNetworks/keras/data/aclImdb')

# Pickled train / test corpora.
# SECURITY: pickle.load can execute arbitrary code; only load pickles that were
# produced locally by a trusted pipeline.
with open(DATA_DIR / 'corpus_train_df.pkl', 'rb') as f:
    train_df = pickle.load(f)
with open(DATA_DIR / 'corpus_test_df.pkl', 'rb') as f:
    test_df = pickle.load(f)

# Vocabulary lookups. The files are read explicitly as UTF-8 so decoding does not
# depend on the platform's default encoding.
# Note: JSON object keys are always strings, so index2token is keyed by str.
with open(DATA_DIR / 'token2index.json', 'r', encoding='utf-8') as f:
    token2index = json.load(f)
with open(DATA_DIR / 'index2token.json', 'r', encoding='utf-8') as f:
    index2token = json.load(f)

In [6]:
# One past the largest token index; used as the first (vocabulary-size)
# argument of the Embedding layer.
max_index = 1 + max(token2index.values())

In [7]:
# Bring every token-index sequence to exactly `max_len` entries
# (pad_sequences is used with its default padding/truncating settings).
x_train, x_test = (
    pad_sequences(frame.token_indexes.values, maxlen=max_len)
    for frame in (train_df, test_df)
)
# Targets are taken from the `labels` column of each split.
y_train, y_test = train_df.labels.values, test_df.labels.values

In [8]:
# Sanity check: features should be (n_samples, max_len), labels (n_samples,).
x_train.shape, y_train.shape

((25000, 100), (25000,))

In [9]:
# Same sanity check for the test split.
x_test.shape, y_test.shape

((25000, 100), (25000,))

# Training a simple DNN with an embedding layer

In [10]:
from keras.models import Sequential
from keras.layers import Flatten, Dense, Embedding, BatchNormalization, Dropout
from keras.metrics import AUC, BinaryAccuracy
from keras.optimizers import RMSprop

In [11]:
# Architecture: token embedding -> flatten -> batch norm -> two small ReLU
# layers (each followed by 20% dropout) -> single sigmoid unit for the
# binary label.
model = Sequential([
    Embedding(max_index, embedding_dim, input_length=max_len),
    Flatten(),
    BatchNormalization(),
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(8, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])

In [12]:
# RMSprop with a small learning rate and mild momentum; track accuracy and
# ROC AUC alongside the binary cross-entropy loss.
optimizer = RMSprop(momentum=0.3, learning_rate=1e-4)
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=[BinaryAccuracy(), AUC()],
)

In [13]:
# Print the per-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 32)           554944    
                                                                 
 flatten (Flatten)           (None, 3200)              0         
                                                                 
 batch_normalization (BatchN  (None, 3200)             12800     
 ormalization)                                                   
                                                                 
 dense (Dense)               (None, 16)                51216     
                                                                 
 dropout (Dropout)           (None, 16)                0         
                                                                 
 dense_1 (Dense)             (None, 8)                 136       
                                                        

In [14]:
# Train for 15 epochs in mini-batches of 64.
# NOTE(review): the test split doubles as validation data here, so the val_*
# metrics are not an independent hold-out if they are used to choose the number
# of epochs or any hyper-parameter.
fit_history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=15,
    validation_data=(x_test, y_test),
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [15]:
from kst.plots.BasePlots import BasePlots, ScatterPlot
import pandas as pd
from plotnine import aes, geom_point, geom_line, ylim, labs

In [16]:
# One row per epoch, one column per tracked metric, plus a zero-based `epoch`
# column (taken from the row index) to use as the x axis in the plots.
df = pd.DataFrame(fit_history.history).assign(
    epoch=lambda frame: frame.index.to_list()
)
df

Unnamed: 0,loss,binary_accuracy,auc,val_loss,val_binary_accuracy,val_auc,epoch
0,0.702004,0.50868,0.513217,0.693041,0.50764,0.513667,0
1,0.669788,0.58416,0.619963,0.667944,0.61284,0.649801,1
2,0.581425,0.70772,0.782291,0.53071,0.75464,0.835776,2
3,0.434117,0.81648,0.890664,0.410948,0.81632,0.897828,3
4,0.337037,0.86592,0.932812,0.381016,0.83356,0.915504,4
5,0.277965,0.89676,0.954614,0.360738,0.84396,0.923166,5
6,0.227456,0.91956,0.969325,0.372622,0.84612,0.925023,6
7,0.186972,0.93492,0.979131,0.389431,0.84692,0.923915,7
8,0.145408,0.95352,0.987384,0.428259,0.84596,0.922279,8
9,0.120045,0.96008,0.991185,0.463891,0.84396,0.918754,9


In [17]:
# Training loss per epoch as the base layer (points plus a connecting line).
sc = ScatterPlot(
    data_set=df,
    aes=dict(x='epoch', y='loss', color="'training'"),
    main_title='Binary crossentropy',
    x_label='Epoch',
    y_label='Loss',
    legend_title='',
    line=geom_line(),
)

# Overlay the validation loss as a second series; the quoted colour value is a
# constant label, so it shows up as a legend entry instead of a column lookup.
sc.add_gg_object(
    geom_point(data=df, mapping=aes(x='epoch', y='val_loss', color="'validation'"))
)
sc.add_gg_object(
    geom_line(data=df, mapping=aes(x='epoch', y='val_loss', color="'validation'"))
)

In [None]:
# Display the loss figure built in the previous cell.
# NOTE(review): presumably `plot` exposes the assembled plotnine figure — confirm in kst.plots.
sc.plot

In [None]:
# Training AUC per epoch as the base layer (points plus a connecting line).
# Fix: the y-axis label previously read 'Binary accuracy' (copied from the
# accuracy plot) although this figure shows the AUC metric.
sc = ScatterPlot(data_set=df,
                 aes = {'x':'epoch', 'y':'auc', 'color':"'training'"},
                 main_title='Area under curve',
                 x_label='Epoch',
                 y_label='AUC',
                 legend_title='',
                 line = geom_line()
              )

# Overlay the validation AUC as a second series with its own legend entry.
sc.add_gg_object(geom_point(df, aes(x = 'epoch', y = 'val_auc', color = "'validation'")))
sc.add_gg_object(geom_line(df, aes(x = 'epoch', y = 'val_auc', color = "'validation'")))

In [None]:
# Display the AUC figure with the y axis pinned to [0, 1] via plotnine's ylim.
sc.plot + ylim(0,1)

In [None]:
# Training binary accuracy per epoch as the base layer (points plus a line).
# Fix: corrected the title typo 'Binarry accuracy'.
sc = ScatterPlot(data_set=df,
                 aes = {'x':'epoch', 'y':'binary_accuracy', 'color':"'training'"},
                 main_title='Binary accuracy',
                 x_label='Epoch',
                 y_label='Binary accuracy',
                 legend_title='',
                 line = geom_line()
              )

# Overlay the validation accuracy as a second series with its own legend entry.
sc.add_gg_object(geom_point(df, aes(x = 'epoch', y = 'val_binary_accuracy', color = "'validation'")))
sc.add_gg_object(geom_line(df, aes(x = 'epoch', y = 'val_binary_accuracy', color = "'validation'")))

In [None]:
# Display the accuracy figure with the y axis pinned to [0, 1] via plotnine's ylim.
sc.plot + ylim(0,1)