In [1]:
import pandas as pd
import numpy as np
import warnings

import plotly.express as px
from plotly.subplots import make_subplots

# Notebook configuration: install an 'ignore' filter so warnings are not shown
warnings.filterwarnings(action='ignore')

# Relative directory paths used by the notebook (data files / best models)
path, path_models = './data_files/', './best_models/'

# Results: DistilBERT Classifier

In [2]:
# Load the DistilBert training log from the configured data directory (`path`)
# instead of repeating the directory literal in every load cell.
distilbertclass = pd.read_csv(path + 'train_log_distil.csv')

In [3]:
# Preview the first three rows of the DistilBert training log.
distilbertclass.head(3)

Unnamed: 0,epoch,AUC,categorical_accuracy,loss,precision,recall,val_AUC,val_categorical_accuracy,val_loss,val_precision,val_recall
0,0,0.743094,0.259849,1.884916,0.0,0.0,0.754699,0.285714,1.855672,0.0,0.0
1,1,0.747976,0.264878,1.873322,0.25,0.000419,0.744017,0.266917,1.881466,0.0,0.0
2,2,0.792658,0.369656,1.748215,0.4375,0.044007,0.80657,0.390977,1.686006,0.447205,0.270677


In [4]:
def assign_trial(row, prev_epoch=[-1], trial=[1]):
    """Return the running trial number for one row of a training log.

    Meant to be applied row by row (e.g. ``DataFrame.apply(..., axis=1)``):
    the trial counter is incremented whenever the row's 'epoch' is lower
    than the epoch seen on the previous call.

    The state lives in the mutable default lists. They are created once,
    when this ``def`` statement runs, and are shared by every call that
    omits them, so the count carries on across separate ``apply`` calls
    until the function is defined again.

    Args:
        row: Mapping or Series with an 'epoch' entry.
        prev_epoch: One-element list holding the epoch from the previous
            call (starts at -1).
        trial: One-element list holding the current trial number
            (starts at 1).

    Returns:
        The trial number after processing this row.
    """
    current_epoch = row['epoch']
    restarted = current_epoch < prev_epoch[0]
    # Remember this epoch for the next call before updating the counter.
    prev_epoch[0] = current_epoch
    if restarted:
        trial[0] = trial[0] + 1
    return trial[0]

In [5]:
# Number the trials: a drop in 'epoch' from one row to the next is treated as
# the start of a new trial, and numbering starts at 1.
# Computed in one vectorised pass so the result does not depend on any state
# left over from earlier cell executions.
trial_ids = distilbertclass['epoch'].diff().lt(0).cumsum().add(1)

# Keep the cell re-runnable: DataFrame.insert raises if the column already exists.
if 'trail' in distilbertclass.columns:
    distilbertclass['trail'] = trial_ids
else:
    distilbertclass.insert(0, 'trail', trial_ids)

In [6]:
# Show the last three rows, now including the inserted 'trail' column.
distilbertclass.tail(3)

Unnamed: 0,trail,epoch,AUC,categorical_accuracy,loss,precision,recall,val_AUC,val_categorical_accuracy,val_loss,val_precision,val_recall
112,11,9,0.981786,0.818525,0.530384,0.84897,0.777452,0.916599,0.556391,1.288521,0.585366,0.541353
113,11,10,0.985118,0.836966,0.479688,0.868242,0.800922,0.916463,0.597744,1.302394,0.62449,0.575188
114,11,11,0.9872,0.84451,0.438982,0.88078,0.814334,0.915034,0.586466,1.326323,0.609053,0.556391


In [7]:
# Build one line chart per validation metric (one line per 'trail' value).
# Nothing is displayed here; the figures are kept as fig1..fig4.
fig1, fig2, fig3, fig4 = (
    px.line(
        distilbertclass,
        x="epoch",
        y=metric,
        color='trail',
        color_discrete_sequence=px.colors.qualitative.Dark24,
    ).update_layout(
        title={'text': f'DistilBert Classifier - {label}', 'y': 0.95, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
        showlegend=False,
        xaxis=dict(range=[0, 11.1]),
    )
    for metric, label in (
        ("val_AUC", 'Validation AUC'),
        ("val_loss", 'Validation Loss'),
        ("val_categorical_accuracy", 'Validation Categorical Accuracy'),
        ("val_precision", 'Validation Precision'),
    )
)

In [8]:
# Create a 2x2 grid with one panel per validation metric
fig = make_subplots(rows=2, cols=2, subplot_titles=('Validation AUC', 'Validation Loss', 'Validation Categorical Accuracy', 'Validation Precision'))

# Copy the traces of each per-metric figure into its panel.
# Only the first panel's traces keep a legend entry: plotly express gives the
# traces of the same 'trail' value the same name/legendgroup in every figure,
# so one entry per trial is enough. This replaces the former extra "legend"
# figure, whose val_AUC lines were drawn on top of the Validation Precision panel.
panels = ((fig1, 1, 1), (fig2, 1, 2), (fig3, 2, 1), (fig4, 2, 2))
for source_fig, row, col in panels:
    for trace in source_fig.data:
        trace.showlegend = source_fig is fig1
        fig.add_trace(trace, row=row, col=col)

# Layout (same title and legend placement as the other model summary figures)
fig.update_layout(height=800, width=1400)
fig.update_layout(
    title={'text': 'DistilBert Classifier Metrics', 'y': 0.95, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
    showlegend=True,
    legend=dict(x=1.05, y=1, traceorder='normal')  # place the legend right of the plots
)
fig.update_layout(title_font_size=30)

fig.show()

# Results: LSTM with PCA

In [9]:
# Load the LSTM + PCA training log from the configured data directory (`path`)
# instead of repeating the directory literal in every load cell.
pca = pd.read_csv(path + 'train_log_LSTM_PCA.csv')

In [10]:
# Preview the first three rows of the LSTM + PCA training log.
pca.head(3)

Unnamed: 0,epoch,AUC,categorical_accuracy,loss,precision,recall,val_AUC,val_categorical_accuracy,val_loss,val_precision,val_recall
0,0,0.547331,0.067045,2.999618,0.0,0.0,0.555771,0.001825,2.193382,0.0,0.0
1,1,0.557438,0.069108,2.957476,0.0,0.0,0.555761,0.001825,2.186373,0.0,0.0
2,2,0.555375,0.079938,2.950368,0.0,0.0,0.556332,0.001825,2.178847,0.0,0.0


In [11]:
def assign_trial(row, prev_epoch=[-1], trial=[1]):
    """Return the running trial number for one row of a training log.

    Meant to be applied row by row (e.g. ``DataFrame.apply(..., axis=1)``):
    the trial counter is incremented whenever the row's 'epoch' is lower
    than the epoch seen on the previous call.

    The state lives in the mutable default lists, which are created when
    this ``def`` statement runs. Executing this definition therefore starts
    the counter again from trial 1 with a previous epoch of -1; calls that
    omit the lists keep sharing them afterwards.

    Args:
        row: Mapping or Series with an 'epoch' entry.
        prev_epoch: One-element list holding the epoch from the previous
            call (starts at -1).
        trial: One-element list holding the current trial number
            (starts at 1).

    Returns:
        The trial number after processing this row.
    """
    current_epoch = row['epoch']
    restarted = current_epoch < prev_epoch[0]
    # Remember this epoch for the next call before updating the counter.
    prev_epoch[0] = current_epoch
    if restarted:
        trial[0] = trial[0] + 1
    return trial[0]

In [12]:
# Number the trials: a drop in 'epoch' from one row to the next is treated as
# the start of a new trial, and numbering starts at 1.
# Computed in one vectorised pass so the result does not depend on any state
# left over from earlier cell executions.
trial_ids = pca['epoch'].diff().lt(0).cumsum().add(1)

# Keep the cell re-runnable: DataFrame.insert raises if the column already exists.
if 'trail' in pca.columns:
    pca['trail'] = trial_ids
else:
    pca.insert(0, 'trail', trial_ids)

In [13]:
# Show the first three rows, now including the inserted 'trail' column.
pca.head(3)

Unnamed: 0,trail,epoch,AUC,categorical_accuracy,loss,precision,recall,val_AUC,val_categorical_accuracy,val_loss,val_precision,val_recall
0,1,0,0.547331,0.067045,2.999618,0.0,0.0,0.555771,0.001825,2.193382,0.0,0.0
1,1,1,0.557438,0.069108,2.957476,0.0,0.0,0.555761,0.001825,2.186373,0.0,0.0
2,1,2,0.555375,0.079938,2.950368,0.0,0.0,0.556332,0.001825,2.178847,0.0,0.0


In [14]:
# Build one line chart per validation metric (one line per 'trail' value).
# Nothing is displayed here; the figures are kept as fig1..fig4.
fig1, fig2, fig3, fig4 = (
    px.line(
        pca,
        x="epoch",
        y=metric,
        color='trail',
        color_discrete_sequence=px.colors.qualitative.Dark24,
    ).update_layout(
        title={'text': f'LSTM PCA Classifier - {label}', 'y': 0.95, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
        showlegend=False,
        xaxis=dict(range=[0, 11.1]),
    )
    for metric, label in (
        ("val_AUC", 'Validation AUC'),
        ("val_loss", 'Validation Loss'),
        ("val_categorical_accuracy", 'Validation Categorical Accuracy'),
        ("val_precision", 'Validation Precision'),
    )
)

In [15]:
# Create a 2x2 grid with one panel per validation metric
fig = make_subplots(rows=2, cols=2, subplot_titles=('Validation AUC', 'Validation Loss', 'Validation Categorical Accuracy', 'Validation Precision'))

# Copy the traces of each per-metric figure into its panel.
# Only the first panel's traces keep a legend entry: plotly express gives the
# traces of the same 'trail' value the same name/legendgroup in every figure,
# so one entry per trial is enough. This replaces the former extra "legend"
# figure, which was built from the DistilBert log and drew its val_AUC lines
# on top of this model's Validation Precision panel.
panels = ((fig1, 1, 1), (fig2, 1, 2), (fig3, 2, 1), (fig4, 2, 2))
for source_fig, row, col in panels:
    for trace in source_fig.data:
        trace.showlegend = source_fig is fig1
        fig.add_trace(trace, row=row, col=col)

# Layout
fig.update_layout(height=800, width=1400)
fig.update_layout(
    title={'text': 'LSTM PCA Classifier Metrics', 'y': 0.95, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
    showlegend=True,
    legend=dict(x=1.05, y=1, traceorder='normal')  # place the legend right of the plots
)
fig.update_layout(title_font_size=30)

fig.show()



# Results: LSTM

In [16]:
# Load the LSTM training log from the configured data directory (`path`)
# instead of repeating the directory literal in every load cell.
lstm = pd.read_csv(path + 'train_log_LSTM.csv')

In [17]:
# Preview the first three rows of the LSTM training log.
lstm.head(3)

Unnamed: 0,epoch,AUC,categorical_accuracy,loss,precision,recall,val_AUC,val_categorical_accuracy,val_loss,val_precision,val_recall
0,0,0.341903,0.025271,3.546433,0.0,0.0,0.44113,0.144161,2.44596,0.0,0.0
1,1,0.484617,0.129448,2.787602,0.1875,0.001547,0.511219,0.14781,2.335315,0.0,0.0
2,2,0.56202,0.13409,2.606878,0.114286,0.010315,0.589872,0.14781,2.280632,0.0,0.0


In [18]:
def assign_trial(row, prev_epoch=[-1], trial=[1]):
    """Return the running trial number for one row of a training log.

    Meant to be applied row by row (e.g. ``DataFrame.apply(..., axis=1)``):
    the trial counter is incremented whenever the row's 'epoch' is lower
    than the epoch seen on the previous call.

    The state lives in the mutable default lists, which are created when
    this ``def`` statement runs. Executing this definition therefore starts
    the counter again from trial 1 with a previous epoch of -1; calls that
    omit the lists keep sharing them afterwards.

    Args:
        row: Mapping or Series with an 'epoch' entry.
        prev_epoch: One-element list holding the epoch from the previous
            call (starts at -1).
        trial: One-element list holding the current trial number
            (starts at 1).

    Returns:
        The trial number after processing this row.
    """
    current_epoch = row['epoch']
    restarted = current_epoch < prev_epoch[0]
    # Remember this epoch for the next call before updating the counter.
    prev_epoch[0] = current_epoch
    if restarted:
        trial[0] = trial[0] + 1
    return trial[0]

In [19]:
# Number the trials: a drop in 'epoch' from one row to the next is treated as
# the start of a new trial, and numbering starts at 1.
# Computed in one vectorised pass so the result does not depend on any state
# left over from earlier cell executions.
trial_ids = lstm['epoch'].diff().lt(0).cumsum().add(1)

# Keep the cell re-runnable: DataFrame.insert raises if the column already exists.
if 'trail' in lstm.columns:
    lstm['trail'] = trial_ids
else:
    lstm.insert(0, 'trail', trial_ids)

In [20]:
# Show the first three rows, now including the inserted 'trail' column.
lstm.head(3)

Unnamed: 0,trail,epoch,AUC,categorical_accuracy,loss,precision,recall,val_AUC,val_categorical_accuracy,val_loss,val_precision,val_recall
0,1,0,0.341903,0.025271,3.546433,0.0,0.0,0.44113,0.144161,2.44596,0.0,0.0
1,1,1,0.484617,0.129448,2.787602,0.1875,0.001547,0.511219,0.14781,2.335315,0.0,0.0
2,1,2,0.56202,0.13409,2.606878,0.114286,0.010315,0.589872,0.14781,2.280632,0.0,0.0


In [21]:
# Build one line chart per validation metric (one line per 'trail' value).
# Nothing is displayed here; the figures are kept as fig1..fig4.
fig1, fig2, fig3, fig4 = (
    px.line(
        lstm,
        x="epoch",
        y=metric,
        color='trail',
        color_discrete_sequence=px.colors.qualitative.Dark24,
    ).update_layout(
        title={'text': f'LSTM Classifier - {label}', 'y': 0.95, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
        showlegend=False,
        xaxis=dict(range=[0, 11.1]),
    )
    for metric, label in (
        ("val_AUC", 'Validation AUC'),
        ("val_loss", 'Validation Loss'),
        ("val_categorical_accuracy", 'Validation Categorical Accuracy'),
        ("val_precision", 'Validation Precision'),
    )
)

In [22]:
# Create a 2x2 grid with one panel per validation metric
fig = make_subplots(rows=2, cols=2, subplot_titles=('Validation AUC', 'Validation Loss', 'Validation Categorical Accuracy', 'Validation Precision'))

# Copy the traces of each per-metric figure into its panel.
# Only the first panel's traces keep a legend entry: plotly express gives the
# traces of the same 'trail' value the same name/legendgroup in every figure,
# so one entry per trial is enough. This replaces the former extra "legend"
# figure, which was built from the DistilBert log and drew its val_AUC lines
# on top of this model's Validation Precision panel.
panels = ((fig1, 1, 1), (fig2, 1, 2), (fig3, 2, 1), (fig4, 2, 2))
for source_fig, row, col in panels:
    for trace in source_fig.data:
        trace.showlegend = source_fig is fig1
        fig.add_trace(trace, row=row, col=col)

# Layout
fig.update_layout(height=800, width=1400)
fig.update_layout(
    title={'text': 'LSTM Classifier Metrics', 'y': 0.95, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
    showlegend=True,
    legend=dict(x=1.05, y=1, traceorder='normal')  # place the legend right of the plots
)
fig.update_layout(title_font_size=30)

fig.show()



In [23]:
# Report, for each model's log, the full row with the highest validation AUC.
banner = f"{'.' * 10} Best AUC Models {'.' * 10}\n"
print(banner)

separator = "-" * 37
sections = []
for label, log in (('Distil', distilbertclass), ('LSTM-PCA', pca), ('LSTM', lstm)):
    # argmax is positional, so the row is fetched with iloc
    best_row = log.iloc[log.val_AUC.argmax(), :]
    sections.append(f'Validation AUC - {label}:\n{best_row}')

# Sections are separated by a dashed rule on its own line
print(f'\n{separator}\n'.join(sections))

.......... Best AUC Models ..........

Validation AUC - Distil:
trail                       5.000000
epoch                       5.000000
AUC                         0.958004
categorical_accuracy        0.722967
loss                        0.816160
precision                   0.778739
recall                      0.641660
val_AUC                     0.935818
val_categorical_accuracy    0.642857
val_loss                    1.015937
val_precision               0.690583
val_recall                  0.578947
Name: 45, dtype: float64
-------------------------------------
Validation AUC - LSTM-PCA:
trail                       47.000000
epoch                       24.000000
AUC                          0.821653
categorical_accuracy         0.385250
loss                         1.703779
precision                    0.521875
recall                       0.172254
val_AUC                      0.770173
val_categorical_accuracy     0.286496
val_loss                     1.940974
val_precision         