# ResNet

- **Training set**: LO_SARD102
- **Validation set**: LO_SARD102 (held-out validation split)
- **Test set**: JTT
- **Features**: word2vec sequence
- **Class imbalance processing**: SMOTE

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import applications
from tensorflow.keras.utils import to_categorical
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from helper_functions import *
from sklearn.model_selection import train_test_split
from collections import Counter
from sklearn.metrics import classification_report

# Directory holding the pre-computed word2vec sequence arrays (.npy files).
DATA_DIR = '/mnt/md0/user/scheuererra68323'
# Batch size shared by all three data generators.
BATCH_SIZE = 512


def load_array(filename):
    """Open one .npy file from DATA_DIR as a read-only memory map.

    mmap_mode='r' keeps the array on disk instead of reading it fully
    into memory.
    """
    # Uses the explicitly imported `np` alias; the bare name `numpy` is never
    # imported by this notebook and was only available through the star import.
    return np.load(f'{DATA_DIR}/{filename}', mmap_mode='r')


def describe_split(title, X, y):
    """Print the label distribution and the array shapes of one data split."""
    print(f"{title}:")
    print(Counter(y))
    print(X.shape)
    print(y.shape)


# Training split (SMOTE-balanced, see the class counts printed below).
X_train = load_array('LOSARD_w2vseq_SMOTE_X_full_train.npy')
Y_train = load_array('LOSARD_w2vseq_SMOTE_Y_full_train.npy')

# Validation split, monitored during training.
X_val = load_array('LOSARD_w2vseq_X_full_val.npy')
Y_val = load_array('LOSARD_w2vseq_Y_full_val.npy')

# Test split, used only in the evaluation section.
X_test = load_array('JTT_w2vseq_X.npy')
Y_test = load_array('JTT_w2vseq_Y.npy')

# ResNetDataGenerator is not defined in this notebook — presumably it comes
# from the helper_functions star import; TODO confirm.
train_gen = ResNetDataGenerator(_X=X_train, _y=Y_train, batch_size=BATCH_SIZE)
val_gen = ResNetDataGenerator(_X=X_val, _y=Y_val, batch_size=BATCH_SIZE)
test_gen = ResNetDataGenerator(_X=X_test, _y=Y_test, batch_size=BATCH_SIZE)

describe_split("train", X_train, Y_train)
describe_split("\nval", X_val, Y_val)
describe_split("\ntest", X_test, Y_test)

train:
Counter({0.0: 127721, 1.0: 127721})
(255442, 100, 100)
(255442,)

val:
Counter({0.0: 31975, 1.0: 1240})
(33215, 100, 100)
(33215,)

test:
Counter({0.0: 21243, 1.0: 608})
(21851, 100, 100)
(21851,)


## Build & train keras model

In [11]:
# ResNet50 API reference:
# https://keras.io/api/applications/resnet/#resnet50-function
#
# No pretrained weights are requested (weights=None); the network keeps its
# classification head (include_top=True) with two output classes.
# NOTE(review): input_shape has a trailing channel axis of size 1, while the
# loaded arrays are (N, 100, 100) — presumably ResNetDataGenerator adds the
# channel dimension; confirm.
model = applications.ResNet50(
    classes=2,
    input_shape=(100, 100, 1),
    include_top=True,
    weights=None,
    input_tensor=None,
    pooling=None,
)

sgd = tf.keras.optimizers.SGD(momentum=0.95, learning_rate=0.1)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# NOTE(review): this restores weights from the same file that the final cell of
# this notebook writes with model.save(). On a machine where that file does not
# exist yet, this call fails, and re-running the training cell afterwards
# continues from the restored weights rather than from scratch.
model.load_weights('testJTT_ResNet_word2vec_SMOTE.h5')

In [None]:
# Fit on the training generator for 8 epochs, passing val_gen as validation
# data. The returned history object is what the (currently commented-out)
# plot_loss / plot_accuracy calls in the evaluation cell would consume.
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=8,
    verbose=True,
)

## Evaluation

In [13]:
# Predicted class index per test sample: argmax over the model's output units.
# Uses the explicitly imported `np` alias; the bare name `numpy` is never
# imported by this notebook and was only available through the star import.
# NOTE(review): comparing Y_pred with Y_test element-wise assumes test_gen
# yields samples in their stored order (no shuffling, no dropped partial
# batch) — confirm in ResNetDataGenerator.
Y_pred = np.argmax(model.predict(test_gen), axis=1)
assert len(Y_pred) == len(Y_test), "prediction count does not match number of test labels"

print(classification_report(Y_test, Y_pred))
# confusion_matrix and print_metrics are not imported explicitly in this
# notebook — presumably provided by the helper_functions star import.
print_metrics(confusion_matrix(Y_test, Y_pred), Y_test)
print()

# score[0] is the loss, score[1] the accuracy metric configured in compile().
score = model.evaluate(test_gen, verbose=False)
print('Test loss:\t{:.6f}'.format(score[0]))
print('Test accuracy:\t{:.6f}'.format(score[1]))

# Disabled: `history` only exists when the training cell was run in this session.
#plot_loss(history)
#plot_accuracy(history)

              precision    recall  f1-score   support

         0.0       0.96      0.57      0.72     21243
         1.0       0.01      0.22      0.03       608

    accuracy                           0.56     21851
   macro avg       0.49      0.40      0.37     21851
weighted avg       0.94      0.56      0.70     21851

Confusion matrix:
[[12109  9134]
 [  472   136]]

Confusion matrix (Percentages):
[[55.416 41.801]
 [ 2.16   0.622]]

Metrics:
Sensitivity (TPR): 0.223684
Specifity (TNR):   0.570023

FPR: 0.429977
FNR: 0.776316

Balanced accuracy: 0.396854

Test loss:	0.602773
Test accuracy:	0.560386


In [None]:
# Persist the model to disk. This is the same file path that the model-building
# cell passes to model.load_weights(), so running this cell overwrites the
# checkpoint that a later session will restore from.
model.save('testJTT_ResNet_word2vec_SMOTE.h5')