In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow import keras
# from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
import random
import sys
sys.path.append('../')
from modules import utils
import matplotlib.pyplot as plt
%matplotlib inline

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



  "stable-baselines is in maintenance mode, please use [Stable-Baselines3 (SB3)](https://github.com/DLR-RM/stable-baselines3) for an up-to-date version. You can find a [migration guide](https://stable-baselines3.readthedocs.io/en/master/guide/migration.html) in SB3 documentation."





In [2]:
# Seed every random number generator used by this notebook (Python's `random`,
# NumPy and TensorFlow) with one shared constant.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
# The top-level `tf.set_random_seed` is a deprecated alias that no longer exists
# in TensorFlow 2.x; `tf.compat.v1.set_random_seed` sets the same graph-level
# seed, so one call through the compat path is sufficient.
tf.compat.v1.set_random_seed(SEED)

#### The data

In [3]:
# Training split: read the 0.2-noisiness CSV and replace missing values with
# the sentinel -1 in a single chained expression.
# (Alternative training file, currently disabled: '../data/train_set_basic.csv')
train_df = pd.read_csv('../new_data/train_set_noisiness_0.2.csv').fillna(-1)
train_df.head()

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,proteinuria,biopsy_proven_lupus_nephritis,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,label
0,1,1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,1,0,0,1
1,1,0,1,0,0,0,0,0,1,0,...,0,0,0,1,0,0,0,1,0,1
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,1,0,0,1,0,...,1,3,0,0,0,0,0,0,0,0


In [4]:
# Validation split: read the CSV and replace missing values with the sentinel -1.
val_df = pd.read_csv('../new_data/val_set_constant.csv').fillna(-1)
val_df.head()

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,proteinuria,biopsy_proven_lupus_nephritis,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,label
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,1,0,0,0
4,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Test split: read the CSV and replace missing values with the sentinel -1.
test_df = pd.read_csv('../new_data/test_set_constant.csv').fillna(-1)
test_df.head()

Unnamed: 0,ana,fever,leukopenia,thrombocytopenia,auto_immune_hemolysis,delirium,psychosis,seizure,non_scarring_alopecia,oral_ulcers,...,proteinuria,biopsy_proven_lupus_nephritis,anti_cardioliphin_antibodies,anti_β2gp1_antibodies,lupus_anti_coagulant,low_c3,low_c4,anti_dsdna_antibody,anti_smith_antibody,label
0,1,0,1,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,1,0,1
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,1
3,1,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,1,4,1,0,0,0,1,0,0,1


In [6]:
# For each split, the features are every column except the last and the target
# is the last column; both are converted to plain NumPy arrays.
X_train, y_train = np.array(train_df.iloc[:, :-1]), np.array(train_df.iloc[:, -1])
X_val, y_val = np.array(val_df.iloc[:, :-1]), np.array(val_df.iloc[:, -1])
X_test, y_test = np.array(test_df.iloc[:, :-1]), np.array(test_df.iloc[:, -1])

# Feature scaling is currently disabled; the arrays are used unscaled.

# Display the shape of every array as a quick sanity check.
tuple(arr.shape for arr in (X_train, y_train, X_val, y_val, X_test, y_test))

((50400, 24), (50400,), (5600, 24), (5600,), (14000, 24), (14000,))

In [7]:
# Class balance of the training labels.
train_df['label'].value_counts()

1    25210
0    25190
Name: label, dtype: int64

In [8]:
# One-hot encode the targets of all three splits with the same class count.
# NOTE(review): the training label counts displayed earlier contain only the
# values 0 and 1, yet three classes are encoded here — confirm the third class
# is intended (it has to match the width of the model's softmax output).
y_train_onehot, y_val_onehot, y_test_onehot = (
    keras.utils.to_categorical(labels, num_classes=3)
    for labels in (y_train, y_val, y_test)
)
(y_train_onehot.shape, y_val_onehot.shape, y_test_onehot.shape)

((50400, 3), (5600, 3), (14000, 3))

#### The Model

In [9]:
# Feed-forward classifier assembled layer by layer: two 64-unit ReLU hidden
# layers on a 24-feature input, followed by a 3-way softmax output.
model = keras.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(24,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(3, activation='softmax'))

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [10]:
# Categorical cross-entropy pairs with the one-hot targets; accuracy is tracked
# as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [11]:
# Checkpoint callback: writes the full model (not just the weights) to disk and
# keeps only the best one according to the monitored 'val_acc' metric.
checkpoint = ModelCheckpoint(
    filepath='../models/ffnn/ffnn_checkpoint.h5',
    monitor='val_acc',
    save_best_only=True,
    save_weights_only=False,
)

In [12]:
# Stop training once 'val_acc' has gone 200 epochs without improving.
early_stopping = EarlyStopping(
    patience=200,
    monitor='val_acc',
)

In [None]:
# Train for up to 1000 epochs in mini-batches of 32, validating on the held-out
# validation split after each epoch; the callbacks handle checkpointing and
# early termination.
history = model.fit(
    X_train,
    y_train_onehot,
    validation_data=(X_val, y_val_onehot),
    epochs=1000,
    batch_size=32,
    callbacks=[checkpoint, early_stopping],
)

Train on 50400 samples, validate on 5600 samples
Epoch 1/1000
 1920/50400 [>.............................] - ETA: 2s - loss: 0.6605 - acc: 0.7083  

2023-08-07 03:49:39.402462: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2023-08-07 03:49:43.983863: E tensorflow/stream_executor/cuda/cuda_driver.cc:318] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2023-08-07 03:49:43.983903: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: PL1207-PRO.paris.inria.fr
2023-08-07 03:49:43.983907: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: PL1207-PRO.paris.inria.fr
2023-08-07 03:49:43.984044: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 525.125.6
2023-08-07 03:49:43.984068: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: Not found: could not find kernel module information in driver version file contents: "NVRM version: NVIDIA UNIX Open Kernel Module for x86_64  525.125.06  Release Build  (

Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000


Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000
Epoch 74/1000
Epoch 75/1000
Epoch 76/1000
Epoch 77/1000
Epoch 78/1000
Epoch 79/1000
Epoch 80/1000
Epoch 81/1000
Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000

In [None]:
# Plot the training and validation accuracy recorded for each epoch.
# The history keys 'acc' / 'val_acc' are the metric names this Keras version
# reports in its training log.
plt.plot(history.history['acc'], label='Training Accuracy')
plt.plot(history.history['val_acc'], label='Validation Accuracy')
plt.xlabel('Epochs')
# The y-axis shows accuracy; it was previously mislabelled as 'Loss'.
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch, drawn through the explicit Axes API.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Training Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.set_title('Training and Validation Loss')
ax.legend()
plt.show()

#### Performance evaluation of the best model

In [None]:
# Reload the model stored at the checkpoint path used during training.
best_model = keras.models.load_model(
    '../models/ffnn/ffnn_checkpoint.h5'
)

In [None]:
# Evaluate the reloaded model on the test split; the two returned values are
# unpacked as (loss, accuracy), matching the compiled loss and metric.
test_scores = best_model.evaluate(X_test, y_test_onehot)
loss, accuracy = test_scores
(loss, accuracy)

#### More metrics

In [None]:
# Per-class scores from the softmax output for every test sample ...
model_predictions = best_model.predict(X_test)
# ... collapsed to the index of the highest-scoring class in each row.
y_pred = model_predictions.argmax(axis=1)

In [None]:
# Predictions and ground-truth labels should have matching shapes.
(y_pred.shape, y_test.shape)

In [None]:
# Display the predicted class index for each test sample.
y_pred

In [None]:
# Score the hard class predictions with the project helper `utils.test`.
# NOTE(review): the helper is assumed to return (accuracy, F1, ROC-AUC) in this
# order — confirm against modules/utils.
metrics = utils.test(y_test, y_pred)
acc, f1, roc_auc = metrics
(acc, f1, roc_auc)

In [None]:
# Disabled alternative: compute accuracy, macro-averaged F1 and one-vs-rest
# ROC-AUC (on the predicted class scores) directly with scikit-learn.
# accuracy = accuracy_score(y_test, y_pred)
# f1 = f1_score(y_test, y_pred, average='macro')
# roc_auc = roc_auc_score(y_test_onehot, model_predictions, average='macro', multi_class='ovr')
# print(f'Accuracy: {accuracy}, F1: {f1}, ROC-AUC:{roc_auc}')

In [None]:
# Collect actual and predicted labels side by side, then score them again.
# NOTE(review): this rebinds `test_df`, which earlier held the loaded test set;
# re-running the feature/target split cell after this one would operate on this
# two-column frame instead. Consider a distinct name.
test_df = pd.DataFrame({'y_actual': y_test, 'y_pred': y_pred})
acc, f1, roc_auc = utils.test(test_df['y_actual'], test_df['y_pred'])
(acc, f1, roc_auc)

In [None]:
# Same scoring once more, with the NumPy ground truth and the prediction column
# of the results frame.
utils.test(y_test, test_df['y_pred'])