In [82]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import keras
from keras.models import Sequential, save_model, load_model
from keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [61]:
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 4422851184106451769
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 22655533056
locality {
  bus_id: 1
  links {
  }
}
incarnation: 4572494463556360476
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:01:00.0, compute capability: 8.6"
xla_global_id: 416903419
]


2023-03-12 16:03:13.178359: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-03-12 16:03:13.216532: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-03-12 16:03:13.216644: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-03-12 16:03:13.268584: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:966] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-03-12 16:03:13.268620: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1700] Could not ident

In [62]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [63]:
import ipywidgets as widgets
import io
import os
from PIL import Image
import tqdm
from sklearn.model_selection import train_test_split
import cv2
from sklearn.utils import shuffle
import tensorflow as tf

#### We are manually dividing as we do not want generated GAN MRI in the testing dataset

In [64]:
X_train = []
y_train = []
X_test = []
y_test = []
image_size = 128

In [65]:
train_path = 'data/Training_GAN'
labels = os.listdir(train_path)

test_path = 'data/Test_GAN'
labels = os.listdir(test_path)
print(labels)

['pituitary', 'glioma', 'meningioma']


## Get all of the training and testing data and label them

In [66]:
# Get the training data and their labels.
for i in labels:
    folder_path = os.path.join(train_path, i)
    for j in os.listdir(folder_path):
        img = cv2.imread(os.path.join(folder_path, j))
        img = cv2.resize(img, (image_size, image_size))
        
        X_train.append(img)
        y_train.append(i)

# Get the testing data and their labels.
for i in labels:
    folder_path = os.path.join(test_path, i)
    for j in os.listdir(folder_path):
        img = cv2.imread(os.path.join(folder_path, j))
        img = cv2.resize(img, (image_size, image_size))
        
        X_test.append(img)
        y_test.append(i)

In [67]:
X_train = np.array(X_train)
y_train = np.array(y_train)

X_test = np.array(X_test)
y_test = np.array(y_test)

X_train.shape, y_train.shape, X_test.shape, y_test.shape

((4292, 128, 128, 3), (4292,), (1844, 128, 128, 3), (1844,))

### Turn labels into number format for both training and testing

In [68]:
y_train_new = []
y_test_new = []

for i in y_train:
    y_train_new.append(labels.index(i))
y_new = np.array(y_train_new)

for i in y_test:
    y_test_new.append(labels.index(i))
y_test_new = np.array(y_test_new)


In [69]:
## StratifiedKFold needs 1-dimensional Y array.
# Do not convert to_categorical when using StratifiedKFold
#y_new = tf.keras.utils.to_categorical(y_new)
#y_new

#### To get F1Score while training

In [70]:
import keras.backend as K

def get_f1(y_true, y_pred): #taken from old keras source code
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (possible_positives + K.epsilon())
    f1_val = 2*(precision*recall)/(precision+recall+K.epsilon())
    return f1_val

### Build CNN Model with K-Fold
> We have X and y_new. We'll perform K-Fold cross validation<br>
**iterative-stratification** is a project that provides scikit-learn compatible cross validators with stratification for multilabel data.<br>
`!pip install iterative-stratification`


In [71]:
# from sklearn.model_selection import KFold, StratifiedKFold 
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

In [72]:
y_train_new = tf.keras.utils.to_categorical(y_train_new)
y_test_new = tf.keras.utils.to_categorical(y_test_new)

In [73]:
# fix random seed for reproducibility
seed = 7
np.random.seed(seed)

# define 10-fold cross validation test harness
kfold = MultilabelStratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
cvscores = []
precisionscores = []
recallscores = []
f1scores = []
histories = []

fold_no = 1
best_fold = 1.5

for train, test in kfold.split(X_train, y_train_new):

    ## StratifiedKFold needs 1-dimensional Y array.
    # Do not convert to_categorical when using StratifiedKFold
    # instead, use to_categorical after splitting with k-fold...
    #y_new = tf.keras.utils.to_categorical(y_new)
    #y_new = tf.keras.utils.to_categorical(y_new)
    #test

    model = Sequential()
    model.add(Conv2D(32, (3,3), activation = 'relu', input_shape = (image_size, image_size, 3)))
    model.add(Conv2D(64, (3,3), activation = 'relu'))
    model.add(MaxPooling2D(2,2))
    model.add(Dropout(0.4))

    model.add(Conv2D(64, (3,3), activation = 'relu'))
    model.add(Dropout(0.3))
    model.add(MaxPooling2D(2,2))
    model.add(Dropout(0.4))

    model.add(Conv2D(128, (3,3), activation = 'relu'))
    model.add(Conv2D(128, (3,3), activation = 'relu'))
    model.add(MaxPooling2D(2,2))
    model.add(Dropout(0.4))

    model.add(Conv2D(128, (3,3), activation = 'relu'))
    model.add(Conv2D(256, (3,3), activation = 'relu'))
    model.add(MaxPooling2D(2,2))
    model.add(Dropout(0.4))

    model.add(Flatten())

    model.add(Dense(256, activation = 'relu'))
    model.add(Dropout(0.4))

    model.add(Dense(3, activation = 'softmax'))

    model.compile(loss = 'categorical_crossentropy', optimizer = 'Adam', metrics = ['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), get_f1])

    # Fit the model
    history = model.fit(X_train[train], y_train_new[train], epochs=10, batch_size=32, verbose=0)

    # Save the fold model
    model_save_path = f'./fold_models/{fold_no}.h5'
    save_model(model, model_save_path, save_format='h5')

    # evaluate the model
    scores = model.evaluate(X_train[test], y_train_new[test], verbose=1)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

    cvscores.append(scores[1] * 100)
    precisionscores.append(scores[2] * 100)
    recallscores.append(scores[3] * 100)
    f1scores.append(scores[4] * 100)
    histories.append(history)

    # take the fold history that has the best loss for our model 
    if (scores[0] < best_fold):
        best_fold_history = history

    # Increase fold number
    fold_no = fold_no + 1

print("Accuracy (mean): %.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))
print("Precision (mean): %.2f%% (+/- %.2f%%)" % (np.mean(precisionscores), np.std(precisionscores)))
print("Recall (mean): %.2f%% (+/- %.2f%%)" % (np.mean(recallscores), np.std(recallscores)))
print("F1 (mean): %.2f%% (+/- %.2f%%)" % (np.mean(f1scores), np.std(f1scores)))

accuracy: 97.21%
accuracy: 96.97%
accuracy: 98.60%
accuracy: 98.14%
accuracy: 98.60%
accuracy: 99.07%
accuracy: 98.14%
accuracy: 94.41%
accuracy: 96.04%
accuracy: 98.37%
Accuracy (mean): 97.55% (+/- 1.36%)
Precision (mean): 97.97% (+/- 0.99%)
Recall (mean): 97.06% (+/- 1.78%)
F1 (mean): 97.35% (+/- 1.45%)


In [77]:
print(cvscores)

[97.20930457115173, 96.96969985961914, 98.60140085220337, 98.13953638076782, 98.60140085220337, 99.06759858131409, 98.13519716262817, 94.40559148788452, 96.03729844093323, 98.36829900741577]


In [78]:
tf.keras.backend.clear_session()

### Load best performing model and train it
> We'll train this model to be our final model to make predictions on test data.

In [81]:
# load selected model
# FOLD 9
loaded_model = load_model('./fold_models/9.h5', custom_objects={"get_f1": get_f1})
#loaded_model.summary()

X_train.shape, y_train_new.shape, y_test.shape, y_test_new.shape

In [88]:
# Define callbacks
checkpoint_path = './final_model'
os.mkdir(checkpoint_path)

keras_callbacks = [
ModelCheckpoint(checkpoint_path + '.h5', monitor='val_loss', save_best_only=True, mode='min', verbose=1),
EarlyStopping(monitor='val_loss', mode='min', verbose= 1, patience= 5)
]

# fit final model.
history = loaded_model.fit(X_train, y_train_new, validation_data=(X_test, y_test_new), epochs= 15, batch_size= 32, callbacks= keras_callbacks)

Epoch 1/15
Epoch 1: val_loss improved from inf to 2.25185, saving model to ./final_model.h5
Epoch 2/15
Epoch 2: val_loss improved from 2.25185 to 1.29750, saving model to ./final_model.h5
Epoch 3/15
Epoch 3: val_loss did not improve from 1.29750
Epoch 4/15
Epoch 4: val_loss did not improve from 1.29750
Epoch 5/15
Epoch 5: val_loss did not improve from 1.29750
Epoch 6/15
Epoch 6: val_loss did not improve from 1.29750
Epoch 7/15
Epoch 7: val_loss did not improve from 1.29750
Epoch 7: early stopping


### Testing the model

In [22]:
X_test.shape, y_test_new.shape

((1844, 128, 128, 3), (1844, 3))

In [52]:
yhat_probs = model.predict(X_test, verbose=0)
yhat_classes = np.argmax(yhat_probs,axis=1)

In [53]:
yhat_classes.shape, y_test_new.shape

((1844,), (1844, 3))

### Revert from label_encoder to 1-d pred. array

In [54]:
y_test_new_2 = np.argmax(y_test_new, axis=1)

In [55]:
yhat_classes.shape, y_test_new_2.shape

((1844,), (1844,))

### Got predicitons. Now evaluate

In [56]:
acc = accuracy_score(y_test_new_2, yhat_classes)
precision = precision_score(y_test_new_2, yhat_classes, average='weighted')
recall = recall_score(y_test_new_2, yhat_classes, average='weighted')
f1 = f1_score(y_test_new_2, yhat_classes, average='weighted')

In [57]:
print('Accuracy: {0} | Precision: {1} | Recall: {2} | F1: {3}'.format(acc, precision, recall, f1))

Accuracy: 0.5954446854663774 | Precision: 0.7271697835352666 | Recall: 0.5954446854663774 | F1: 0.505723197761198
