# README

In questo notebook vengono utilizzare delle Keras Application per effettuare il training sulla 
base di un dataset di slice. Il training viene effettuato suddividendo il dataset in 80%-20%: 
80% delle slice viene utilizzata per il training e il 20% per il test.
Una volta terminato il training dopo un numero alto di epoche, vengono generate le features per il dataset
di validazione (20% delle slice totali).
Questo dataset viene splittato ulteriormente in 80-20: Si utilizza l'80% delle slice per effettuare il 
training di una SVM e il 20% per effettuare la validazione.



pre-trained per estrarre le feature
da un dataset di slice. In seguito il dataset di slice viene suddiviso in train e test

In [None]:
import numpy as np
from keras import layers
from keras.layers import Input, Add, Dense, Dropout, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
from keras.models import Model, load_model
from keras.optimizers import SGD, Adam
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras.applications.imagenet_utils import preprocess_input
#import pydot
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model
#from resnets_utils import *
from keras.initializers import glorot_uniform
import scipy.misc
from matplotlib.pyplot import imshow
%matplotlib inline
from gliomi import *

import keras.backend as K
K.set_image_data_format('channels_last')
K.set_learning_phase(1)

In [114]:
def normalize_min_max(value):
    return (value - np.min(value)) / (np.max(value) - np.min(value))

# Variables Def

In [None]:

slice_datasets = [
    "/data/RMN/dataset-gliomi-cnn/datasets-full-brain",
    "/data/RMN/dataset-gliomi-cnn/datasets-tumor-crop"
]

datasets = [
    "survivor", 
    "idh", 
    "ki67", 
    "egfr", 
    "mgmt"
]

percentiles = [
    100, 
    70
]

sequences = [
    "t1", 
    "t2", 
    "flair", 
    "rcbv", 
    "adc"
]

keras_models = [
    "MobileNetV2",
    "NASNetMobile",
    "VGG19",
    "ResNet50",
    "ResNet101",
    "DenseNet169",
]

optimizer_names = [
    'rmsprop',
    'sgd',
    'adam'
]

# Model

In [None]:
from tensorflow.keras.applications import *

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D,Dropout

from keras.callbacks import EarlyStopping, ModelCheckpoint

from keras.optimizers import SGD, Adam

import keras.backend as K

def get_model(model_name, optimizer, include_dropout=False):
    
    K.clear_session()

    K.set_image_data_format('channels_last')

    IMG_SHAPE = (224, 224, 1)

    # Base model is a Keras Application
    base_model = eval(model_name)(weights=None, include_top=False, input_shape=IMG_SHAPE)

    # add a global spatial average pooling layer
    global_spatial_avg_pool_layer = GlobalAveragePooling2D()(base_model.output)

    # Feature leayer
    if include_dropout:
        # Drop-out
        dropout_layer = Dropout(0.5)(global_spatial_avg_pool_layer)
        feature_layer = Dense(1024, activation='relu')(dropout_layer)
        
    else:
        feature_layer = Dense(1024, activation='relu')(global_spatial_avg_pool_layer)

    # and a logistic layer -- let's say we have 200 classes
    prediction_layer = Dense(2, activation='softmax')(feature_layer)

    # Final model
    model = Model(inputs=base_model.input, outputs=prediction_layer)
    
    # Compile model
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy','binary_crossentropy'])
    
    return model

# Model fitting

In [None]:
def train_and_test(model_name, model, X_train, y_train, X_test, y_test, random_state=42, epochs=100, batch_size=16):
    
    early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10)
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=f"logs-2.2-test/{model_name}")
            
    fit = model.fit(
            X_train, y_train, 
            epochs=epochs,
            callbacks=[tensorboard_callback, early_stopping_callback],
            validation_data=(X_test, y_test), 
            shuffle=True,
            batch_size=batch_size)
    
    return fit

# SVM evaluation

In [None]:
def evaluate_model_SVM(fit, dataset, times):
    
    scores = []
    
    for random_state in range(times):
        
        X_train, y_train, X_test, y_test = dataset.get_split(test_size=0.2, random_state=random_state)
            
        features_train = fit.predict([make_3_channels(X_train)])

        features_test = fit.predict([make_3_channels(X_test)])
            
        clf = svm.SVC()

        clf.fit(features_train, y_train)

        score = clf.score(features_test, y_test)

        print(random_state, ":", score)

        scores.append(score)

    return np.array(scores)

# Plotting 

In [None]:
import matplotlib.pyplot as plt

"""
Save results in plot
"""
def save_plot(history, output_name):

    acc = history.history['acc']
    val_acc = history.history['val_acc']

    loss = history.history['loss']
    val_loss = history.history['val_loss']

    plt.figure(figsize=(8, 8))
    plt.subplot(2, 1, 1)
    plt.plot(acc, label='Training Accuracy')
    plt.plot(val_acc, label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.ylabel('Accuracy')
    plt.ylim([min(plt.ylim()),1])
    plt.title('Training and Validation Accuracy')

    plt.subplot(2, 1, 2)
    plt.plot(loss, label='Training Loss')
    plt.plot(val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.ylabel('Cross Entropy')
    # plt.ylim([0,1.0])
    plt.title('Training and Validation Loss')
    plt.xlabel('epoch')

    plt.savefig(f"{output_name}.png")
    # plt.show()

In [None]:
# Main Iteration


In [None]:
import tensorflow as tf
from keras import backend as K
from sklearn import svm
import numpy as np
from tensorflow.keras.applications import *
from sklearn.model_selection import train_test_split
from sklearn.base import clone
import sklearn
import pandas as pd
import numpy as np

IMG_SHAPE = (224, 224, 3) # X.shape[1:]

dataset = Dataset("dataset-survivor-t1-adc-flair-224-70-perc.pickle")

keras_models = [
    "MobileNetV2",
    "NASNetMobile",
    "VGG19",
    "ResNet50",
    "ResNet101",
    "DenseNet169",
]

result = {}

times = 10

result = {}

subjects = np.array(list(dataset.get_subjects()))

for model_name in keras_models:
    
    #try:

        K.clear_session()

        print("Loading", model_name)

        base_model = eval(model_name)(weights='imagenet', include_top=True, input_shape=IMG_SHAPE)

        print("Loaded", model_name)
        
        features = base_model.predict([dataset.slices])
        
        feature_df = pd.DataFrame(
            np.concatenate([
                np.expand_dims(dataset.subjects, axis=1), 
                np.expand_dims(dataset.labels, axis=1), 
                features
            ], axis=1))
        
        scores = []
        
        for random_state in range(times):

            train_index, test_index = train_test_split(list(range(subjects.shape[0])), test_size=0.2, random_state=random_state)

            subjects_train = subjects[train_index]
            subjects_test = subjects[test_index]
            
            X_features_train = feature_df[feature_df[0].isin(subjects_train)].iloc[:,2:]
            y_train = feature_df[feature_df[0].isin(subjects_train)].iloc[:,1]
            
            X_features_test = feature_df[feature_df[0].isin(subjects_test)].iloc[:,2:]
            y_test = feature_df[feature_df[0].isin(subjects_test)].iloc[:,1]

            # X_train, y_train, X_test, y_test = dataset.get_split(train_index, test_index)
            # X_features_train = base_model.predict([X_train])
            # X_features_test = base_model.predict([X_test])
            
            clf = svm.SVC()

            clf.fit(np.array(X_features_train), y_train)

            score = clf.score(X_features_test, y_test)
    
            print(random_state, ":", score)
        
            scores.append(score)
        
        scores = np.array(scores)
        print("----> Scores:", np.average(scores * 100), "% +-", np.std(scores * 100), "%")
        result[model_name] = scores

    #except:
    #    pass

In [None]:
import os

def make_name(slice_dataset, dataset, model_name, optimizer_name, sequence, percentile):
    slice_dataset = os.path.basename(slice_dataset).replace("datasets-", "")
    return ", ".join([slice_dataset, dataset, model_name, optimizer_name, sequence, percentile])

In [None]:
from tensorflow.keras.applications import *
from keras import backend as K

img_shape = (224, 224, 1)

#####
#optimizers

import tensorflow as tf

# rmsprop = tf.keras.optimizers.RMSprop(learning_rate=0.0001)

# from keras.optimizers import SGD, Adam

# gd = tf.keras.optimizers.SGD(lr=0.001, momentum=0.9, decay=0.001/10, nesterov=False)

adam = tf.keras.optimizers.Adam(lr=0.0001)

# model.compile(optimizer= gd, loss='categorical_crossentropy', metrics=['accuracy'])

base_model = eval(model_name)(weights=None, include_top=False, input_shape=img_shape)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout

# add a global spatial average pooling layer
x = base_model.output

x = GlobalAveragePooling2D()(x)

#drop-out
x = Dropout(0.5)(x)

# Feature leayer
x = Dense(512, activation='relu')(x)

#x = Dropout(0.5)(x)

# and a logistic layer -- let's say we have 200 classes
predictions = Dense(2, activation='softmax')(x)

# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
# compile the model (should be done *after* setting layers to non-trainable)
model.compile(optimizer=adam, loss="binary_crossentropy", metrics=['accuracy', 'binary_crossentropy'])

# Model fitting

In [None]:
'''Model fitting'''
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=50)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="logs/testing")

In [None]:
epochs = 15  # Increase this value for better results (i.e., more training)

batch_size = 16  # Increasing this value might speed up fitting

In [None]:
import numpy as np

# Fit
fit = model.fit(
        np.array(t1_train), y_train, 
        epochs=epochs,
        # callbacks=[tensorboard_callback, early_stopping_callback],
        # validation_data=(np.array(X_test_3d), categorical(y_test_3d)), 
        shuffle=True,
        batch_size=batch_size)

In [1]:
dataset = Dataset("dataset-survivor-t1-adc-flair-224-90-perc.pickle")

NameError: name 'Dataset' is not defined

In [80]:
sequence_index = 1

In [81]:
from sklearn.model_selection import train_test_split

random_state = 42

subjects = list(dataset.get_subjects())

train_index, test_index = train_test_split(list(range(len(subjects))), test_size=0.2, random_state=random_state)

In [82]:
X_train = dataset.slices[np.isin(dataset.subjects, np.array(subjects)[train_index])]

In [83]:
import tensorflow as tf

categorical_labels = tf.keras.utils.to_categorical(dataset.labels)

In [84]:
y_train = np.array(categorical_labels)[np.isin(dataset.subjects, np.array(subjects)[train_index])]

In [85]:
t1_train = np.expand_dims(X_train[:,:,:,sequence_index], axis=3)

In [86]:
from tensorflow.keras.applications import *
from keras import backend as K

model_name = "ResNet50"

IMG_SHAPE = (224, 224, 1)

K.clear_session()

K.set_image_data_format('channels_last')

base_model = eval(model_name)(weights=None, include_top=False, input_shape=IMG_SHAPE)

In [87]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout

# add a global spatial average pooling layer
x = base_model.output

x = GlobalAveragePooling2D()(x)

#drop-out
x = Dropout(0.5)(x)

# Feature leayer
x = Dense(512, activation='relu')(x)

x = Dropout(0.5)(x)

# and a logistic layer -- let's say we have 200 classes
predictions = Dense(2, activation='softmax')(x)

# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)

In [88]:
import tensorflow as tf

# rmsprop = tf.keras.optimizers.RMSprop(learning_rate=0.0001)

# from keras.optimizers import SGD, Adam

# gd = tf.keras.optimizers.SGD(lr=0.001, momentum=0.9, decay=0.001/10, nesterov=False)

adam = tf.keras.optimizers.Adam(lr=0.0001)

# model.compile(optimizer= gd, loss='categorical_crossentropy', metrics=['accuracy'])

# compile the model (should be done *after* setting layers to non-trainable)
model.compile(optimizer=adam, loss="binary_crossentropy", metrics=['accuracy', 'binary_crossentropy'])

In [89]:
'''Model fitting'''
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=50)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="logs/testing")

In [90]:
epochs = 15  # Increase this value for better results (i.e., more training)

batch_size = 16  # Increasing this value might speed up fitting

In [91]:
import numpy as np

# Fit
fit = model.fit(
        np.array(t1_train), y_train, 
        epochs=epochs,
        # callbacks=[tensorboard_callback, early_stopping_callback],
        # validation_data=(np.array(X_test_3d), categorical(y_test_3d)), 
        shuffle=True,
        batch_size=batch_size)

Train on 782 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [92]:
def uncategorical(values):
    zeros = np.sum(np.round(values) == (0, 1))
    ones = np.sum(np.round(values) == (1, 0))
    if zeros > ones:
        return 0
    else:
        return 1

In [93]:
equals = 0
tot = 0

for subject in np.array(subjects)[test_index]:
    
    testing_slices = dataset.slices[np.isin(dataset.subjects, [subject])]
    
    t1_testing = testing_slices[:,:,:,sequence_index]

    prediction = model.predict([np.expand_dims(t1_testing, axis=3)])

    label = categorical_labels[np.isin(dataset.subjects, [subject])]
    
    print(subject, uncategorical(prediction), uncategorical(label))
    
    if uncategorical(prediction) == uncategorical(label):
        equals = equals + 1
    
    tot = tot + 1
    
print(equals, "/", tot, (equals / tot * 100.0), "%")

MONACELLI_LAURA 0 0
CARZEDDA_PAOLO 1 1
TEMPORIN_PATRIZIA 0 0
MARIANI_BERNARDO 0 1
PROIETTI_GIOVANNI 0 0
NERONE_GIANLUCA 1 0
DI_CARLATONIO_MAURIZIO 0 1
GIORDANO_STEFANIA 0 0
TEOFILI_STEFANO 0 0
FILIPPONI_QUINTINO 0 1
BARONTINI_MARIA_GIOVANNA 0 1
SALA_CLARA 0 1
DOBRISAN_DORINA 0 1
QUATTROCIOCCHI_EVELINA 1 1
NERVEGNA_G 0 0
CARULLI_L 0 0
PODDA_ANTONINO 0 1
BIAVATI_S 0 1
IONTA_LUCIANA 0 1
BOEZI_MARIO 0 0
FIUCCI_A 0 1
DARIDA 0 1
MUSAT_DORINA 0 0
ALESSANDRINI_GLAUCO 0 1
ISONI_FRANCESCO 0 1
FABIANI_ANNA 1 0
11 / 26 42.30769230769231 %


In [94]:
test_subjects = np.array(subjects)[test_index]

test_X = dataset.slices[np.isin(dataset.subjects, test_subjects)]

sequence_testing = np.expand_dims(test_X[:,:,:,sequence_index], axis=3)

test_y = categorical_labels[np.isin(dataset.subjects, test_subjects)]

model.evaluate(sequence_testing, test_y)



[3.364440954776842, 0.45771143, 3.364441]

In [None]:
X_train = dataset.slices[np.isin(dataset.subjects, np.array(subjects)[train_index])]

In [None]:
X_test = dataset.slices[np.isin(dataset.subjects, np.array(subjects)[test_index])]

In [None]:
y_train = np.array(dataset.labels)[np.isin(dataset.subjects, np.array(subjects)[train_index])]

In [None]:
y_test = np.array(dataset.labels)[np.isin(dataset.subjects, np.array(subjects)[test_index])]

In [None]:
def triplicate(y):
    return np.concatenate([y for i in range(3)], axis=0)

def split(X):
    return np.expand_dims(np.concatenate([X[:,:,:,i] for i in range(3)], axis=0), axis=3)
    
def split_and_triplicate(X):
    X_splitted = np.concatenate([X[:,:,:,i] for i in range(3)], axis=0)
    X_splitted_reshaped = X_splitted.reshape(X_splitted.shape[0], X_splitted.shape[1], X_splitted.shape[2], 1)
    return np.repeat(X_splitted_reshaped, 3, axis=3)

In [None]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
X_train_3d, X_test_3d, y_train_3d, y_test_3d = split(X_train), split(X_test), triplicate(y_train), triplicate(y_test)

In [None]:
X_train_3d.shape, X_test_3d.shape, y_train_3d.shape, y_test_3d.shape

In [None]:
sequences_features = [base_model.predict([sequence_3d]) for sequence_3d in sequences_3d]

In [None]:
features = np.concatenate(sequences_features, axis=1)

In [None]:
import pandas as pd

feature_df = pd.DataFrame(
    np.concatenate([
        np.expand_dims(dataset.subjects, axis=1), 
        np.expand_dims(dataset.labels, axis=1), 
        features
    ], axis=1))

In [None]:
subjects = np.array(list(dataset.get_subjects()))

In [None]:
from sklearn.model_selection import train_test_split
from sklearn import svm

times = 10

result = {}

scores = []

for random_state in range(times):

    train_index, test_index = train_test_split(list(range(subjects.shape[0])), test_size=0.2, random_state=random_state)

    subjects_train = subjects[train_index]
    subjects_test = subjects[test_index]

    X_features_train = feature_df[feature_df[0].isin(subjects_train)].iloc[:,2:]
    y_train = feature_df[feature_df[0].isin(subjects_train)].iloc[:,1]

    X_features_test = feature_df[feature_df[0].isin(subjects_test)].iloc[:,2:]
    y_test = feature_df[feature_df[0].isin(subjects_test)].iloc[:,1]

    clf = svm.SVC()

    clf.fit(np.array(X_features_train), y_train)

    score = clf.score(X_features_test, y_test)

    print(random_state, ":", score)

    scores.append(score)

scores = np.array(scores)
print("----> Scores:", np.average(scores * 100), "% +-", np.std(scores * 100), "%")

In [None]:
train_index, test_index = train_test_split(list(range(subjects.shape[0])), test_size=0.2, random_state=0)

In [None]:
subjects_train = subjects[train_index]
subjects_test = subjects[test_index]

In [None]:
subjects.shape, subjects_train.shape, subjects_test.shape

In [None]:
    X_features_train = feature_df[feature_df[0].isin(subjects_train)].iloc[:,2:]
    y_train = feature_df[feature_df[0].isin(subjects_train)].iloc[:,1]

    X_features_test = feature_df[feature_df[0].isin(subjects_test)].iloc[:,2:]
    y_test = feature_df[feature_df[0].isin(subjects_test)].iloc[:,1]
