In [None]:
# Experiment configuration read by the later cells.
n_fold= 1  # fold number inserted into the per-fold CSV file names
total_fold= '5'  # fold count inserted into the per-fold CSV directory name
use_enchanced_dataset= False  # True selects the "enchanced" image directory instead of the full image set
model_name= "Full-ChexFPN"  # used to name the output directory and the saved weight/model files

full_dataset = True  # True loads the official train/test CSVs; False loads the per-fold CSVs

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import time
from tqdm.notebook import tqdm
import os

# Keras
import tensorflow as tf
# import tensorflow_addons as tfa
import tensorflow_hub as hub
from tensorflow import keras
from tensorflow.keras import *
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model, Sequential, model_from_json, load_model
# from tensorflow.keras.preprocessing import image_dataset_from_directory
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint,EarlyStopping,TensorBoard,CSVLogger,ReduceLROnPlateau
from keras import backend as K
from keras_retinanet import layers as rlayers


import util
from sklearn.metrics import confusion_matrix, classification_report
from livelossplot import PlotLossesKeras
from keras.initializers import RandomNormal

In [None]:
# List the GPUs TensorFlow can see, turn on memory growth for each of them,
# and then make only the first one visible to TensorFlow.
gpus = tf.config.experimental.list_physical_devices('GPU')
print(gpus)
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
if gpus:
    tf.config.set_visible_devices(gpus[0], 'GPU')

# Report whether this TensorFlow build was compiled with CUDA support.
print("Test built: {}".format(tf.test.is_built_with_cuda()))

In [None]:
# Image directory: the "enchanced" copy when requested, otherwise the full image set.
IMAGE_DIR = (
    "/home/cries/Dataset/X-Ray/enchanced/"
    if use_enchanced_dataset == True
    else "/home/cries/Dataset/X-Ray/full/images/"
)

# Train/test tables: either the official split or one fold of the k-fold split.
if full_dataset == True:
    train_df = pd.read_csv("/home/cries/Dataset/X-Ray/DataFrame/official_train.csv")
    test_df = pd.read_csv("/home/cries/Dataset/X-Ray/DataFrame/official_test.csv")
else:
    # Per-fold CSVs keep only the columns from 'Image Index' onwards.
    _fold_prefix = "/home/cries/Dataset/X-Ray/DataFrame/" + str(total_fold) + "Fold/"
    train_df = pd.read_csv(_fold_prefix + "train_Fold" + str(n_fold) + ".csv").loc[:, 'Image Index':]
    test_df = pd.read_csv(_fold_prefix + "test_Fold" + str(n_fold) + ".csv").loc[:, 'Image Index':]

# Label columns, in the order used for the model outputs and the plots.
labels = [
    'No Finding', 'Cardiomegaly', 'Emphysema', 'Effusion', 'Hernia',
    'Infiltration', 'Mass', 'Nodule', 'Atelectasis', 'Pneumothorax',
    'Pleural_Thickening', 'Pneumonia', 'Fibrosis', 'Edema', 'Consolidation',
]

# Check the two tables against each other on the 'Image Index' column.
leakage = util.check_for_leakage(train_df, test_df, 'Image Index')
print("Leakage between train and test: {}".format(leakage))

In [None]:
# Setting
seed= 1  # NOTE(review): not referenced by any other visible cell — confirm whether the generators should receive it
batch_size= 32
# target_w= 320; target_h= 320; dim= (3,)
target_w= 224; target_h= 224; dim= (3,)
image_size_target= (target_w,target_h)  # passed as target_size to the data generators
image_shape= image_size_target + dim  # (224, 224, 3); used as the model Input shape
class_mode= 'raw'   # raw, categorical

# NOTE(review): use_aug / use_normalize are not read by the visible generator cell,
# which hard-codes its own flags when calling prepare_generator — confirm intent.
use_aug= False
use_normalize= True
index_col= "Image Index"  # dataframe column holding the image file names (x_col)
labels_col= labels

def prepare_generator(use_Aug, use_Normalize):
    """Build an ImageDataGenerator for the requested preprocessing mode.

    Every mode rescales pixel values by 1/255.

    Args:
        use_Aug: when True, augmentation is enabled. On its own this is a
            horizontal flip only; combined with normalization it also adds
            shear, zoom and rotation with constant fill.
        use_Normalize: when True, samplewise centering and samplewise
            std-normalization are enabled.

    Returns:
        The configured ImageDataGenerator, or None when either flag compares
        equal to neither True nor False.
    """
    base = dict(rescale= 1./255)
    samplewise = dict(samplewise_center= True, samplewise_std_normalization= True)

    # (use_Aug, use_Normalize) -> constructor options, checked in this order.
    presets = (
        ((True, False), dict(base, horizontal_flip= True)),
        ((False, True), dict(base, **samplewise)),
        ((False, False), base),
        ((True, True), dict(
            base,
            horizontal_flip=True,
            vertical_flip= False,
            shear_range=0.1,
            zoom_range=0.1,
            cval=0.0,
            fill_mode='constant',
            rotation_range = 20,
            **samplewise
        )),
    )

    for (aug_flag, norm_flag), options in presets:
        if use_Aug == aug_flag and use_Normalize == norm_flag:
            return ImageDataGenerator(**options)
    return None

In [None]:
# === Image Train Generator
# The training generator is always built with augmentation and normalization
# enabled; the flags are hard-coded in the prepare_generator call below.
print("============ getting train generator ===========") 
image_train= prepare_generator(use_Aug=True, use_Normalize=True).flow_from_dataframe(
    dataframe= train_df,
    directory= IMAGE_DIR,
    x_col= index_col,
    y_col= labels,
    class_mode= class_mode,
    batch_size= batch_size,
    shuffle= True,
    target_size= image_size_target
)

# === Image Validation and Test Generator
print("")
print("==== getting train and test/valid generators ====")
# Un-augmented, un-normalized pass over the training table; only its first
# batch is used, as sample data for imagegenerator.fit() below.
raw_train_generator= prepare_generator(False, False).flow_from_dataframe(
                        dataframe= train_df,
                        directory= IMAGE_DIR,
                        x_col= index_col,
                        y_col= labels,
                        class_mode= class_mode,
                        batch_size= batch_size,
                        shuffle= True,
                        target_size= image_size_target
                    )
batch= raw_train_generator.next()
data_sample= batch[0]  # images of the sampled batch (labels are batch[1])
imagegenerator= prepare_generator(False, True)
# NOTE(review): this generator only enables samplewise options; fit() is
# presumably a no-op here since Keras documents it as needed only for
# featurewise / ZCA statistics — confirm and consider removing the sampling above.
imagegenerator.fit(data_sample)
# Validation generator over test_df: normalized, not augmented, not shuffled.
image_val = imagegenerator.flow_from_dataframe(
                        dataframe= test_df,
                        directory= IMAGE_DIR,
                        x_col= index_col,
                        y_col= labels,
                        class_mode= class_mode,
                        batch_size= batch_size,
                        shuffle= False,
                        target_size= image_size_target
                    )

# Preview the first image of the first training batch.
x, y = image_train.__getitem__(0)
plt.figure(figsize=(2,2))
plt.axis('off')
plt.imshow(x[0]);

In [None]:
# Bar chart of the per-class mean of the training label matrix
# (the fraction of positive rows per class, assuming 0/1 labels — TODO confirm).
plt.figure(figsize=(10,3))
plt.xticks(rotation=90)
plt.bar(x=labels, height=np.mean(image_train.labels, axis=0))
plt.title("Frequency of Each Class")
plt.show()

In [None]:
def compute_class_freqs(labels):
    """Return the per-column positive and negative frequencies of a label matrix.

    Args:
        labels: array with one row per sample and one column per class
            (presumably 0/1 entries — verify against caller).

    Returns:
        (positive_frequencies, negative_frequencies): the column sums divided
        by the number of rows, and one minus that.
    """
    sample_count = labels.shape[0]
    positive = np.sum(labels, axis=0) / sample_count
    return positive, 1 - positive

# Per-class positive / negative frequencies of the training labels;
# the bare expression displays the positive frequencies as the cell output.
freq_pos, freq_neg = compute_class_freqs(image_train.labels)
freq_pos

In [None]:
# Long-format table with one row per (class, Positive/Negative) pair.
# Built with pd.concat because DataFrame.append was removed in pandas 2.0.
data = pd.concat(
    [
        pd.DataFrame({"Class": labels, "Label": "Positive", "Value": freq_pos}),
        pd.DataFrame({"Class": labels, "Label": "Negative", "Value": freq_neg}),
    ],
    ignore_index=True,
)
# Grouped bar chart of positive vs. negative frequency for every class.
plt.figure(figsize=(10,3))
plt.xticks(rotation=90)
f = sns.barplot(x="Class", y="Value", hue="Label" ,data=data)

In [None]:
# Weight each class's positives by its negative frequency and vice versa,
# so that freq_pos * pos_weights equals freq_neg * neg_weights for every class.
pos_weights = freq_neg
neg_weights = freq_pos
pos_contribution = freq_pos * pos_weights 
neg_contribution = freq_neg * neg_weights

# Long-format table with one row per (class, Positive/Negative) pair.
# Built with pd.concat because DataFrame.append was removed in pandas 2.0.
data = pd.concat(
    [
        pd.DataFrame({"Class": labels, "Label": "Positive", "Value": pos_contribution}),
        pd.DataFrame({"Class": labels, "Label": "Negative", "Value": neg_contribution}),
    ],
    ignore_index=True,
)
# Grouped bar chart of the weighted contributions for every class.
plt.figure(figsize=(10,3))
plt.xticks(rotation=90)
sns.barplot(x="Class", y="Value", hue="Label" ,data=data);

In [None]:
# Class weights derived from the training labels by the project helper.
# NOTE(review): class_weights is not passed to model.fit in the visible cells — confirm whether it should be.
class_weights = util.generate_class_weights(image_train.labels, multi_class=False, one_hot_encoded=True)

In [None]:
# Display the computed class weights.
class_weights

In [None]:
# Display the model input shape.
image_shape

In [None]:
# Number of output units of the classifier: one per entry in `labels`.
num_class = len(labels)

In [None]:
def mlp_head(x, depth=[128, 128], dropout_rate=0.3):
    """Stack Dropout -> Dense(selu) blocks on top of a tensor.

    Args:
        x: input tensor.
        depth: unit counts; one Dropout + Dense pair is added per entry, in order.
        dropout_rate: rate used by every Dropout layer.

    Returns:
        The output of the last Dense layer, or `x` unchanged when `depth` is empty.
    """
    hidden = x
    for width in depth:
        dropped = Dropout(dropout_rate)(hidden)
        hidden = layers.Dense(width, activation='selu')(dropped)
    return hidden

In [None]:
# Symbolic input tensor shared by the backbone and the final model.
inputs = Input(shape=image_shape)

In [None]:
# mdl1 = hub.KerasLayer("https://tfhub.dev/sayakpaul/swin_s3_tiny_224_fe/1", trainable=False)(inputs)
# swin_output = Flatten()(swin_output)

# mdl1 = Model(inputs, mdl1)
# mdl1.trainable = False

# mdl2 = tf.keras.applications.DenseNet121(weights='imagenet', include_top=False)(inputs)

# outputs = GlobalMaxPooling2D()(mdl2)
# mdl2 = Model(inputs=inputs, outputs=outputs)
# mdl2.trainable = False 

In [None]:
# Build a DenseNet121 on the shared input and load weights from a local .h5 file.
# NOTE(review): classes=14 presumably matches the classifier stored in that
# checkpoint (the model built later predicts len(labels) classes) — confirm.
weights_path = '/home/cries/Workshop/X-Ray/TransX-Ray/jeremie/chexnet_pretrained/brucechou1983_CheXNet_Keras_0.3.0_weights.h5'
mdl = tf.keras.applications.DenseNet121(weights=weights_path, include_top=True, classes=14, input_tensor=inputs)

# Frozen feature extractor that ends at the third-from-last layer of the classifier
# (presumably the final 'relu' activation before pooling — TODO confirm).
base_model = Model(inputs=inputs, outputs=mdl.layers[-3].output)
base_model.trainable = False

In [None]:
# keras.utils.plot_model(base_model)

In [None]:
# Names of the base-model layers whose outputs are tapped as feature maps.
layer_names = ["pool2_conv", "pool3_conv", "pool4_conv", "relu"]

# x1..x4 follow the order of layer_names (x4 is the "relu" output).
layer_outputs = [base_model.get_layer(name).output for name in layer_names]
x1, x2, x3, x4=layer_outputs

# Global branch: batch-norm followed by global max pooling of x4.
x = BatchNormalization()(x4)
x = GlobalMaxPooling2D()(x)

In [None]:
# Feature-pyramid levels built on the backbone taps x1..x4.
# Each level projects its tap with a 1x1 conv, concatenates it with the
# resized level above (channel axis), and finishes with a 3x3 conv.
feature_size = 256

# P6 / P7: extra levels obtained by strided 3x3 convs on top of x4.
P6 = Conv2D(feature_size, kernel_size=3, strides=2, padding='same')(x4)

P7 = Conv2D(feature_size, kernel_size=3, strides=2, padding='same', activation='relu')(P6)

# P5: projection of x4; P5_UP is that projection resized for merging with x3
# (UpsampleLike presumably resizes its first input to the spatial size of the second — confirm).
P5 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same')(x4) # up
P5_UP = rlayers.UpsampleLike()([P5, x3])
P5 = Conv2D(feature_size, kernel_size=3, strides=1, padding='same')(P5)

# P4: projection of x3 merged with the resized P5.
P4 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same')(x3) # up
P4 = Concatenate(axis=3)([P5_UP, P4])
P4_UP = rlayers.UpsampleLike()([P4, x2])
P4 = Conv2D(feature_size, kernel_size=3, strides=1, padding='valid')(P4)

# P3: projection of x2 merged with the resized P4.
P3 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same')(x2) # up
P3 = Concatenate(axis=3)([P4_UP, P3])
P3_UP = rlayers.UpsampleLike()([P3, x1])
# Fixed: the final 3x3 conv must consume the merged P3. It was applied to P4
# (copy-paste slip), so the P3 level never saw the x2 features. This changes the
# graph, so weights saved from the previous architecture will not load into it.
P3 = Conv2D(feature_size, kernel_size=3, strides=1, padding='valid')(P3)

# P2: projection of x1 merged with the resized P3.
P2 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same')(x1) # up
P2 = Concatenate(axis=3)([P3_UP, P2])
P2 = Conv2D(feature_size, kernel_size=3, strides=1, padding='valid')(P2)

In [None]:
hidden_units = [2048]


def _pooled_branch(feature_map):
    """BatchNorm -> global max pool -> MLP head for a single feature map."""
    normalized = BatchNormalization()(feature_map)
    pooled = GlobalMaxPooling2D()(normalized)
    return mlp_head(pooled, depth=hidden_units, dropout_rate=0.5)


# f0 comes from the already pooled global branch `x`; f1..f6 come from the
# pyramid levels P2..P7, built in that order.
f0 = mlp_head(x, depth=hidden_units, dropout_rate=0.5)
f1, f2, f3, f4, f5, f6 = [
    _pooled_branch(level) for level in (P2, P3, P4, P5, P6, P7)
]

# Fuse all branches, normalize, and run the shared classifier MLP.
head = Concatenate()([f0, f1, f2, f3, f4, f5, f6])
head = BatchNormalization()(head)
head = mlp_head(head, depth= [2048, 1024], dropout_rate=0.0)

# One sigmoid unit per label.
predictions = Dense(num_class, activation='sigmoid')(head)
model = Model(inputs=inputs, outputs=predictions) 

In [None]:
# keras.utils.plot_model(model)

In [None]:
# Checkpoint location: ./output_weights/<model_name>/<model_name>_ckp_weights.h5
output_dir = './output_weights/{}'.format(model_name)
weight_path = '{}_ckp_weights.h5'.format(model_name)
output_weights_path = os.path.join(output_dir, weight_path)

# Create the output directory if it does not exist yet.
os.makedirs(output_dir, exist_ok=True)

# Reduce the learning rate by 10x after 2 epochs without val_loss improvement.
# NOTE(review): adaptive_lr is not included in the callbacks of the visible model.fit call — confirm intent.
adaptive_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=2,
    verbose=1,
    mode="min",
    min_lr=5e-6,
)

In [None]:
# Weighted binary cross-entropy built by the project helper from the
# per-class positive / negative weights.
# NOTE(review): the visible model.compile call uses a plain BinaryCrossentropy
# instead of this `loss` — confirm which one is intended.
loss = util.set_binary_crossentropy_weighted_loss(
    positive_weights=pos_weights,
    negative_weights=neg_weights)

# train_steps = len(image_train) / 10
# val_steps = len(image_val) / 5

In [None]:
# Training hyper-parameters.
epoch = 100

lr = 0.0001
# NOTE(review): lr / epoch is passed as `weight_decay` below; the name suggests
# a learning-rate decay schedule was intended — confirm.
decay_rate = lr / epoch
momentum = 0.99

# Both optimizers are constructed, but only `sgd` is passed to compile() below.
adam = tf.keras.optimizers.Adam(learning_rate=lr, weight_decay=decay_rate, epsilon=1e-07, amsgrad=False)
sgd = tf.keras.optimizers.SGD(learning_rate=lr, momentum=momentum, weight_decay=decay_rate, nesterov=False)

# Keep only the best model (lowest val_loss), checked at the end of every epoch.
# The deprecated `period=1` kwarg was dropped: it equals the default per-epoch
# behaviour, and newer Keras versions reject it.
checkpoint = ModelCheckpoint(output_weights_path, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')

# class_weigths = dict(zip(range(len(neg_weights)), neg_weights))
# weighted_loss = get_weighted_loss(pos_weights, neg_weights)
bce_ls = tf.keras.losses.BinaryCrossentropy(label_smoothing=0.0)

# AUC computed separately per label (multi_label=True).
metric_auc = tf.keras.metrics.AUC(multi_label=True)

model.compile(loss=bce_ls, optimizer=sgd, metrics=[metric_auc])

In [None]:
# Train the model and record the wall-clock training time in seconds.
# Callbacks in use: best-model checkpointing and live loss plotting.
# NOTE(review): the ReduceLROnPlateau callback (adaptive_lr) is not in this list — confirm intent.
start = time.time() 
history = model.fit(image_train, epochs=epoch, validation_data=image_val, callbacks=[checkpoint,PlotLossesKeras()],verbose=1)
end_train = time.time() - start

In [None]:
# Report the training duration converted from seconds to minutes.
print("=========== TRAINING ENDED IN {} min ================".format(end_train/60))

In [None]:
# Save the full trained model under <output_dir>/model_<model_name>.
save_model_path = os.path.join(output_dir, 'model_{}'.format(model_name))
model.save(save_model_path)

In [None]:
# Save the final weights, then record the size of the weight file and the parameter count.
final_weight_model_path = os.path.join(output_dir, '{}_weights.h5'.format(model_name))
model.save_weights(final_weight_model_path)
file_stats = os.stat(final_weight_model_path)
model_size = round(file_stats.st_size / (1024 * 1024), 2)  # file size in MiB, 2 decimals
model_parameter = model.count_params()

In [None]:
# Display the total parameter count of the model.
model_parameter

In [None]:
import pickle

# Pickle the history dict returned by model.fit to 'trainHistoryDict'
# (a relative path, so it lands in the current working directory, not output_dir).
with open('trainHistoryDict', 'wb') as file_pi:
        pickle.dump(history.history, file_pi)

In [None]:
# Display where the full model was saved.
save_model_path

In [None]:
#Load Model
from tensorflow import keras
# model_fusion = keras.models.load_model(save_model_path, custom_objects={
#     "binary_crossentropy_weighted_loss": util.set_binary_crossentropy_weighted_loss})

In [None]:
# Reload the saved model from disk for evaluation.
model_fusion = keras.models.load_model(save_model_path)

In [None]:
# Predict on the validation generator. Model.predict accepts generators
# directly; predict_generator is deprecated in TF 2.x and absent from newer Keras.
predicted_vals = model_fusion.predict(image_val)

In [None]:
# ROC evaluation through the project helper, given the label names, the
# predictions and the validation generator; the result is averaged in the next cell.
auc_rocs = util.get_roc_curve(labels, predicted_vals, image_val)

In [None]:
# Average the values returned by util.get_roc_curve and report the mean AUC.
mean_auroc = np.mean(auc_rocs)
print("Mean AUC: {}".format(mean_auroc))

In [None]:
from numba import cuda 
# Reset the current CUDA device through numba
# (presumably to release the GPU memory held by this kernel — confirm).
device = cuda.get_current_device()
device.reset()
print(device)

In [None]:
# from sklearn.metrics import confusion_matrix
# from sklearn.metrics import classification_report
# from sklearn.metrics import precision_score
# from sklearn.metrics import recall_score
# from sklearn.metrics import multilabel_confusion_matrix
# from sklearn.metrics import f1_score
# from sklearn.metrics import accuracy_score

# start = time.time()
# y_prob = model_fusion.predict(image_val)
# end_test = time.time() - start

# y_pred = np.argmax(y_prob, axis=1)
# y_true = image_val.classes

# print("Precision:", precision_score(y_true, y_pred, average='weighted'), "Recall:", recall_score(y_true, y_pred, average='weighted'))

# cm = multilabel_confusion_matrix(y_true, y_pred)

# a = []

# for i in range(len(cm)):
#     a.append(cm[i].ravel())

# tp, fn, fp, tn =np.sum(np.array(a), axis = 0)

# print("tp, fn, fp, tn:", tp, fn, fp, tn)

# print("Specificity:", tn / (tn+fp), "Sensitivity:", tp / (tp+fn))
# print("F1-Score:", f1_score(y_true, y_pred, average='weighted'), "Accuracy:", accuracy_score(y_true, y_pred))

# Specificty = tn / (tn+fp)
# Sensitivity = tp / (tp+fn)
# F1_Score = f1_score(y_true, y_pred, average='weighted')
# Accuracy = accuracy_score(y_true, y_pred)

In [None]:
# result = [y_true, y_pred]
# pd.DataFrame(result).to_csv("{}_{}_result.csv".format(model_name, n_fold))
# pd.DataFrame(y_prob).to_csv("{}_{}_probability.csv".format(model_name, n_fold))

In [None]:
# from sklearn.metrics import roc_curve
# from sklearn.metrics import roc_auc_score

# fpr_keras, tpr_keras, thresholds_keras = roc_curve(y_true, y_pred)
# m_auc = roc_auc_score(y_true, y_pred)

# print(fpr_keras, tpr_keras, thresholds_keras, m_auc)

# from sklearn.metrics import auc
# auc_keras = auc(fpr_keras, tpr_keras)
# print(auc_keras)

In [None]:
# # keep probabilities for the positive outcome only
# lr_probs = y_prob[:, 1]
# lr_auc = roc_auc_score(y_true, lr_probs)
# print('Model Fusion: ROC AUC=%.3f' % (lr_auc))

# lr_fpr, lr_tpr, _ = roc_curve(y_true, lr_probs)

# plt.figure(figsize=(10, 10))
# font={'size':'15'}
# plt.rc('font',**font)

# plt.plot([0, 1], [0, 1], 'k--', label='No Skill')
# plt.plot(lr_fpr, lr_tpr, marker='.', label='{} Model ROC curve (area = %0.2f)'.format(model_name) % lr_auc)

# roc_auc = [lr_fpr, lr_fpr]
# pd.DataFrame(roc_auc).to_csv("{}_roc_auc.csv".format(model_name))

# # axis labels
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# # show the legend
# plt.legend()

# AUC_Score = lr_auc

In [None]:
# Result = [Specificty, Sensitivity, F1_Score, Accuracy, AUC_Score, end_train, end_test, model_size, model_parameter]
# colname = ['Specificty', 'Sensitivity', 'F1_Score', 'Accuracy', 'AUC_Score', 'end_train', 'end_test', 'model_size', 'model_parameter']

# pd.DataFrame([Result], columns=colname).to_csv("{}_fold_".format(model_name)+str(n_fold)+".csv")

In [None]:
# from sklearn.metrics import ConfusionMatrixDisplay
# from sklearn.metrics import confusion_matrix
# import matplotlib.pyplot as plt

# plt.figure(figsize=(15, 15))

# labels = ["Sperm", "Impurity"]
# cm = confusion_matrix(y_true, y_pred)
# disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)

# plt.rcParams['figure.figsize']=[15,15]
# font={'size':'30'}
# plt.rc('font',**font)
# disp.plot(cmap=plt.cm.Blues)