In [None]:

import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')

import numpy as np
import pandas as pd
import os
from glob import glob
# !pip install seaborn
# !pip install scikit-plot
import seaborn as sns
from PIL import Image
import sys

from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split
from scipy import stats
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.applications.densenet import DenseNet201
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, Activation, GlobalAveragePooling2D, Input, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras import regularizers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping, LearningRateScheduler
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers.experimental import preprocessing
import tensorflow.keras.backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNet, EfficientNetB0, EfficientNetB4, InceptionV3, VGG16, ResNet50, Xception, InceptionResNetV2

import datetime
# Wall-clock start of the whole run; the last cell reports the elapsed time.
total_start = datetime.datetime.now()


# Pick a distribution strategy: TPU when one can be resolved and initialised,
# otherwise TensorFlow's default strategy.
# NOTE(review): `strategy` is not referenced again in the visible notebook
# (no `strategy.scope()`), so model building is not placed under it — confirm
# whether that is intended.
tpu = None
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Device:', tpu.master())
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)

    strategy = tf.distribute.TPUStrategy(tpu)
except Exception as tpu_error:
    # `except Exception` (instead of a bare `except:`) keeps KeyboardInterrupt
    # and SystemExit propagating, and the reason for the fallback is reported
    # instead of being silently discarded.
    print('TPU not available, falling back to default strategy:', repr(tpu_error))
    strategy = tf.distribute.get_strategy()
print('Number of replicas:', strategy.num_replicas_in_sync)

print("Tensorflow version ", tf.__version__)

# **Setting Parameters**

In [None]:
# Seed both NumPy and TensorFlow; seeding NumPy alone leaves TensorFlow's
# random ops unseeded.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

RESIZED_TRAIN_DATA_PATH = '/kaggle/input/isic2018-224x224/resized-dataset/'
csv_path = '/kaggle/input/isic2018-task3-gt/ISIC2018_Task3_Training_GroundTruth.csv'
densenet_weights_path = '../../densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5'

# One-hot label columns selected from the ground-truth CSV.
classes = ['MEL', 'NV', 'BCC', 'AKIEC', 'BKL', 'DF', 'VASC']
# Side length (pixels) of the square network input.
size = 224

BATCH_SIZE = 64

# The same seven class names, in alphabetical order.
CLASSES = ['AKIEC', 'BCC', 'BKL', 'DF', 'MEL', 'NV', 'VASC']
NUM_CLASSES = len(CLASSES)
# Derived from `size` so the three shape settings cannot drift apart.
IMAGE_SIZE = [size, size]
input_shape = (size, size, 3)

METRICS = ['accuracy']
LOSS = ['categorical_crossentropy']

Epochs = 150
Early_Stop = 15
OPTIMIZER = tf.keras.optimizers.legacy.Adam(learning_rate = 1e-3, decay = 1e-6)

# Define the learning rate schedule function
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-3,
    decay_steps=10000,
    decay_rate=1e-7
)

# Define the function that returns the learning rate for the current epoch.
# The schedule is evaluated with the epoch index as its step argument.
def lr_callback(epoch):
    return tf.keras.backend.get_value(lr_schedule(epoch))

# Define the learning rate scheduler callback
lr_scheduler = LearningRateScheduler(lr_callback)

fold = 0
# Define the name for the checkpoint file.
# The f-string interpolates `fold` once, here, so this name always starts with
# `fold_0`; only the epoch / val_accuracy placeholders are left for Keras.
checkpoint_name = f'fold_{fold}_{{epoch:02d}}_{{val_accuracy:.4f}}.tf'
checkpoint_dir = '../../2018/balanced-approach/checkpoints'
# Define the path for the checkpoint file
checkpoint_path = os.path.join(checkpoint_dir, checkpoint_name)

# NOTE(review): ReduceLROnPlateau and LearningRateScheduler both write the
# optimizer learning rate; the scheduler presumably overrides any plateau
# reduction at the start of the next epoch — confirm which one is wanted.
Callbacks = [
    ReduceLROnPlateau(monitor='val_loss', factor=0.6, patience=5, verbose=2, mode='min', min_delta=0.0001, cooldown=1, min_lr=1e-6),
    ModelCheckpoint(checkpoint_path, monitor='val_accuracy', verbose=2, save_best_only=True, save_weights_only=False, mode='max'),
    EarlyStopping(monitor='val_accuracy', patience=Early_Stop, mode='auto', min_delta=0.00001, verbose=2, restore_best_weights=True),
    lr_scheduler
    ]


# **Read and Process CSV file**

In [None]:

# Load the ground-truth table (one row per image, one-hot class columns).
data = pd.read_csv(csv_path)
print(data.head())


# Convert the one-hot encoded labels to categorical labels: the column holding
# the row maximum becomes the class name.
data['target'] = data[classes].idxmax(axis=1)
print(data.head())

# Encode the class names as integer ids (fit and transform in one step).
le = LabelEncoder()
data['label'] = le.fit_transform(data['target'])
print(list(le.classes_))
print(data.sample(10))

# Data distribution visualization
fig, ax = plt.subplots(figsize=(8, 5))
data['target'].value_counts().plot(kind='bar', ax=ax)
ax.set_ylabel('count')
ax.set_title('lesion type')

fig.tight_layout()
plt.show()

# Distribution of data into various classes
# (`resample` is imported here because the balancing cell below uses it.)
from sklearn.utils import resample
print(data['target'].value_counts())


# **Balance the Dataset**

In [None]:

# Number of rows drawn (with replacement) for every class.
n_samples = 1150 #2000 #1150

# NOTE(review): rows are duplicated here (replace=True) before the k-fold
# split further down, so copies of the same image can land in both the
# training and the validation part of a fold. Consider splitting first and
# oversampling only the training part.

# Resample every class to exactly `n_samples` rows, in ascending label order.
balanced_parts = [
    resample(data[data['label'] == label], replace=True, n_samples=n_samples, random_state=42)
    for label in sorted(data['label'].unique())
]

# Combined back to a single dataframe
data_balanced = pd.concat(balanced_parts)

# Check the distribution. All the classes should be balanced now
print(data_balanced['label'].value_counts())


# Define the path as a new column
data_balanced['path'] = RESIZED_TRAIN_DATA_PATH  + data_balanced['image'] + '.jpg'
data_balanced['image_id'] = data_balanced['image'] + '.jpg'

print(data_balanced.head())


In [None]:
# Sanity-check the balanced frame: every class must have the same row count
# and the file-name columns used by the generators must be present.
class_counts = data_balanced['label'].value_counts()
print(class_counts)
assert class_counts.nunique() == 1, "classes are not balanced"
assert {'path', 'image_id'} <= set(data_balanced.columns), "missing path/image_id columns"

print(data_balanced.head())


# **Read images and resize (if needed)**

In [None]:
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import StratifiedKFold
from keras.preprocessing.image import Iterator
import numpy as np

# Augmentation settings, collected in one mapping and handed to the generator.
augmentation_options = {
    'rescale': 1./255,            # scale pixel values by 1/255
    'rotation_range': 20,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'vertical_flip': True,
    'fill_mode': 'nearest',
}
datagen = ImageDataGenerator(**augmentation_options)

# Shuffled, stratified cross-validation splitter with a fixed seed.
n_splits = 5 # number of folds
kf = StratifiedKFold(shuffle=True, random_state=42, n_splits=n_splits)

# **Define a Model**

In [None]:

# Define the CNN model
def create_model(input_shape, optimizer=None):
    """Build and compile a DenseNet201-based classifier.

    The ImageNet-initialised DenseNet201 backbone (global average pooling, no
    top) is followed by batch normalisation, a small ReLU dense layer and a
    softmax layer with NUM_CLASSES outputs. All backbone layers are frozen
    except the last 200.

    Args:
        input_shape: shape tuple passed to the Keras ``Input`` layer.
        optimizer: optimizer to compile with. When omitted, a fresh optimizer
            is created from the configuration of the module-level OPTIMIZER so
            that models built one after another do not share one optimizer
            instance (and with it the optimizer's internal state).

    Returns:
        The compiled ``tf.keras.Model`` (loss LOSS, metrics METRICS).
    """
    base_model = DenseNet201(include_top=False, input_tensor=Input(shape=input_shape), weights="imagenet", pooling='avg')

    # Freeze the whole backbone first, then re-enable training for its last
    # 200 layers.
    for layer in base_model.layers:
        layer.trainable = False
    for layer in base_model.layers[-200:]:
        layer.trainable = True

    x = BatchNormalization(axis=-1, name="Batch-Normalization-1")(base_model.output)
    x1 = Dense(NUM_CLASSES * 3, activation="relu", name="intra-dense-layers")(x)
    outputs = Dense(NUM_CLASSES, activation="softmax", name="Classifier")(x1)

    model = tf.keras.Model(inputs=base_model.input, outputs=outputs)

    if optimizer is None:
        # Same hyper-parameters as OPTIMIZER, but a new, unshared instance.
        optimizer = type(OPTIMIZER).from_config(OPTIMIZER.get_config())
    model.compile(optimizer=optimizer, loss=LOSS, metrics=METRICS)

    return model


# Build one model up front and print its layer summary. The training loop
# further down calls create_model() again for every fold and rebinds `model`.
model = create_model(input_shape)
model.summary()

# **Plotting the learning curves**

In [None]:

def Plot_Learning_Curves(history):
    """Plot training/validation loss and accuracy side by side.

    Args:
        history: object whose ``history`` attribute is a mapping with the keys
            'loss', 'val_loss', 'accuracy' and 'val_accuracy', each a
            per-epoch sequence (as returned by ``model.fit``).

    Raises:
        KeyError: if one of the four series is missing.

    A new figure is created on every call and closed afterwards, so repeated
    calls (one per fold) neither draw on top of each other nor accumulate open
    figures, and the global ``plt.rcParams`` are left untouched.
    """
    series = history.history
    loss, val_loss = series['loss'], series['val_loss']
    acc, val_acc = series['accuracy'], series['val_accuracy']

    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(14, 5))

    loss_ax.plot(loss, label='Training loss')
    loss_ax.plot(val_loss, linestyle="--", label='Validation loss')
    loss_ax.set_title('Training and validation loss')
    loss_ax.set_ylabel('Loss')
    loss_ax.set_xlabel('Epoch')
    loss_ax.legend()

    acc_ax.plot(acc, label='Training Accuracy')
    acc_ax.plot(val_acc, linestyle="--", label='Validation Accuracy')
    acc_ax.set_title('Training and validation accuracy')
    acc_ax.set_ylabel('Accuracy')
    acc_ax.set_xlabel('Epoch')
    acc_ax.legend()

    plt.show()
    plt.close(fig)


# **Train the Model**

In [None]:
from sklearn.metrics import precision_score, recall_score, confusion_matrix, roc_curve, auc
import scikitplot as skplt

scores, losses = [], []
y_pred_all = []   # predicted class index per validation sample, all folds
y_true_all = []   # true class index per validation sample, all folds
y_prob_all = []   # per-fold arrays of class probabilities (needed for ROC)
# Not populated in this cell; kept so later cells that expect them still run.
precision_scores = []
sensitivity_scores = []
specificity_scores = []

# Validation images are only rescaled. Feeding them through the augmenting
# `datagen` would randomly distort them and make validation metrics noisy.
val_datagen = ImageDataGenerator(rescale=1./255)


def _fold_generator(image_generator, frame, shuffle):
    """Return a batch iterator over `frame` using the shared loading settings."""
    return image_generator.flow_from_dataframe(
        dataframe=frame,
        directory=RESIZED_TRAIN_DATA_PATH,
        x_col='image_id',
        y_col='target',
        target_size=(size, size),
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        classes=CLASSES,   # pin the class-name -> index mapping for every fold
        shuffle=shuffle,
    )


def _fold_callbacks(fold):
    """Return fresh callbacks for one fold, with a fold-specific checkpoint name."""
    fold_checkpoint_path = os.path.join(
        checkpoint_dir, f'fold_{fold}_{{epoch:02d}}_{{val_accuracy:.4f}}.tf')
    return [
        ReduceLROnPlateau(monitor='val_loss', factor=0.6, patience=5, verbose=2, mode='min', min_delta=0.0001, cooldown=1, min_lr=1e-6),
        ModelCheckpoint(fold_checkpoint_path, monitor='val_accuracy', verbose=2, save_best_only=True, save_weights_only=False, mode='max'),
        EarlyStopping(monitor='val_accuracy', patience=Early_Stop, mode='auto', min_delta=0.00001, verbose=2, restore_best_weights=True),
        lr_scheduler,
    ]


# NOTE(review): `data_balanced` was oversampled with replacement before this
# split, so copies of one image can appear in both train_df and val_df of the
# same fold; the validation numbers below should be read with that in mind.
for fold, (train_index, val_index) in enumerate(kf.split(data_balanced['image_id'], data_balanced['target'])):

    train_df = data_balanced.iloc[train_index]
    val_df = data_balanced.iloc[val_index]
    print("lenght of train_df: ", len(train_df), " lenght of val_df: ", len(val_df))

    train_generator = _fold_generator(datagen, train_df, shuffle=True)
    # shuffle=False keeps the batch order equal to the row order, so the
    # predictions below line up with `val_generator.classes`.
    val_generator = _fold_generator(val_datagen, val_df, shuffle=False)

    print(f"Fold {fold}: Train indices: {train_index}, Test indices: {val_index}")

    # Build the model and compile it with an optimizer of its own, so no
    # optimizer state is carried over from the previous fold.
    model = create_model(input_shape)
    model.compile(optimizer=type(OPTIMIZER).from_config(OPTIMIZER.get_config()),
                  loss=LOSS,   #focal_loss(gamma=2., alpha=.25),
                  metrics=METRICS)

    # Train the model. The generators already yield batches of BATCH_SIZE, so
    # `batch_size` is not passed to fit().
    history = model.fit(train_generator, epochs=Epochs, validation_data=val_generator, callbacks=_fold_callbacks(fold))

    # Evaluate the model on this fold's held-out data
    test_loss, test_acc = model.evaluate(val_generator, verbose=1)

    scores.append(test_acc)
    losses.append(test_loss)

    print("Fold # ", fold, " Test accuracy:", test_acc, " Test Loss: ", test_loss)
    Plot_Learning_Curves(history)

    # Class probabilities, predicted labels and true labels for this fold
    y_prob = model.predict(val_generator)
    y_prob_all.append(y_prob)
    y_pred_all.extend(np.argmax(y_prob, axis=1))
    y_true_all.extend(val_generator.classes)


mean_accuracy = np.mean(scores)
print(f'Mean accuracy: {mean_accuracy}')
print(f'Losses during the Folds: ', losses)

# Array form is required for the column slicing in the ROC computation.
y_true_all = np.asarray(y_true_all)
y_pred_all = np.asarray(y_pred_all)
y_prob_all = np.concatenate(y_prob_all, axis=0)

# Calculate precision
precision = precision_score(y_true_all, y_pred_all, average='weighted')

# Calculate recall
recall = recall_score(y_true_all, y_pred_all, average='weighted')

# Calculate confusion matrix
confusion_mat = confusion_matrix(y_true_all, y_pred_all)

# One normalised confusion-matrix plot over all folds
skplt.metrics.plot_confusion_matrix(y_true_all, y_pred_all, normalize=True)
plt.show()

# One-vs-rest ROC curve and AUC per class, computed from the predicted
# probabilities against one-hot encoded true labels.
y_true_onehot = np.eye(NUM_CLASSES)[y_true_all]
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(NUM_CLASSES):
    fpr[i], tpr[i], _ = roc_curve(y_true_onehot[:, i], y_prob_all[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Print the results
print("Precision:", precision)
print("Recall:", recall)
print("Confusion Matrix:")
print(confusion_mat)
print("ROC AUC:")
for i in range(NUM_CLASSES):
    print(f"Class {i}: {roc_auc[i]}")
    


In [None]:
# Report the wall-clock time since `total_start` was recorded in the first cell.
total_end = datetime.datetime.now()
elapsed = total_end - total_start

print ('Total time elapsed: ', elapsed)