<a href="https://colab.research.google.com/github/rahatarinasir/HAM10000/blob/main/Transfer_learning_implementation_code_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Initialization 

In [None]:
import keras
import tensorflow as tf
from tensorflow.keras.applications.resnet import ResNet50, ResNet101, ResNet152, preprocess_input
import random 
import numpy as np
import pandas as pd
import cv2

# data_path = 'https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/DBW86T'

## Metadata loading


In [None]:
# CSV file with the HAM10000 metadata; its `dx` column is used further down
# to count the samples of each diagnosis class.
path_to_metadata = 'G:/Data/HAM10000_metadata.csv'
metadata = pd.read_csv(filepath_or_buffer=path_to_metadata)

## Computation of class weights

In [None]:
# class_weights = {0:1, 1:1, 2:1, 3:10, 4:1, 5:20, 6:20}

# Diagnosis codes as they are matched against the metadata `dx` column.
# The position of a code in this list is its class index.
label = [
    'mel',
    'nv',
    'bcc',
    'akiec',
    'bkl',
    'df',
    'vasc',
]

def estimate_class_weights(label, method='mfb', metadata_df=None):
    """Compute per-class loss weights from the class frequencies in the metadata.

    Args:
        label: Sequence of diagnosis codes as they appear in the ``dx`` column.
            The code at position ``i`` becomes class index ``i`` in the result.
        method: ``'mfb'`` divides the median class count by each class count;
            any other value divides the largest class count by each class count.
        metadata_df: DataFrame with a ``dx`` column. When omitted, the
            module-level ``metadata`` frame is used.

    Returns:
        dict mapping class index to its weight (``np.float32``).

    Raises:
        ValueError: if one of the requested classes has no rows in ``dx``
            (its weight would otherwise be a division by zero).
    """
    frame = metadata if metadata_df is None else metadata_df
    dx = frame['dx']

    # Count rows per class directly; a boolean sum is 0 for an absent class
    # instead of failing on an empty value_counts() result.
    counts = np.array([(dx == str(name)).sum() for name in label],
                      dtype=np.float32)

    missing = [str(name) for name, count in zip(label, counts) if count == 0]
    if missing:
        raise ValueError(f"No samples found in 'dx' for classes: {missing}")

    reference = np.median(counts) if method == 'mfb' else np.max(counts)

    # One weight per entry of `label`, however many classes were passed in.
    return {index: weight for index, weight in enumerate(reference / counts)}


# Weights computed as (median class count / class count), keyed by the
# position of each diagnosis code in `label`.
class_weights = estimate_class_weights(label, method='mfb')
class_weights  # last expression of the cell: shows the weights in the notebook

## Sequence generator

In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that reads image batches from disk with their labels.

    The label CSV must contain an ``image`` column with the image file names
    (without extension); all remaining columns form the target vector of that
    image. Each image is read from ``images_address + name + '.jpg'``, resized
    to ``dim`` and passed through the ResNet50 ``preprocess_input``.

    Args:
        batch_size: Number of samples per batch. Samples that do not fill a
            complete batch are left out of the epoch (see ``__len__``).
        dim: Target image size as ``(rows, cols)``.
        n_channels: Number of image channels in the batch array.
        n_classes: Number of label columns expected in the CSV.
        shuffle: Whether to reshuffle the sample order at every epoch end.
        images_address: Directory prefix of the image files; it is
            concatenated with the image name, so it must end with a separator.
        label_address: Path of the ground-truth CSV file.

    Raises:
        ValueError: if the CSV does not provide ``n_classes`` label columns.
    """

    def __init__(self,
                 batch_size=5,
                 dim=(224, 224),
                 n_channels=3,
                 n_classes=7,
                 shuffle=True,
                 images_address='G:\\Data\\ISIC2018_Task3_Training_Input\\',
                 label_address='G:\\Data\\ISIC2018_Task3_Training_GroundTruth\\ISIC2018_Task3_Training_GroundTruth.csv'
                 ):
        # Let the Keras base class set up its own state.
        super().__init__()

        self.dim = dim
        self.batch_size = batch_size
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.images_address = images_address

        table = pd.read_csv(label_address)
        self.image_name = table['image'].values
        self.labels = table.drop(columns=['image'])

        # Convert the label table once; batches index into this array by row
        # instead of searching the image names for every sample.
        self._targets = self.labels.to_numpy(dtype=np.float32)
        if self._targets.shape[1] != self.n_classes:
            raise ValueError(
                f'Expected {self.n_classes} label columns in {label_address}, '
                f'found {self._targets.shape[1]}'
            )

        self.on_epoch_end()

    def __len__(self):
        """Return the number of complete batches per epoch."""
        return int(np.floor(len(self.image_name) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(images, labels)`` pair.

        Raises:
            IndexError: if ``index`` is outside ``[0, len(self))``. This also
                terminates plain ``for`` iteration that falls back to the
                ``__getitem__`` protocol.
        """
        if not 0 <= index < len(self):
            raise IndexError(
                f'batch index {index} out of range for {len(self)} batches'
            )

        # Row positions (into the label table) that make up this batch.
        batch_rows = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        return self.__data_generation(batch_rows)

    def on_epoch_end(self):
        """Reset the sample order, reshuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.image_name))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, batch_rows):
        """Load and preprocess the images of ``batch_rows`` and fetch their labels.

        Args:
            batch_rows: Integer row positions into the label table.

        Returns:
            ``(images, labels)`` as float32 arrays of shape
            ``(len(batch_rows), *dim, n_channels)`` and
            ``(len(batch_rows), n_classes)``.

        Raises:
            FileNotFoundError: if an image file cannot be read.
        """
        images = np.empty((len(batch_rows), *self.dim, self.n_channels),
                          dtype=np.float32)
        # cv2.resize takes the size as (width, height), i.e. (cols, rows).
        target_size = (self.dim[1], self.dim[0])

        for slot, row in enumerate(batch_rows):
            path = self.images_address + self.image_name[row] + '.jpg'

            img = cv2.imread(path)
            if img is None:
                # cv2.imread signals an unreadable file by returning None.
                raise FileNotFoundError(f'Could not read image: {path}')

            # OpenCV loads BGR, while preprocess_input expects RGB input.
            # NOTE(review): weights trained with the previous BGR input were
            # fitted to swapped channels and should be retrained.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # The interpolation flag must be passed by keyword; the third
            # positional argument of cv2.resize is the output array.
            img = cv2.resize(img, target_size, interpolation=cv2.INTER_CUBIC)

            images[slot] = tf.keras.applications.resnet50.preprocess_input(
                img.astype(np.float32)
            )

        return images, self._targets[batch_rows]


## Data loading 

In [None]:
# Windows paths are written with doubled backslashes so that they contain no
# invalid escape sequences (such as "\D" or "\I"); the resulting strings are
# the same as before.
img_path_tr = 'G:\\Data\\ISIC2018_Task3_Training_Input\\'
label_path_tr = 'G:\\Data\\ISIC2018_Task3_Training_GroundTruth\\ISIC2018_Task3_Training_GroundTruth.csv'
img_path_val = 'G:\\Data\\ISIC2018_Task3_Validation_Input\\'
label_path_val = 'G:\\Data\\ISIC2018_Task3_Validation_GroundTruth\\ISIC2018_Task3_Validation_GroundTruth.csv'

# Same batch size for the training and the validation generator.
batch_size = 5
train_generator = DataGenerator(batch_size=batch_size,
                                images_address=img_path_tr,
                                label_address=label_path_tr)

validation_generator = DataGenerator(batch_size=batch_size,
                                     images_address=img_path_val,
                                     label_address=label_path_val)


## Transfer learning - Feature representation of ResNet50 with customized learning

In [None]:
# ImageNet-pretrained ResNet50 without its classification head, frozen so that
# only the layers added below are trained.
feature_maps = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
feature_maps.trainable = False

# Build the model with tf.keras, the same package ResNet50 is imported from;
# mixing the standalone `keras` package with tensorflow.keras layers can fail
# when the two installations differ.
model = tf.keras.models.Sequential()
model.add(feature_maps)
# model.add(tf.keras.layers.MaxPool2D((7, 7)))
model.add(tf.keras.layers.Flatten())
# One softmax output per diagnosis class.
model.add(tf.keras.layers.Dense(7, activation='softmax'))
model.summary()

In [None]:
# model.load_weights('cancer_model_primary_frozen_layers_weighting.hdf5')

#### Another way of construction

In [None]:
# target_model = feature_maps.output
# target_model = keras.layers.AveragePooling2D(pool_size = (7, 7))(target_model)
# target_model = keras.layers.Flatten()(target_model)
# target_model = keras.layers.Dense(7, activation = 'softmax')(target_model)
# model = keras.models.Model(inputs = feature_maps.input, outputs = target_model)

## Model configuration

In [None]:
# class_weights = {0:1, 1:1, 2:1, 3:10, 4:1, 5:20, 6:20}

# Adam optimiser with one-hot cross-entropy loss; the unweighted
# cross-entropy and the accuracy are reported as metrics.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='categorical_crossentropy',
    metrics=['categorical_crossentropy', 'accuracy'],
)

# Train on the batch generator and weight the loss per class with
# `class_weights`.
# NOTE(review): Keras documents `shuffle` as ignored for generator/Sequence
# inputs; the sample order is controlled by the generator itself - confirm.
model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10,
    shuffle=True,
    class_weight=class_weights,
)

In [None]:
# model.save('model.hdf5')

In [None]:
# With batch_size=1 every validation sample forms its own batch, so none is
# left out by the whole-batch rounding of the generator length.
validation_generator = DataGenerator(batch_size=1,
                                     images_address=img_path_val,
                                     label_address=label_path_val)


y_tr_pre = []  # predicted class index of each sample
y_tr_te = []   # ground-truth class index of each sample

# Walk the batches by explicit index: the loop is bounded by len() and does
# not depend on how the Sequence base class implements iteration.
for batch_index in range(len(validation_generator)):
    x, y = validation_generator[batch_index]
    # verbose=0 suppresses the progress bar that would be printed per sample.
    y_pre_te = model.predict(x, verbose=0)
    y_tr_pre.append(np.argmax(y_pre_te[0], axis=0))
    y_tr_te.append(np.argmax(y[0], axis=0))


In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Confusion matrix of the validation predictions
# (ground truth first, predictions second).
val_confusion = confusion_matrix(y_true=y_tr_te, y_pred=y_tr_pre)
print(val_confusion)

In [None]:
# Per-class report for the validation predictions.
val_report = classification_report(y_true=y_tr_te, y_pred=y_tr_pre)
print(val_report)

In [None]:
# With batch_size=1 every training sample forms its own batch, so none is
# left out by the whole-batch rounding of the generator length.
train_generator = DataGenerator(batch_size=1,
                                images_address=img_path_tr,
                                label_address=label_path_tr)

y_tr_pre = []  # predicted class index of each sample
y_tr_te = []   # ground-truth class index of each sample

# Walk the batches by explicit index: the loop is bounded by len() and does
# not depend on how the Sequence base class implements iteration.
for batch_index in range(len(train_generator)):
    x, y = train_generator[batch_index]
    # verbose=0 suppresses the progress bar that would be printed per sample.
    y_pre_te = model.predict(x, verbose=0)
    y_tr_pre.append(np.argmax(y_pre_te[0], axis=0))
    y_tr_te.append(np.argmax(y[0], axis=0))

In [None]:
# Confusion matrix of the training predictions
# (ground truth first, predictions second).
train_confusion = confusion_matrix(y_true=y_tr_te, y_pred=y_tr_pre)
print(train_confusion)

In [None]:
# Per-class report for the training predictions.
train_report = classification_report(y_true=y_tr_te, y_pred=y_tr_pre)
print(train_report)