# **IMPORTING REQUIRED LIBRARIES**





In [3]:
#pip install keras==2.3.0

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

#from google.colab import files
import os
import zipfile

from glob import glob
from PIL import Image as pil_image
from matplotlib.pyplot import imshow, imsave
from IPython.display import Image as Image

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from keras.utils.np_utils import to_categorical
import keras
from keras.models import Model, Sequential
from keras.layers import Dense, Dropout, Flatten, Input, AveragePooling2D, merge, Activation
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.layers import Concatenate, GlobalAveragePooling2D
from keras.optimizers import Adam, SGD
from keras import regularizers, initializers
from keras.layers.advanced_activations import LeakyReLU, ReLU, Softmax
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda
from keras.layers.merge import concatenate
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import *
from keras.utils import plot_model
from keras.models import load_model

ModuleNotFoundError: No module named 'keras'

# **DATA ANALYSIS**

In [None]:
# LOADING THE DATASET CSV IN PANDAS DATAFRAME
# The file is read relative to the current working directory. Later cells rely
# on its 'dx', 'dx_type', 'localization', 'age', 'sex' and 'image_id' columns.
main_df = pd.read_csv('HAM10000_metadata.csv')
main_df.head()

In [None]:
# Number of rows per diagnosis code. The column is passed by keyword because
# newer seaborn releases accept only `data` as a positional argument.
sns.countplot(x='dx', data=main_df)

In [None]:
# Row count for each diagnosis code in the 'dx' column.
main_df['dx'].value_counts()

In [None]:
# Number of rows per body localization. The column is passed by keyword because
# newer seaborn releases accept only `data` as a positional argument.
plt.figure(figsize=(10,8))
sns.countplot(x='localization', data=main_df)
# Rotate the category labels so they do not overlap.
plt.xticks(rotation=90)

In [None]:
# Number of rows per 'dx_type' value. The column is passed by keyword because
# newer seaborn releases accept only `data` as a positional argument.
sns.countplot(x='dx_type', data=main_df)

In [None]:
# Row count for each value of the 'dx_type' column.
main_df['dx_type'].value_counts()

In [None]:
# FINDING THE NUMBER OF MISSING VALUES
# Count of NaN entries per column, columns with the most gaps listed first.
main_df.isna().sum().sort_values(ascending=False)

In [None]:
# Mean and median of 'age' before imputation, for reference.
print(main_df['age'].mean())
print(main_df['age'].median())
# Replace missing ages with the column mean. The result is assigned back to the
# column: calling fillna(inplace=True) on main_df['age'] is a chained assignment
# that may act on a temporary copy and leave main_df unchanged (pandas warns
# about this, and it has no effect under copy-on-write).
main_df['age'] = main_df['age'].fillna(main_df['age'].mean())

In [None]:
# Smoothed distribution of patient age. `fill` replaces the deprecated `shade`
# argument, and the column is selected by keyword from the dataframe.
sns.kdeplot(data=main_df, x='age', fill=True)

In [None]:
# Row count for each value of the 'sex' column.
main_df['sex'].value_counts()

In [None]:
# CREATING A DICTIONARY TO FEED IN LABEL VALUES IN OUR DATAFRAME
# Maps each short diagnosis code found in the 'dx' column to a readable lesion name.
lesion_type_dict = {
    'nv': 'Melanocytic nevi',
    'mel': 'Melanoma',
    'bkl': 'Benign keratosis-like lesions ',
    'bcc': 'Basal cell carcinoma',
    'akiec': 'Actinic keratoses',
    'vasc': 'Vascular lesions',
    'df': 'Dermatofibroma'
}

# Maps the integer class index (what the network predicts) back to the lesion name.
# The indices are produced by pd.Categorical(...).codes on the lesion names, which
# numbers the names in sorted order -- not in the order lesion_type_dict lists them.
# A hand-written table in dictionary order therefore labels predictions with the
# wrong lesion. Deriving the table from the sorted names keeps both in step:
#   0 Actinic keratoses, 1 Basal cell carcinoma, 2 Benign keratosis-like lesions,
#   3 Dermatofibroma, 4 Melanocytic nevi, 5 Melanoma, 6 Vascular lesions
# NOTE(review): assumes every class occurs in the metadata; a class with no rows
# would shift the categorical codes -- confirm on the dataset in use.
lesion_classes_dict = dict(enumerate(sorted(lesion_type_dict.values())))

# Add a readable 'cell_type' column by translating each 'dx' code through
# lesion_type_dict (codes missing from the dictionary map to NaN).
main_df['cell_type'] = main_df['dx'].map(lesion_type_dict)

In [None]:
  # Integer class label per row, taken from the categorical codes of 'cell_type'.
  # NOTE(review): with no explicit category list the codes follow the sorted order
  # of the lesion names, so any index -> name lookup must use that same order.
  main_df['cell_type_idx'] = pd.Categorical(main_df['cell_type']).codes

In [None]:
# CREATING IMAGE PATH
# Map image id (file name without extension) -> file path for every .jpg that
# sits exactly one directory below the current working directory.
image_path = {os.path.splitext(os.path.basename(x))[0]: x for x in glob(os.path.join('', '*', '*.jpg'))}
# Report how many images were found instead of dumping the whole dictionary,
# which would flood the notebook output with one entry per image.
print(len(image_path), 'images found')

In [None]:
# Attach the file path of each image via its 'image_id'. dict.get returns None
# for ids that were not found on disk, so such rows end up with a missing path.
main_df['path'] = main_df['image_id'].map(image_path.get)
main_df.head()

In [None]:
# Load the image of the row labelled 0 at its original resolution as an array.
image_example = np.asarray(pil_image.open(main_df['path'][0]))

In [None]:
# Dimensions of the unresized sample image.
image_example.shape

In [None]:
# PRINTING A SAMPLE IMAGE
# Shows the full-resolution sample loaded above.
plt.imshow(image_example)

In [None]:
# RESIZING THE IMAGE TO SMALLER SIZE TO MAKE TRAINING EASY
def _load_resized(path):
    """Open the image at `path`, resize it to (120, 90) and return it as an array."""
    resized = pil_image.open(path).resize((120,90))
    return np.asarray(resized)

# One resized pixel array per row, stored in the new 'image' column.
main_df['image'] = main_df['path'].map(_load_resized)

In [None]:
# Preview the dataframe, now including the 'image' column.
main_df.head()

In [None]:
# Show the resized image of the row labelled 0.
plt.imshow(main_df['image'][0])

In [None]:
# Dimensions of a resized image; later cells reshape to (90, 120, 3).
main_df['image'][0].shape

# **PRINTING IMAGES OF EACH CLASS**

In [None]:

# Grid of 7 rows x 5 columns: one row per lesion type, five random images each.
fig, axes = plt.subplots(7, 5, figsize=(20, 21))
rows_by_type = main_df.sort_values(['cell_type']).groupby('cell_type')
for axis_row, (type_name, type_rows) in zip(axes, rows_by_type):
    # Only the first panel of each row carries the lesion name as its title.
    axis_row[0].set_title(type_name)
    picked = type_rows.sample(5)
    for panel, (_, picked_row) in zip(axis_row, picked.iterrows()):
        panel.imshow(picked_row['image'])
        panel.axis('off')

# **DATA PROCESSING AND MODELING**

In [None]:
# Everything except the label column is kept on the feature side; the label is
# the integer class index.
features = main_df.drop(['cell_type_idx'],axis=1)

target = main_df['cell_type_idx']

# SPLITTING THE DATASETS INTO TRAIN AND TEST SET
# 1% of the rows are held out for testing. A fixed random_state makes the split
# (and every number reported further down) reproducible across runs.
X_TRAIN, X_TEST, Y_TRAIN, Y_TEST = train_test_split(features,target,test_size=0.01,random_state=42)

In [None]:
# CREATING IMAGE TRAINING AND TEST SET
# Stack the per-row image arrays of each split into a single array.
x_train, x_test = (
    np.asarray(split['image'].tolist())
    for split in (X_TRAIN, X_TEST)
)

In [None]:
# Shapes of the stacked train and test image arrays.
print(x_train.shape)
print(x_test.shape)

In [None]:
# Mean and standard deviation over every value in each image array
# (a single scalar per split, not per channel).
train_mean, train_std = x_train.mean(), x_train.std()
test_mean, test_std = x_test.mean(), x_test.std()

In [None]:
# Show the pixel statistics of both splits for comparison.
print(train_mean)
print(train_std)
print(test_mean)
print(test_std)

# **IMAGE STANDARDIZATION**

In [None]:
# Standardize pixel values to roughly zero mean and unit variance.
x_train = (x_train-train_mean) / train_std
# The test images are scaled with the TRAINING statistics: the network only ever
# sees inputs in that scale, and using the test set's own mean/std would apply a
# different transformation at evaluation time than at training time.
x_test = (x_test-train_mean) / train_std

In [None]:
# Shapes of the integer label vectors before one-hot encoding.
print(Y_TRAIN.shape)
print(Y_TEST.shape)

In [None]:
# CONVERTING NX1 TO NX7 SINCE WE HAVE 7 CLASSES
# One-hot encode the integer labels of both splits.
y_train, y_test = (
    to_categorical(labels, num_classes=7)
    for labels in (Y_TRAIN, Y_TEST)
)

In [None]:
# Shapes of the one-hot encoded label arrays.
print(y_train.shape)
print(y_test.shape)

In [None]:
# Hold out 15% of the training images for validation. A fixed random_state keeps
# the split identical between runs so training curves are comparable.
X_train,X_val, Y_train,Y_val = train_test_split(x_train,y_train,test_size=0.15,random_state=42)

In [None]:
# RESHAPING SO THAT WE CAN FEED THIS INTO OUR NEURAL NETWORK
# Each array becomes (number of images, 90, 120, 3).
X_train, x_test, X_val = (
    images.reshape(images.shape[0], 90, 120, 3)
    for images in (X_train, x_test, X_val)
)

In [None]:
# Final shapes of the training, test and validation image arrays.
print(X_train.shape)
print(x_test.shape)
print(X_val.shape)

# **MODEL**

In [None]:
# COMMAND TO INSTALL KERAS OPTIMIZERS
#pip install -q efficientnet
#git clone https://github.com/bckenstler/CLR.git
#pip install -q keras-radam
#git clone https://github.com/bojone/accum_optimizer_for_keras.git
#pip install -q keras-lr-multiplier

In [None]:
!pip install efficientnet

In [None]:
# IMPORTING THE EFFICIENTNETB3 MODEL
import tensorflow as tf
import keras
import efficientnet.tfkeras as efn

In [None]:
# INITIALIZING THE MODEL
# EfficientNet-B3 without its top classification layers (include_top=False),
# initialised from the 'noisy-student' weights, for inputs of shape (90, 120, 3).
model = efn.EfficientNetB3(weights ='noisy-student', include_top=False,
                           input_shape = (90, 120, 3))

In [None]:
# IMPORTING LIBRARIES
from tensorflow.keras.layers import GlobalAveragePooling2D,Dropout,Dense
from tensorflow.keras.callbacks import ModelCheckpoint,ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K
from tensorflow.keras.models import Sequential,Model

In [None]:
# MAKING CHANGES IN THE OUTPUT LAYER OF EFFICIENTNETB3 MODEL
# Layers stacked on top of the backbone, applied in this order.
head_layers = [
    GlobalAveragePooling2D(),
    Dropout(0.3),
    Dense(128, activation="relu"),
    Dropout(0.3),
    Dense(64, activation="relu"),
]
x = model.output
for head_layer in head_layers:
    x = head_layer(x)
# Final 7-way softmax, one output per lesion class.
predictions = Dense(7, activation="softmax")(x)
# Rebind `model` to the full network: backbone input -> class probabilities.
model = Model(inputs=model.input, outputs=predictions)

In [None]:
# COMPILING THE MODEL
# Adam with an initial learning rate of 0.001; categorical cross-entropy matches
# the one-hot encoded labels; accuracy is tracked as the reporting metric.
model.compile(optimizer = Adam(0.001) , loss = "categorical_crossentropy", metrics=['accuracy'])

In [None]:
# SET A LEARNING RATE ANNEALER
# Halve the learning rate (factor=0.5) when the validation loss has not improved
# for 3 epochs (patience=3), never going below 1e-6 (min_lr).
learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss',
                                            patience=3, 
                                            factor=0.5, 
                                            min_lr=0.000001)

# **TRAINING THE DATA**

In [None]:
# WITH DATA AUGMENTATION TO PREVENT OVERFITTING

datagen = ImageDataGenerator(
        rotation_range=10,  # RANDOMLY ROTATE IMAGES BY UP TO 10 DEGREES
        zoom_range = 0.1 # RANDOMLY ZOOM IMAGES BY UP TO 10%
)


# FIT THE MODEL
epochs = 20
batch_size = 16
# Model.fit accepts the augmenting generator directly; fit_generator is
# deprecated in TensorFlow 2.x. The returned History is kept in `history`
# (it is also still reachable as model.history for the plotting cells).
history = model.fit(datagen.flow(X_train,Y_train, batch_size=batch_size),
                    epochs = epochs, validation_data = (X_val,Y_val),
                    verbose = 1, steps_per_epoch=X_train.shape[0] // batch_size,
                    callbacks=[learning_rate_reduction])

# **PLOTTING GRAPH**

In [None]:
def plot_(history):
    """Plot training vs. validation accuracy and loss side by side.

    Args:
        history: object whose ``history`` attribute is a dict of per-epoch
            metric lists. It must contain 'loss' and 'val_loss', and the
            accuracy under either 'acc'/'val_acc' or 'accuracy'/'val_accuracy'.

    Raises:
        KeyError: if a required metric is missing from ``history.history``.
    """
    metrics = history.history
    # The accuracy metric is recorded as 'acc' by older Keras releases and as
    # 'accuracy' by tf.keras 2.x; accept whichever spelling is present.
    acc_key = 'acc' if 'acc' in metrics else 'accuracy'
    acc = metrics[acc_key]
    val_acc = metrics['val_' + acc_key]

    loss = metrics['loss']
    val_loss = metrics['val_loss']

    f, [ax1, ax2] = plt.subplots(1,2, figsize=(15, 5))
    # Left panel: accuracy per epoch.
    ax1.plot(range(len(acc)), acc, label="accuracy")
    ax1.plot(range(len(acc)), val_acc, label="val_accuracy")
    ax1.set_title("Training Accuracy vs Validation Accuracy")
    ax1.legend()

    # Right panel: loss per epoch.
    ax2.plot(range(len(loss)), loss, label="loss")
    ax2.plot(range(len(loss)), val_loss, label="val_loss")
    ax2.set_title("Training Loss vs Validation Loss")
    ax2.legend()
    
    
# Draw the curves from model.history, which is expected to have been populated
# by the training cell above.
plot_(model.history)

In [None]:
# PRINTING THE ACCURACY OF THE MODEL
# Best validation accuracy over all epochs, as a percentage. The metric is
# recorded as "val_acc" by older Keras releases and as "val_accuracy" by
# tf.keras 2.x, so use whichever key is present instead of raising KeyError.
history_metrics = model.history.history
val_acc_key = "val_acc" if "val_acc" in history_metrics else "val_accuracy"
print("MAXIMUM ACCURACY : ",
      round(max(history_metrics[val_acc_key])*100,4))

# **SAVING THE MODEL**

In [None]:
# Write the trained model to model.h5 in the current working directory.
model.save("model.h5")

In [None]:
# Rebuild the same architecture that was trained above so the saved parameters
# can be restored into it (e.g. in a fresh session used only for prediction).
model = efn.EfficientNetB3(weights ='noisy-student', include_top=False,
                           input_shape = (90, 120, 3))
x = model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.3)(x)
x = Dense(128, activation="relu")(x)
x = Dropout(0.3)(x)
x = Dense(64, activation="relu")(x)
predictions = Dense(7, activation="softmax")(x)
model = Model(inputs=model.input, outputs=predictions)
model.compile(optimizer = Adam(0.001) , loss = "categorical_crossentropy", metrics=['accuracy'])

# Restore the trained parameters from disk. Calling save_weights here instead
# would overwrite model.h5 with this freshly built network, whose head is still
# randomly initialised, and leave `model` untrained for the prediction cells.
model.load_weights("model.h5")

# **PREDICTION OF IMAGE**

In [None]:
# RESIZING THE IMAGE
# Same (120, 90) resize that was applied to the training images.
# NOTE(review): absolute, machine-specific path -- point this at the local copy
# of the image before running.
resized_image = np.asarray(pil_image.open('C:\\Users\\shiva\\Desktop\\project\\SkinCancer\\Code\\HAM10000_images_part_2\\ISIC_0029310.jpg').resize((120,90)))
# Apply the standardization used for the training images; the network was fitted
# on standardized pixels, so feeding raw 0-255 values gives meaningless output.
image_array = (resized_image - train_mean) / train_std
# Add the leading batch dimension expected by model.predict.
test_image = image_array.reshape(1,90,120,3)

In [None]:
# Class probabilities for the single test image (one row, one column per class).
prediction_class = model.predict(test_image)
# Index of the most probable class for each row. A URL had been pasted into the
# middle of `np.argmax`, which made this line a syntax error.
prediction_class = np.argmax(prediction_class,axis=1)

In [None]:
# Display the predicted class index (an array with one entry per input image).
prediction_class

In [None]:
# PREDICTING THE IMAGE
# Translate the predicted class index into its lesion name.
# NOTE(review): only correct when lesion_classes_dict uses the same index order
# as the 'cell_type_idx' labels the model was trained on -- verify.
lesion_classes_dict[prediction_class[0]]