<a href="https://colab.research.google.com/github/mahenoor22/skin-disease-build/blob/main/model_build/HAM28_Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import PIL as pil
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import seaborn as sns
 
# Restrict kaggle.json to owner read/write (mode 600) before the kaggle CLI is used below.
!chmod 600 /content/kaggle.json

# **Visualising Dataset**

In [None]:
os.environ['KAGGLE_CONFIG_DIR']='/content'
!kaggle datasets download -d kmader/skin-cancer-mnist-ham10000
!unzip \*.zip && rm *.zip
meta=pd.read_csv('/content/HAM10000_metadata.csv')
meta.info()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: ham10000_images_part_1/ISIC_0027606.jpg  
  inflating: ham10000_images_part_1/ISIC_0027607.jpg  
  inflating: ham10000_images_part_1/ISIC_0027608.jpg  
  inflating: ham10000_images_part_1/ISIC_0027609.jpg  
  inflating: ham10000_images_part_1/ISIC_0027610.jpg  
  inflating: ham10000_images_part_1/ISIC_0027611.jpg  
  inflating: ham10000_images_part_1/ISIC_0027612.jpg  
  inflating: ham10000_images_part_1/ISIC_0027613.jpg  
  inflating: ham10000_images_part_1/ISIC_0027614.jpg  
  inflating: ham10000_images_part_1/ISIC_0027615.jpg  
  inflating: ham10000_images_part_1/ISIC_0027616.jpg  
  inflating: ham10000_images_part_1/ISIC_0027617.jpg  
  inflating: ham10000_images_part_1/ISIC_0027618.jpg  
  inflating: ham10000_images_part_1/ISIC_0027619.jpg  
  inflating: ham10000_images_part_1/ISIC_0027620.jpg  
  inflating: ham10000_images_part_1/ISIC_0027621.jpg  
  inflating: ham10000_images_part_1/ISIC_0027622.jpg  


In [None]:
# Preview the first rows of the metadata table.
meta.head()

In [None]:
# Count plot of metadata rows per diagnosis code (`dx`).
g = sns.catplot(data=meta, x="dx", kind="count", palette='bright')

# Widen the figure so all class labels fit on the x axis.
g.fig.set_size_inches(16, 5)
g.ax.set_title('Skin Cancer by Class', fontsize=20)

g.set_xlabels('Skin Cancer Class', fontsize=14)
g.set_ylabels('Number of Data Points', fontsize=14)

This plot shows that the dataset suffers from severe class imbalance.

In [None]:
# Count plot of diagnosis code (`dx`), with one bar per value of `sex` within each class.
g = sns.catplot(data=meta, x="dx", hue="sex", kind="count", palette='coolwarm')

g.fig.set_size_inches(16, 5)
g.ax.set_title('Skin Cancer by Sex', fontsize=20)

g.set_xlabels('Skin Cancer Class', fontsize=14)
g.set_ylabels('Number of Data Points', fontsize=14)

# Replace the default legend title (the column name) with a capitalised one.
g._legend.set_title('Sex')

In [None]:
# Count plot of diagnosis code (`dx`), with one bar per distinct `age` value within each class.
g = sns.catplot(data=meta, x="dx", hue="age", kind="count", palette='bright')

# Taller than the other plots to leave room for the longer age legend.
g.fig.set_size_inches(16, 9)
g.ax.set_title('Skin Cancer by Age', fontsize=20)

g.set_xlabels('Skin Cancer Class', fontsize=14)
g.set_ylabels('Number of Data Points', fontsize=14)

# Replace the default legend title (the column name) with a capitalised one.
g._legend.set_title('Age')

# **Extracting x and y from csv file**

In [None]:
# Load the pixel table: every column except `label` is a feature
# (presumably one flattened 28x28 RGB image per row - see the reshape below).
df = pd.read_csv('/content/hmnist_28_28_RGB.csv')

# Features: drop the target column, convert to a NumPy array, and divide by 255.
x = df.drop('label', axis=1).to_numpy() / 255

# Targets: one-hot encode the integer class ids.
y = to_categorical(df['label'])

In [None]:
# Count how many rows carry each integer class label.
df['label'].value_counts()

In [None]:
# Class name -> integer id used in the `label` column of hmnist_28_28_RGB.csv.
# In that file 5 is the vascular class and 6 is melanoma (the original mapping had
# them swapped, and the first key carried a stray leading space).
# Sanity check against df['label'].value_counts(): melanoma is the second most
# frequent class, vascular lesions one of the rarest.
label={
    'Actinic keratoses':0,
    'Basal cell carcinoma':1,
    'Benign keratosis-like lesions':2,
    'Dermatofibroma':3,
    'Melanocytic nevi':4,
    'Vascular lesions':5,
    'Melanoma':6
}

In [None]:
# Turn each flat pixel row into a 28x28 image with 3 channels; -1 lets NumPy infer the sample count.
x = np.reshape(x, (-1, 28, 28, 3))

# **Splitting into train and test sets**

In [None]:
# Hold out 10% of the samples. Stratifying on the integer labels keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=13,
    stratify=df['label'],
)

# **Image augmentation**

In [None]:
# Random augmentation pipeline for the training images.
# No `rescale` here: x is already divided by 255 when it is loaded, so rescaling by
# 1/255 a second time would squash every pixel into roughly [0, 0.004].
datagen=ImageDataGenerator(rotation_range=20, # rotate by a random angle of up to 20 degrees
                               width_shift_range=0.10, # shift horizontally by up to 10% of the width
                               height_shift_range=0.10, # shift vertically by up to 10% of the height
                               shear_range=0.1, # shear intensity (shear angle, in degrees)
                               zoom_range=0.1, # zoom in or out by up to 10%
                               horizontal_flip=True,
                               vertical_flip=True,
                               fill_mode='nearest') # fill pixels exposed by a transform with the nearest edge value

In [None]:
# Let the generator compute data-dependent statistics from the training images.
# NOTE(review): per the Keras docs this is only required when featurewise_center,
# featurewise_std_normalization or zca_whitening is enabled, and `datagen` sets none
# of them - confirm whether this call is still needed.
datagen.fit(xtrain)

# **Model**

In [None]:
from tensorflow.keras.metrics import categorical_accuracy, top_k_categorical_accuracy
def accuracy(y_true, y_pred):
    """Top-2 categorical accuracy.

    Despite the name this is not plain accuracy: a sample counts as correct
    when the true class is among the two highest-scoring predictions.
    Keras reports it under the metric name ``accuracy`` (``val_accuracy`` on
    validation data), taken from this function's name.
    """
    top2_hits = top_k_categorical_accuracy(y_true, y_pred, k=2)
    return top2_hits

In [None]:
from tensorflow.keras.layers import Flatten,Dense,Dropout,BatchNormalization,Conv2D,MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.metrics import Recall
from tensorflow.keras.optimizers import RMSprop

# CNN for 28x28 RGB images ending in a 7-way softmax.
# Only the first layer declares input_shape; Sequential infers the shapes of all
# later layers, so repeating input_shape on them was ignored and only misleading.
model=Sequential()

# Block 1: 64 filters.
model.add(Conv2D(64,(2,2),input_shape=(28,28,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())

# Block 2: 512 filters.
model.add(Conv2D(512,(2,2),activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())

model.add(Dropout(0.3))

# Block 3: 1024 filters.
model.add(Conv2D(1024,(2,2),activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())

model.add(Dropout(0.3))

# Block 4: 1x1 convolution mixes channels only. The 1x1 pooling that follows
# leaves the feature map unchanged; it is kept so the layer stack stays identical.
model.add(Conv2D(1024,(1,1),activation='relu'))
model.add(MaxPooling2D(pool_size=(1, 1)))
model.add(BatchNormalization())

model.add(Dropout(0.3))

# Classifier head.
model.add(Flatten())

model.add(Dense(256,activation='relu'))
model.add(Dropout(0.5))


model.add(Dense(7,activation='softmax'))

# `accuracy` is the top-2 metric function defined in the previous cell.
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=[accuracy])

model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau

# Both callbacks watch the training metric named 'accuracy' (the top-2 metric the
# model is compiled with). Accuracy improves upwards, so the mode is 'max'.
# NOTE(review): monitoring 'val_accuracy' instead would react to generalisation
# rather than to fit on the training data - confirm which is intended.

# Stop training after 3 epochs without improvement. The mode is explicit so it does
# not depend on Keras guessing the direction from the metric name.
early=EarlyStopping(monitor='accuracy',mode='max',patience=3)

# Halve the learning rate after 2 epochs without improvement, never going below 1e-4.
# The original used mode='min', which treats a *rising* accuracy as "no improvement"
# and therefore cut the learning rate while the model was still getting better.
reduce_lr = ReduceLROnPlateau(monitor='accuracy', factor=0.5, patience=2, verbose=1, mode='max', min_lr=0.0001)

# **Training**

In [None]:
from tensorflow.keras.metrics import categorical_accuracy, top_k_categorical_accuracy
# NOTE(review): this repeats the `accuracy` definition from the "Model" section.
# The model was compiled with that earlier function object, so rebinding the name
# here does not change the model's metrics; the cell looks redundant.
def accuracy(y_true, y_pred):
    """Top-2 categorical accuracy: delegates to top_k_categorical_accuracy with k=2."""
    return top_k_categorical_accuracy(y_true, y_pred, k=2)

In [None]:
# Loss weight per class id: class 4 counts half as much as the others, all of which keep weight 1.
class_weights = {class_id: (0.5 if class_id == 4 else 1) for class_id in range(7)}

In [None]:
# Train for at most 30 epochs on the raw training arrays (the augmentation generator
# is not used here). The held-out split serves as validation data, and the callbacks
# may lower the learning rate or end training early.
model.fit(
    xtrain,
    ytrain,
    validation_data=(xtest, ytest),
    epochs=30,
    class_weight=class_weights,
    callbacks=[reduce_lr, early],
)

# **Evaluation**


In [None]:
# Per-epoch metrics recorded by the most recent model.fit call, one row per epoch.
loss=pd.DataFrame(model.history.history)

# DataFrame.plot opens its own figure, so the size is passed to it directly.
# A separate plt.figure(...) call beforehand only produced an extra empty figure.
ax = loss[['accuracy','val_accuracy']].plot(figsize=(15,10), title='Training vs. validation top-2 accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Top-2 accuracy');

In [None]:
# DataFrame.plot opens its own figure, so the size is passed to it directly.
# A separate plt.figure(...) call beforehand only produced an extra empty figure.
ax = loss[['loss','val_loss']].plot(figsize=(15,10), title='Training vs. validation loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Categorical cross-entropy loss');

In [None]:
# Integer class id (as stored in the `label` column of hmnist_28_28_RGB.csv) -> readable name.
# In that file 5 is the vascular class and 6 is melanoma; the original mapping had the
# two swapped, which mislabels both the ROC legend and single-image predictions.
# Sanity check against df['label'].value_counts(): melanoma is the second most
# frequent class, vascular lesions one of the rarest.
decode={
    0:'Actinic keratosis',
    1:'Basal cell carcinoma',
    2:'Benign keratosis-like lesions',
    3:'Dermatofibroma',
    4:'Melanocytic nevi',
    5:'Vascular lesion',
    6:'Melanoma'
}

In [None]:
from sklearn.metrics import roc_curve,auc

# Predicted class probabilities for the held-out split (one column per class).
pred=model.predict(xtest)

# One-vs-rest ROC curve and area under it for each of the 7 classes.
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(7):
    fpr[i], tpr[i], _ = roc_curve(ytest[:, i], pred[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

plt.figure(figsize=(10,8))
for i in range(7):
    # Show the AUC in the legend; it was computed before but never reported.
    curve_label = '{} (AUC = {:.2f})'.format(decode[i], roc_auc[i])
    plt.plot(fpr[i],tpr[i],label=curve_label,linewidth=2)
# Diagonal = expected curve of a classifier that guesses at random.
plt.plot([0, 1], [0, 1], 'k--', lw=2,label='random guess')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('One-vs-rest ROC curve per class')
plt.legend(loc="lower right");

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

# Predicted class id = index of the highest softmax probability.
# Sequential.predict_classes was removed in TensorFlow 2.6; argmax over predict()
# is the documented replacement and also works on older versions.
predictions=np.argmax(model.predict(xtest),axis=1)

# ytest is one-hot encoded, so argmax recovers the integer class id of every sample
# (same result as scanning each row for the position of the 1).
check=np.argmax(ytest,axis=1)
print(classification_report(check,predictions))

In [None]:
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
from tensorflow.keras.preprocessing import image
from PIL import Image

# Load one image and bring it into the format the model was trained on.
# convert('RGB') guarantees exactly 3 channels: without it a grayscale, RGBA or CMYK
# file makes the reshape below fail. The `with` block closes the file handle once
# the resized copy has been made.
with Image.open('/content/_ml.jpg') as source_image:
    img = source_image.convert('RGB').resize((28,28))
img = np.array(img)
img = img / 255.0  # same [0, 1] scaling as the training data
img = img.reshape(1,28,28,3)  # batch containing a single image

preds = model.predict(img)
print(preds)
preds=np.around(preds, decimals = 3)
max_value=max(preds[0])
print(max_value)
def index(array, item):
    """Return the index tuple of the first element of `array` equal to `item`.

    Returns None when no element matches.
    """
    for idx, val in np.ndenumerate(array):
        if val == item:
            return idx
# preds has shape (1, n_classes), so element [1] of the index tuple is the class id.
result=index(preds,max_value)[1]
print(decode[result])


In [None]:
# Use one path for both the existence check and the save. The original tested an
# absolute path but saved to a path relative to the working directory, so the two
# could point at different files.
model_path = '/content/model_build/Final_model.h5'
if not os.path.isfile(model_path):
  # Make sure the target directory exists before writing (no-op if it already does).
  os.makedirs(os.path.dirname(model_path), exist_ok=True)
  model.save(model_path)