<a href="https://www.kaggle.com/code/realshaktigupta/skin-cancer-classification-100-accuracy?scriptVersionId=221535941" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import numpy as np
import pandas as pd
import os
import glob
import tensorflow as tf
from tensorflow import keras
import random

In [2]:
# Load the HAM10000 metadata CSV from the Kaggle input directory into a DataFrame.
cancer=pd.read_csv("/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_metadata.csv")

In [3]:
# Show column dtypes and non-null counts for the metadata frame.
cancer.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10015 entries, 0 to 10014
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   lesion_id     10015 non-null  object 
 1   image_id      10015 non-null  object 
 2   dx            10015 non-null  object 
 3   dx_type       10015 non-null  object 
 4   age           9958 non-null   float64
 5   sex           10015 non-null  object 
 6   localization  10015 non-null  object 
dtypes: float64(1), object(6)
memory usage: 547.8+ KB


In [4]:
# Impute missing ages with the column mean (assignment instead of inplace=True so
# the statement is safe to re-run and does not rely on in-place mutation).
cancer['age']=cancer['age'].fillna(cancer['age'].mean())

# Collect every file from the two image folders of the dataset.
image_paths=[]
for part in ("part_1","part_2"):
    image_paths+=glob.glob("/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_"+part+"/*")

# Map image id (file name without extension) -> full path, then attach it to the metadata.
image_ids_n_paths = {os.path.splitext(os.path.basename(path))[0]:path for path in image_paths}
cancer['path']=cancer['image_id'].map(image_ids_n_paths)

# Fail early with a clear message: an unmatched image id would otherwise surface
# much later as a NaN path inside the tf.data pipeline.
missing_images=cancer.loc[cancer['path'].isna(),'image_id']
if not missing_images.empty:
    raise FileNotFoundError(
        "No image file found for %d image_id(s), e.g. %s"
        % (len(missing_images), missing_images.iloc[0])
    )

# Split the diagnosis column off as the target and one-hot encode it.
# dtype is pinned because pandas >= 2.0 defaults get_dummies to bool, which would
# change the label dtype fed to TensorFlow and compared in the evaluation cells.
labels=cancer['dx'].to_frame()
cancer=cancer.drop('dx',axis=1)
labels=pd.get_dummies(labels,dtype=np.uint8)

In [5]:
# Preview the metadata after imputation, with the new `path` column and `dx` removed.
cancer.head()

Unnamed: 0,lesion_id,image_id,dx_type,age,sex,localization,path
0,HAM_0000118,ISIC_0027419,histo,80.0,male,scalp,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...
1,HAM_0000118,ISIC_0025030,histo,80.0,male,scalp,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...
2,HAM_0002730,ISIC_0026769,histo,80.0,male,scalp,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...
3,HAM_0002730,ISIC_0025661,histo,80.0,male,scalp,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...
4,HAM_0001466,ISIC_0031633,histo,75.0,male,ear,/kaggle/input/skin-cancer-mnist-ham10000/HAM10...


In [6]:
# Preview the one-hot encoded diagnosis labels (one `dx_*` column per class).
labels.head()

Unnamed: 0,dx_akiec,dx_bcc,dx_bkl,dx_df,dx_mel,dx_nv,dx_vasc
0,0,0,1,0,0,0,0
1,0,0,1,0,0,0,0
2,0,0,1,0,0,0,0
3,0,0,1,0,0,0,0
4,0,0,1,0,0,0,0


In [7]:
# Pair every image path with its one-hot label row so the two stay aligned.
preliminary_data=[
    (image_path, one_hot_row)
    for image_path, one_hot_row in zip(cancer['path'], labels.to_numpy().tolist())
]

In [8]:
# Shuffle with a fixed seed so the sample order (and therefore the train/val/test
# membership derived from it) is the same on every fresh run. A dedicated Random
# instance is used so the global `random` state is left untouched.
random.Random(42).shuffle(preliminary_data)
# Unzip back into two parallel tuples.
# NOTE: `labels` is rebound here from the one-hot DataFrame to a tuple of label rows.
paths,labels=zip(*preliminary_data)

In [9]:
# Dataset of (image path, one-hot label) pairs; the images themselves are not read here.
data=tf.data.Dataset.from_tensor_slices(
    ([*paths], [*labels])
)

In [10]:
def final_data(path,label):
    """Load one image from disk and prepare it as model input.

    The file at `path` is read, decoded as a 3-channel JPEG, resized to
    90x120 (matching the model's `input_shape=(90,120,3)`) and divided by 255.

    Args:
        path: path of the image file to read.
        label: label associated with the image; passed through unchanged.

    Returns:
        Tuple `(image, label)` with the resized, rescaled image.
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_jpeg(raw_bytes,channels=3)
    resized = tf.image.resize(decoded,[90,120])
    return resized/255,label

In [11]:
# Decode/resize images in parallel and let tf.data tune the prefetch buffer.
# Element order is unchanged (map is deterministic by default), so the
# take/skip based split downstream still sees the same sequence.
data=data.map(final_data,num_parallel_calls=tf.data.AUTOTUNE).prefetch(tf.data.AUTOTUNE)

In [12]:
# Derive the split sizes from the actual number of samples instead of a
# hard-coded row count, so the split stays correct if the metadata changes.
n_samples=len(cancer)
train_size=round(0.8*n_samples)
val_size=round(0.1*n_samples)
# The test split takes whatever remains, so the three sizes always sum to n_samples.
test_size=n_samples-train_size-val_size

In [13]:
# Cut the dataset into three consecutive, non-overlapping slices:
# the first `train_size` elements, the next `val_size`, then `test_size`.
train=data.take(train_size)
holdout=data.skip(train_size)
val=holdout.take(val_size)
test=holdout.skip(val_size).take(test_size)

In [14]:
# Use every GPU TensorFlow can see instead of a hard-coded pair, so the cell does
# not depend on the session having exactly two GPUs.
# NOTE: the strategy is currently not applied - every `with mirrored_strategy.scope():`
# line in this notebook is commented out.
mirrored_strategy = tf.distribute.MirroredStrategy()

In [15]:
# with mirrored_strategy.scope():
# CNN for 90x120 RGB inputs: four blocks of two 3x3-style convolutions followed by
# normalization and 2x2 max-pooling, then a small dense head with a 7-way softmax.
model=tf.keras.Sequential([
    # Block 1: 30 filters (5x5 then 3x3).
    tf.keras.layers.Conv2D(filters=30,kernel_size=(5,5),strides=(1,1),padding='valid',
                           activation='relu',input_shape=(90,120,3)),
    tf.keras.layers.Conv2D(filters=30,kernel_size=(3,3),strides=(1,1),padding='valid',activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool2D(pool_size=(2,2),strides=None,padding='valid'),
    # Block 2: 20 filters.
    tf.keras.layers.Conv2D(filters=20,kernel_size=(3,3),strides=(1,1),padding='valid',activation='relu'),
    tf.keras.layers.Conv2D(filters=20,kernel_size=(3,3),strides=(1,1),padding='valid',activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool2D(pool_size=(2,2),strides=None,padding='valid'),
    # Block 3: 15 filters, group normalization (3 groups) followed by batch normalization.
    tf.keras.layers.Conv2D(filters=15,kernel_size=(3,3),strides=(1,1),padding='valid',activation='relu'),
    tf.keras.layers.Conv2D(filters=15,kernel_size=(3,3),strides=(1,1),padding='valid',activation='relu'),
    tf.keras.layers.GroupNormalization(groups=3),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool2D(pool_size=(2,2),strides=None,padding='valid'),
    # Block 4: 10 filters.
    tf.keras.layers.Conv2D(filters=10,kernel_size=(3,3),strides=(1,1),padding='valid',activation='relu'),
    tf.keras.layers.Conv2D(filters=10,kernel_size=(3,3),strides=(1,1),padding='valid',activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool2D(pool_size=(2,2),strides=None,padding='valid'),
    # Classifier head.
    tf.keras.layers.Flatten(),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(units=256,activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(rate=0.1),
    tf.keras.layers.Dense(units=128,activation='relu'),
    tf.keras.layers.Dense(units=7,activation='softmax'),
])
# One-hot targets -> categorical cross-entropy; accuracy is tracked for the callbacks.
model.compile(
    optimizer="Adam",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [16]:
# Print the layer-by-layer output shapes and parameter counts of the compiled model.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 86, 116, 30)       2280      
                                                                 
 conv2d_1 (Conv2D)           (None, 84, 114, 30)       8130      
                                                                 
 batch_normalization (BatchN  (None, 84, 114, 30)      120       
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 42, 57, 30)       0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 40, 55, 20)        5420      
                                                                 
 conv2d_3 (Conv2D)           (None, 38, 53, 20)        3

In [17]:
# with mirrored_strategy.scope():
# Write the full model (not just weights) to disk, keeping only the epoch with the
# best validation accuracy; the check runs at the end of every epoch.
checkpoint= tf.keras.callbacks.ModelCheckpoint(
    filepath='/kaggle/working/skin_cancer_detection7.h5',
    monitor='val_accuracy',
    save_best_only=True,
    save_weights_only=False,
    save_freq="epoch",
)
# Stop once validation loss has not improved for 10 epochs and restore the best weights.
early_stopping= tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
def lr_scheduler(epoch,lr,epochs=50):
    """Piecewise learning-rate schedule for `LearningRateScheduler`.

    * first 10% of `epochs`: hold the rate at 1e-3;
    * from 10% up to (not including) 25% of `epochs`: multiply the current
      rate by exp(-0.1) each epoch;
    * afterwards: multiply the current rate by exp(-0.008) each epoch.

    The boundary epoch `epoch == epochs*0.1` belongs to the fast-decay phase;
    previously it matched neither of the first two conditions and fell through
    to the slow-decay branch.

    Args:
        epoch: index of the epoch about to start.
        lr: current learning rate.
        epochs: planned total number of epochs (default 50).

    Returns:
        The learning rate to use, as a plain Python float (not a tensor), which
        is the type the scheduler callback expects.
    """
    initial=1e-3
    warmup_end=epochs*0.1
    fast_decay_end=epochs*0.25
    if epoch<warmup_end:
        return initial
    if epoch<fast_decay_end:
        return float(lr*np.exp(-0.1))
    return float(lr*np.exp(-0.008))
# Wrap the schedule function in a Keras callback so it can be passed to model.fit.
lr_scheduling=tf.keras.callbacks.LearningRateScheduler(lr_scheduler)

In [18]:
# with mirrored_strategy.scope():
# Keras ignores `shuffle=True` when the input is a tf.data.Dataset, so the
# per-epoch reshuffle is done inside the pipeline instead. The shuffle is applied
# to `train` only, so validation and test membership are unaffected.
history = model.fit(train.shuffle(1024).batch(32).prefetch(tf.data.AUTOTUNE),epochs=50,
                    validation_data=val.batch(32),
                    callbacks=[checkpoint,early_stopping,lr_scheduling])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50


In [19]:
# Reload the file written by the ModelCheckpoint callback, i.e. the model saved at the
# epoch with the best val_accuracy. This can differ from the weights EarlyStopping
# restored into `model`, which are selected by val_loss.
model1=tf.keras.models.load_model("/kaggle/working/skin_cancer_detection7.h5")

In [20]:
# Predict in fixed-size batches (same size as training) rather than pushing the
# whole test split through as one giant batch; predict() concatenates the
# per-batch results, so the returned array has the same shape and row order.
predictions=model1.predict(test.batch(32))



In [21]:
# Peek at the raw softmax output (7 class scores) for the first test sample.
predictions[0]

array([4.4018496e-04, 4.2245784e-03, 4.7222450e-03, 4.9940176e-05,
       2.6546461e-03, 9.8644304e-01, 1.4653738e-03], dtype=float32)

In [22]:
def outputs(x):
    """Convert class scores into a one-hot vector marking the highest score.

    Uses arg-max so exactly one entry is set to 1 per vector: on ties the first
    maximal entry wins (an `x == max(x)` comparison would mark every tied entry
    and mark nothing at all when the scores contain NaN).

    Args:
        x: array-like of scores. A 1-D vector is the usual input; for inputs
           with more dimensions the one-hot encoding is applied along the
           last axis.

    Returns:
        A float64 array with the same shape as `x`, containing 1 at the
        arg-max position along the last axis and 0 elsewhere.
    """
    x = np.asarray(x)
    a = np.zeros(x.shape)
    # Keep the reduced axis so the indices line up with `a` for put_along_axis.
    winners = np.expand_dims(np.argmax(x, axis=-1), axis=-1)
    np.put_along_axis(a, winners, 1, axis=-1)
    return a

In [23]:
# Overwrite each row of softmax scores with its one-hot encoding, in place.
for row_index, score_row in enumerate(predictions):
    predictions[row_index] = outputs(score_row)

In [24]:
# Display the predictions array after the in-place one-hot conversion.
predictions

array([[0., 0., 0., ..., 0., 1., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.]], dtype=float32)

In [25]:
# Check that the first prediction is now a one-hot vector.
predictions[0]

array([0., 0., 0., 0., 0., 1., 0.], dtype=float32)

In [26]:
from sklearn.metrics import accuracy_score

In [27]:
# Gather the ground-truth one-hot labels of the test split into one NumPy array
# (the image half of every batch is discarded).
y_test = np.concatenate(
    [label_batch.numpy() for _image_batch, label_batch in test.batch(len(test))],
    axis=0,
)

In [28]:
# Display the ground-truth one-hot labels collected from the test split.
y_test

array([[0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 1, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 1, 0]], dtype=int32)

In [29]:
# Exact-match accuracy on the held-out test split. Arguments follow the
# scikit-learn signature accuracy_score(y_true, y_pred); the score itself is the
# same either way, but the conventional order avoids confusion if this call is
# later swapped for an asymmetric metric.
accuracy_score(y_test,predictions)

0.7022977022977023