In [16]:
import pathlib
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import PIL
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

Mounting the dataset and providing the path for it:

In [17]:
data_dir_train = pathlib.Path("/content/drive/MyDrive/Train")
data_dir_test = pathlib.Path('/content/drive/MyDrive/Test')

In [18]:
image_count_train = len(list(data_dir_train.glob('*/*.jpg')))
print(image_count_train)
image_count_test = len(list(data_dir_test.glob('*/*.jpg')))
print(image_count_test)

2239
118


In [19]:
batch_size = 32
img_height = 180
img_width = 180

In [20]:
from tensorflow.python.ops.gen_batch_ops import batch

## Dividing into train and validation sets:

In [21]:
train_ds = tf.keras.utils.image_dataset_from_directory(data_dir_train, validation_split=0.2, subset='training',seed=123,image_size=(img_height, img_width), batch_size=batch_size)

Found 2239 files belonging to 9 classes.
Using 1792 files for training.


In [22]:
val_ds = tf.keras.utils.image_dataset_from_directory(data_dir_train, validation_split=0.2, subset='validation',seed=123,image_size=(img_height, img_width), batch_size=batch_size)

Found 2239 files belonging to 9 classes.
Using 447 files for validation.


In [23]:
class_names = train_ds.class_names
print(class_names)

['actinic keratosis', 'basal cell carcinoma', 'dermatofibroma', 'melanoma', 'nevus', 'pigmented benign keratosis', 'seborrheic keratosis', 'squamous cell carcinoma', 'vascular lesion']


In [24]:

import matplotlib.pyplot as plt

In [25]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

In [26]:
num = len(class_names)

## Creating the model and training :

In [27]:
model = Sequential([
                    layers.experimental.preprocessing.Rescaling(scale=1./255, input_shape=(img_height, img_width, 3)),
                    layers.Conv2D(16, 3, padding='same',strides=(1,1),activation='relu'),
                    layers.MaxPool2D(),
                    layers.Conv2D(32,3,padding='same',strides=(1,1),activation='relu'),
                    layers.MaxPool2D(),
                    layers.Conv2D(64,3,padding='same',strides=(1,1),activation='relu'),
                    layers.MaxPool2D(),
                    layers.Flatten(),
                    layers.Dense(128, activation='relu'),
                    layers.Dense(num,activation='softmax')
                    ])

In [28]:

model.compile(optimizer='Adam',loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])

In [29]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_1 (Rescaling)     (None, 180, 180, 3)       0         
                                                                 
 conv2d_3 (Conv2D)           (None, 180, 180, 16)      448       
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 90, 90, 16)       0         
 2D)                                                             
                                                                 
 conv2d_4 (Conv2D)           (None, 90, 90, 32)        4640      
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 45, 45, 32)       0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 45, 45, 64)       

In [30]:
epochs = 20
history = model.fit(
  train_ds,
  validation_data=val_ds,
  epochs=epochs
)

Epoch 1/20


  return dispatch_target(*args, **kwargs)


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## **Findings**:
Here we can clearly see the overfitting.

In [31]:
model_1 = Sequential([
                    layers.experimental.preprocessing.Rescaling(scale=1./255, input_shape=(img_height, img_width, 3)),
                    layers.Conv2D(16, 3, padding='same',strides=(1,1),activation='relu'),
                    layers.MaxPool2D(),
                    layers.Conv2D(32,3,padding='same',strides=(1,1),activation='relu'),
                    layers.MaxPool2D(),
                    layers.Dropout(0.5),
                    layers.Conv2D(64,3,padding='same',strides=(1,1),activation='relu'),
                    layers.MaxPool2D(),
                    layers.Flatten(),
                    layers.Dense(128, activation='relu'),
                    layers.Dropout(0.5),
                    layers.Dense(num,activation='softmax')
                    ])

In [32]:
model_1.compile(optimizer='Adam',loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])

In [33]:
model_1.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_2 (Rescaling)     (None, 180, 180, 3)       0         
                                                                 
 conv2d_6 (Conv2D)           (None, 180, 180, 16)      448       
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 90, 90, 16)       0         
 2D)                                                             
                                                                 
 conv2d_7 (Conv2D)           (None, 90, 90, 32)        4640      
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 45, 45, 32)       0         
 2D)                                                             
                                                                 
 dropout (Dropout)           (None, 45, 45, 32)       

In [34]:
epochs = 20
history = model_1.fit(
  train_ds,
  validation_data=val_ds,
  epochs=epochs
)

Epoch 1/20


  return dispatch_target(*args, **kwargs)


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## **Findings**:
Here we can almost the overfitting issue was solved.

## Check for Imbalance of Dataset

In [35]:
cancer = list(data_dir_train.glob('pigmented benign keratosis/*'))

In [36]:

len(cancer)

462

In [37]:
print(class_names)

['actinic keratosis', 'basal cell carcinoma', 'dermatofibroma', 'melanoma', 'nevus', 'pigmented benign keratosis', 'seborrheic keratosis', 'squamous cell carcinoma', 'vascular lesion']


In [38]:
cancer_1 = list(data_dir_train.glob('actinic keratosis/*'))
cancer_2 = list(data_dir_train.glob('basal cell carcinoma/*'))
cancer_3 = list(data_dir_train.glob('dermatofibroma/*'))
cancer_4 = list(data_dir_train.glob('melanoma/*'))
cancer_5 = list(data_dir_train.glob('nevus/*'))
cancer_6 = list(data_dir_train.glob('seborrheic keratosis/*'))
cancer_7 = list(data_dir_train.glob('squamous cell carcinoma/*'))
cancer_8 = list(data_dir_train.glob('vascular lesion/*'))

In [39]:
print(len(cancer_1))
print(len(cancer_2))
print(len(cancer_3))
print(len(cancer_4))
print(len(cancer_5))
print(len(cancer_6))
print(len(cancer_7))
print(len(cancer_8))

114
376
95
438
357
77
181
139


# **Findings**:
seborrheic keratosis has lowest number of images.

## Solving the imbalance data by using Augmentor:

In [40]:
pip install Augmentor

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting Augmentor
  Downloading Augmentor-0.2.10-py2.py3-none-any.whl (38 kB)
Installing collected packages: Augmentor
Successfully installed Augmentor-0.2.10


In [41]:
import Augmentor

In [42]:
images = list(data_dir_train.glob('*/*'))

In [43]:
y = class_names

In [44]:
y

['actinic keratosis',
 'basal cell carcinoma',
 'dermatofibroma',
 'melanoma',
 'nevus',
 'pigmented benign keratosis',
 'seborrheic keratosis',
 'squamous cell carcinoma',
 'vascular lesion']

In [45]:
path_to_training_dataset="/content/drive/MyDrive/Train"
for i in y:
    p = Augmentor.Pipeline(path_to_training_dataset + "/" + i)
    p.rotate(probability=0.7, max_left_rotation=10, max_right_rotation=10)
    p.sample(500)

Initialised with 114 image(s) found.
Output directory set to /content/drive/MyDrive/Train/actinic keratosis/output.

Processing <PIL.Image.Image image mode=RGB size=600x450 at 0x7FB9701CB9D0>: 100%|██████████| 500/500 [00:20<00:00, 23.97 Samples/s]


Initialised with 376 image(s) found.
Output directory set to /content/drive/MyDrive/Train/basal cell carcinoma/output.

Processing <PIL.Image.Image image mode=RGB size=600x450 at 0x7FBA059480D0>: 100%|██████████| 500/500 [00:22<00:00, 21.85 Samples/s]


Initialised with 95 image(s) found.
Output directory set to /content/drive/MyDrive/Train/dermatofibroma/output.

Processing <PIL.Image.Image image mode=RGB size=600x450 at 0x7FBA88094BD0>: 100%|██████████| 500/500 [00:25<00:00, 19.83 Samples/s]


Initialised with 438 image(s) found.
Output directory set to /content/drive/MyDrive/Train/melanoma/output.

Processing <PIL.Image.Image image mode=RGB size=1024x768 at 0x7FB9F0202ED0>: 100%|██████████| 500/500 [01:34<00:00,  5.31 Samples/s]


Initialised with 357 image(s) found.
Output directory set to /content/drive/MyDrive/Train/nevus/output.

Processing <PIL.Image.Image image mode=RGB size=2048x1536 at 0x7FB9867B7A10>: 100%|██████████| 500/500 [01:23<00:00,  6.01 Samples/s]


Initialised with 462 image(s) found.
Output directory set to /content/drive/MyDrive/Train/pigmented benign keratosis/output.

Processing <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=600x450 at 0x7FB9695C1450>: 100%|██████████| 500/500 [00:19<00:00, 25.35 Samples/s]


Initialised with 77 image(s) found.
Output directory set to /content/drive/MyDrive/Train/seborrheic keratosis/output.

Processing <PIL.Image.Image image mode=RGB size=1024x768 at 0x7FBA891AE750>: 100%|██████████| 500/500 [00:44<00:00, 11.35 Samples/s]


Initialised with 181 image(s) found.
Output directory set to /content/drive/MyDrive/Train/squamous cell carcinoma/output.

Processing <PIL.Image.Image image mode=RGB size=600x450 at 0x7FBA04319AD0>: 100%|██████████| 500/500 [00:19<00:00, 26.19 Samples/s]


Initialised with 139 image(s) found.
Output directory set to /content/drive/MyDrive/Train/vascular lesion/output.

Processing <PIL.Image.Image image mode=RGB size=600x450 at 0x7FB9F0165AD0>: 100%|██████████| 500/500 [00:20<00:00, 24.38 Samples/s]


In [46]:
image_count_train = len(list(data_dir_train.glob('*/output/*.jpg')))
print(image_count_train)

4500


In [47]:
im = list(data_dir_train.glob('*/output/*.jpg'))

In [50]:
lesion_list_new = [os.path.basename(os.path.dirname(os.path.dirname(y))) for y in glob.glob(os.path.join(data_dir_train, '*','output', '*.jpg'))]
lesion_list_new

['actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic keratosis',
 'actinic 

In [49]:
import glob as glob

In [51]:
path_list = [x for x in glob.glob(os.path.join(data_dir_train, '*', 'output', '*.jpg'))]
path_list

['/content/drive/MyDrive/Train/actinic keratosis/output/actinic keratosis_original_ISIC_0026468.jpg_5198fa09-d78e-4885-af69-01b923091be4.jpg',
 '/content/drive/MyDrive/Train/actinic keratosis/output/actinic keratosis_original_ISIC_0027334.jpg_3707eec7-22ed-4b20-ad4a-8e654b573315.jpg',
 '/content/drive/MyDrive/Train/actinic keratosis/output/actinic keratosis_original_ISIC_0031292.jpg_2455ff1d-eeb7-48a3-9a55-05895e349cbf.jpg',
 '/content/drive/MyDrive/Train/actinic keratosis/output/actinic keratosis_original_ISIC_0031430.jpg_a78aa89a-244c-42de-8a7a-6bde8ab228e5.jpg',
 '/content/drive/MyDrive/Train/actinic keratosis/output/actinic keratosis_original_ISIC_0028393.jpg_638c6e0f-9e66-4ff5-976b-58c82897a92f.jpg',
 '/content/drive/MyDrive/Train/actinic keratosis/output/actinic keratosis_original_ISIC_0031381.jpg_9ce9f5b5-bc16-4c5a-be7c-cece827a1446.jpg',
 '/content/drive/MyDrive/Train/actinic keratosis/output/actinic keratosis_original_ISIC_0030142.jpg_5fe5b477-fc0b-488c-8ba7-f4f58638a267.jpg',

In [52]:
dataframe_dict_new = dict(zip(path_list, lesion_list_new))

In [53]:
df = dict(zip(images, y))

In [54]:
original_df = pd.DataFrame(list(df.items()),columns = ['Path','Label'])

In [55]:
df2 = pd.DataFrame(list(dataframe_dict_new.items()),columns = ['Path','Label'])
new_df = original_df.append(df2)

In [56]:
new_df['Label'].value_counts()

actinic keratosis             501
basal cell carcinoma          501
dermatofibroma                501
melanoma                      501
nevus                         501
pigmented benign keratosis    501
seborrheic keratosis          501
squamous cell carcinoma       501
vascular lesion               501
Name: Label, dtype: int64

In [57]:
batch_size = 32
img_height = 180
img_width = 180

In [58]:
type(path_list)

list

In [59]:
data_dir_train= data_dir_train
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
  data_dir_train,
  seed=123,
  validation_split = 0.2,
  subset = 'training',## Todo choose the correct parameter value, so that only training data is refered to,,
  image_size=(img_height, img_width),
  batch_size=batch_size)

Found 6739 files belonging to 9 classes.
Using 5392 files for training.


In [60]:
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
  data_dir_train,
  seed=123,
  validation_split = 0.2,
  subset = 'validation',## Todo choose the correct parameter value, so that only validation data is refered to,
  image_size=(img_height, img_width),
  batch_size=batch_size)

Found 6739 files belonging to 9 classes.
Using 1347 files for validation.


In [61]:
model_2 = Sequential([
                    layers.experimental.preprocessing.Rescaling(scale=1./255, input_shape=(img_height, img_width, 3)),
                    layers.Conv2D(16, 3, padding='same',strides=(1,1),activation='relu'),
                    layers.MaxPool2D(),
                    layers.Conv2D(32,3,padding='same',strides=(1,1),activation='relu'),
                    layers.MaxPool2D(),
                    layers.Dropout(0.5),
                    layers.Conv2D(64,3,padding='same',strides=(1,1),activation='relu'),
                    layers.MaxPool2D(),
                    layers.Flatten(),
                    layers.Dense(128, activation='relu'),
                    layers.Dropout(0.5),
                    layers.Dense(num,activation='softmax')
                    ])

In [62]:
model_2.compile(optimizer='Adam',loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])

In [63]:
model_2.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_3 (Rescaling)     (None, 180, 180, 3)       0         
                                                                 
 conv2d_9 (Conv2D)           (None, 180, 180, 16)      448       
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 90, 90, 16)       0         
 2D)                                                             
                                                                 
 conv2d_10 (Conv2D)          (None, 90, 90, 32)        4640      
                                                                 
 max_pooling2d_10 (MaxPoolin  (None, 45, 45, 32)       0         
 g2D)                                                            
                                                                 
 dropout_2 (Dropout)         (None, 45, 45, 32)       

In [64]:
epochs = 20
history = model_2.fit(
  train_ds,
  validation_data=val_ds,
  epochs=epochs
)

Epoch 1/20


  return dispatch_target(*args, **kwargs)


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [65]:
im

[PosixPath('/content/drive/MyDrive/Train/actinic keratosis/output/actinic keratosis_original_ISIC_0026468.jpg_5198fa09-d78e-4885-af69-01b923091be4.jpg'),
 PosixPath('/content/drive/MyDrive/Train/actinic keratosis/output/actinic keratosis_original_ISIC_0027334.jpg_3707eec7-22ed-4b20-ad4a-8e654b573315.jpg'),
 PosixPath('/content/drive/MyDrive/Train/actinic keratosis/output/actinic keratosis_original_ISIC_0031292.jpg_2455ff1d-eeb7-48a3-9a55-05895e349cbf.jpg'),
 PosixPath('/content/drive/MyDrive/Train/actinic keratosis/output/actinic keratosis_original_ISIC_0031430.jpg_a78aa89a-244c-42de-8a7a-6bde8ab228e5.jpg'),
 PosixPath('/content/drive/MyDrive/Train/actinic keratosis/output/actinic keratosis_original_ISIC_0028393.jpg_638c6e0f-9e66-4ff5-976b-58c82897a92f.jpg'),
 PosixPath('/content/drive/MyDrive/Train/actinic keratosis/output/actinic keratosis_original_ISIC_0031381.jpg_9ce9f5b5-bc16-4c5a-be7c-cece827a1446.jpg'),
 PosixPath('/content/drive/MyDrive/Train/actinic keratosis/output/actinic ke