### Dependencies

In [1]:
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms

### Path config and read the dataset

In [2]:
img_dir = './dataset/images/'
metadata_file ='./dataset/HAM10000_metadata'
df = pd.read_csv(metadata_file)
df.head(10)

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,vidir_modern
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,vidir_modern
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,vidir_modern
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,vidir_modern
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,vidir_modern
5,HAM_0001466,ISIC_0027850,bkl,histo,75.0,male,ear,vidir_modern
6,HAM_0002761,ISIC_0029176,bkl,histo,60.0,male,face,vidir_modern
7,HAM_0002761,ISIC_0029068,bkl,histo,60.0,male,face,vidir_modern
8,HAM_0005132,ISIC_0025837,bkl,histo,70.0,female,back,vidir_modern
9,HAM_0005132,ISIC_0025209,bkl,histo,70.0,female,back,vidir_modern


In [3]:
print('Number of Classes -', df['dx'].unique())
print('Number of Classes -', df['dx_type'].unique())

Number of Classes - ['bkl' 'nv' 'df' 'mel' 'vasc' 'bcc' 'akiec']
Number of Classes - ['histo' 'consensus' 'confocal' 'follow_up']


In [4]:
y = df.pop('dx')
y = y.to_frame()
encoder = LabelEncoder()
y = encoder.fit_transform(y)
X = df

  y = column_or_1d(y, warn=True)


In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, stratify=y)

print('Training set shape X-', X_train.shape, 'y-', y_train.shape) 
print('Training set shape X-', X_test.shape, 'y-', y_test.shape)

Training set shape X- (7511, 7) y- (7511,)
Training set shape X- (2504, 7) y- (2504,)


### Dataset and DataLoader for CNN

In [6]:
transform = transforms.Compose([
    transforms.Resize((64,64)),
    transforms.ToTensor(),
])

In [6]:
class SkinLesionsDataset(Dataset):
    def __init__(self, X, y, transform = None):
        self.X = X
        self.y = y
        self.transform = transform

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        img_name = self.X.iloc[idx]['image_id']
        label = self.y[idx]
        img = Image.open(f'{img_dir}{img_name}.jpg')
        if self.transform is not None:
            img = self.transform(img)
        return img, label

In [8]:
training_dataset = SkinLesionsDataset(X_train, y_train, transform=transform)
testing_dataset = SkinLesionsDataset(X_test, y_test, transform=transform)

train_dataloder = DataLoader(training_dataset, batch_size=8)
test_dataloader = DataLoader(testing_dataset, batch_size=8)

In [9]:
data_iterator = iter(train_dataloder)
sample_images, sample_labels = next(data_iterator)

print(sample_labels)
print(sample_images)
print(sample_images.shape)

tensor([4, 5, 5, 5, 5, 4, 2, 4])
tensor([[[[0.1098, 0.1451, 0.2000,  ..., 0.1451, 0.0784, 0.0549],
          [0.1255, 0.1608, 0.2471,  ..., 0.2157, 0.1059, 0.0667],
          [0.1373, 0.1961, 0.3020,  ..., 0.2902, 0.1529, 0.0824],
          ...,
          [0.0588, 0.0980, 0.2431,  ..., 0.3373, 0.2157, 0.1529],
          [0.0510, 0.0745, 0.1490,  ..., 0.2706, 0.1725, 0.1373],
          [0.0431, 0.0588, 0.0941,  ..., 0.2118, 0.1490, 0.1176]],

         [[0.0667, 0.0902, 0.1216,  ..., 0.0941, 0.0510, 0.0314],
          [0.0745, 0.1020, 0.1490,  ..., 0.1412, 0.0667, 0.0431],
          [0.0863, 0.1255, 0.1843,  ..., 0.2000, 0.1020, 0.0510],
          ...,
          [0.0353, 0.0588, 0.1490,  ..., 0.2314, 0.1451, 0.0980],
          [0.0275, 0.0431, 0.0902,  ..., 0.1765, 0.1137, 0.0863],
          [0.0196, 0.0353, 0.0588,  ..., 0.1373, 0.0941, 0.0745]],

         [[0.0902, 0.1098, 0.1490,  ..., 0.1176, 0.0588, 0.0392],
          [0.0980, 0.1176, 0.1686,  ..., 0.1686, 0.0824, 0.0471],
         

### Model Architecture

In [7]:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()

        # feature extraction with Convolutions, Relu, and max_pooling layers
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)

        # classification with linear layers
        self.linear1 = nn.Linear(16*13*13, 128)
        self.linear2 = nn.Linear(128, 64)
        # no softamx activation needed bacause we are using Cross Entropy loss
        # In pytorch, it includes softmax
        self.linear3 = nn.Linear(64, 7) # 7 classes
        

    def forward(self, x):
        # input size = (3,64,64)
        x = nn.functional.relu(self.conv1(x))  # Output size = (6,62,62)
        x = self.pool(x)  # Output size = (6,30,31)
        x = nn.functional.relu(self.conv2(x))  # Output size = (16,26,26)
        x = self.pool(x)  # Output size = (16,13,13)
        x = torch.flatten(x,1)  # Output size = (6,60,60)
        x = nn.functional.relu(self.linear1(x))  # Output size = (6,60,60)
        x = nn.functional.relu(self.linear2(x))  # Output size = (6,60,60)
        x = self.linear3(x)  # Output size = (6,60,60)
        return x

In [11]:
model = CNNModel().to('cpu')
print(model)

for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

CNNModel(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (linear1): Linear(in_features=2704, out_features=128, bias=True)
  (linear2): Linear(in_features=128, out_features=64, bias=True)
  (linear3): Linear(in_features=64, out_features=7, bias=True)
)
Layer: conv1.weight | Size: torch.Size([6, 3, 5, 5]) | Values : tensor([[[[ 0.1147, -0.0699, -0.0460, -0.0420, -0.0661],
          [ 0.0322,  0.0006,  0.0328, -0.0483, -0.0531],
          [ 0.0953, -0.0186,  0.0123, -0.0054, -0.0215],
          [ 0.0478, -0.0936, -0.1060, -0.1112,  0.0318],
          [ 0.0015,  0.1076,  0.0715,  0.0759,  0.0096]],

         [[-0.0833,  0.0896, -0.0325, -0.0017,  0.0799],
          [ 0.0202,  0.0552,  0.0236, -0.1036, -0.0411],
          [-0.1116, -0.0827,  0.0800, -0.0122, -0.0401],
          [ 0.0647, -0.0618, -0.1131, -0.1106,  0.0757],
          

### Training and Validation Loop

In [8]:
def train_model(model, optimizer, loss_func, num_epochs, device, train_dataloader, test_dataloader):
    for current_epoch in range(num_epochs):
        model.train()
        running_loss = 0
        correct_train_predictions = 0
        for _, (batch_images, batch_labels) in enumerate(train_dataloader):
            imgs =  batch_images.to(device)
            labels = torch.tensor(batch_labels).to(device)
            # Forward pass, get the output from the model and
            # calculate the loss by comparing the model output and true labels
            model_output = model(imgs)
            loss = loss_func(model_output, labels)
            
            # Backpropagate the calculated loss
            optimizer.zero_grad() #zeroing the gradients
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predictions = torch.max(model_output, 1)  # Get the higest probability predictions
            correct_train_predictions += (predictions ==labels).sum().item()

        running_loss = running_loss/len(train_dataloader)
        train_accuracy = correct_train_predictions/len(train_dataloader)
        # val_accuracy = 
        print(f'Epoch [{current_epoch}/{num_epochs}] - ')
        print(f'   Training Accuracy: {train_accuracy} Validation Accuracy:  Loss: {running_loss}')

    print('Training Completed!')
        


### Train the Model

In [1]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = 'cpu'
num_epochs = 5
batch_size = 1
transform = transforms.Compose([
    transforms.Resize((64,64)),
    transforms.ToTensor(),
])

training_dataset = SkinLesionsDataset(X_train, y_train, transform=transform)
testing_dataset = SkinLesionsDataset(X_test, y_test, transform=transform)

train_dataloader = DataLoader(training_dataset, batch_size)
test_dataloader = DataLoader(testing_dataset, batch_size)

model = CNNModel().to(device)

loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters())

train_model(model, optimizer, loss_function, num_epochs, device, train_dataloader, test_dataloader)

NameError: name 'torch' is not defined

### Saving the Model