# Baseline

### Prepare data

In [6]:
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms

TRAIN_PATH = '/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/train'
TEST_PATH = '/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/test'

to_tensor = transforms.Compose([
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

train_data = ImageFolder(root=TRAIN_PATH, transform = to_tensor)
test_data = ImageFolder(root=TEST_PATH, transform = to_tensor)

### Define the convolutional neural network model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
        
        self.fc1 = nn.Linear(64, 128)
        self.fc2 = nn.Linear(128, 2)
        self.dropout = nn.Dropout(0.5)
        
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        
        x = self.global_pool(x)
        x = torch.flatten(x, start_dim=1)
        
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

    def get_conv_layers(self):
        return [self.conv1, self.conv2, self.conv3]

### Helper function for calculating accuriacies

In [8]:
from torch import no_grad, max

def calculate_accuracy(loader, model, device):
    model.eval()
    with no_grad():
        correct = 0
        total = 0
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total

### Training loop

In [9]:
import torch
import numpy as np
from torch.cuda import is_available
from torch.utils.data import DataLoader, Subset
from torch.optim import Adam
from torch.amp.grad_scaler import GradScaler
from torch.amp import autocast
from torch.nn import CrossEntropyLoss
from sklearn.model_selection import KFold

device = torch.device('cuda' if is_available() else 'cpu')

print(device.type)

def training_loop(model, training_data, num_of_epochs):
    kf = KFold(n_splits=5, shuffle=True, random_state=0)

    for train_index, val_index in kf.split(np.arange(len(train_data))):
        train_loader = DataLoader(Subset(training_data, train_index), batch_size=32, shuffle=True)
        val_loader = DataLoader(Subset(training_data, val_index), batch_size=32, shuffle=True)
        
        model = model.to(device)
        criterion = CrossEntropyLoss()
        optimizer = Adam(model.parameters(), lr=0.001)
        scaler = GradScaler()

        train_losses = []
        val_accuracies = []

        for epoch in range(num_of_epochs):
            model.train()
            running_loss = 0.0
            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                with autocast(device.type):
                    outputs = model(images)
                    loss = criterion(outputs, labels)

                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()
                running_loss += loss.item()

            train_loss = running_loss / len(train_loader)
            train_losses.append(train_loss)
            val_accuracy = calculate_accuracy(val_loader, model, device)
            val_accuracies.append(val_accuracy)
            print(f'Epoch [{epoch+1}/{num_of_epochs}], Loss: {train_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

cuda


### Run training

In [10]:
from torchvision.models.alexnet import AlexNet
cnn = AlexNet()
training_loop(cnn, train_data, 25)

Epoch [1/25], Loss: 0.7857, Validation Accuracy: 80.90%
Epoch [2/25], Loss: 0.3903, Validation Accuracy: 85.22%
Epoch [3/25], Loss: 0.3524, Validation Accuracy: 80.79%
Epoch [4/25], Loss: 0.3542, Validation Accuracy: 84.70%
Epoch [5/25], Loss: 0.3317, Validation Accuracy: 86.67%
Epoch [6/25], Loss: 0.3583, Validation Accuracy: 85.27%
Epoch [7/25], Loss: 0.3480, Validation Accuracy: 85.48%
Epoch [8/25], Loss: 0.5921, Validation Accuracy: 85.01%
Epoch [9/25], Loss: 0.3581, Validation Accuracy: 84.28%
Epoch [10/25], Loss: 0.3478, Validation Accuracy: 83.71%
Epoch [11/25], Loss: 0.3570, Validation Accuracy: 84.12%
Epoch [12/25], Loss: 0.3389, Validation Accuracy: 86.78%
Epoch [13/25], Loss: 0.3205, Validation Accuracy: 87.19%
Epoch [14/25], Loss: 0.3214, Validation Accuracy: 85.79%
Epoch [15/25], Loss: 0.3264, Validation Accuracy: 86.00%
Epoch [16/25], Loss: 0.3107, Validation Accuracy: 86.57%
Epoch [17/25], Loss: 0.3117, Validation Accuracy: 87.30%
Epoch [18/25], Loss: 0.2911, Validation 

### Helper method to calculate metrics

In [11]:
from sklearn.metrics import precision_score, recall_score, f1_score

def calculate_metrics(loader, model, device):
    model.eval()
    all_labels = []
    all_predictions = []

    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    accuracy = 100 * correct / total
    precision = precision_score(all_labels, all_predictions, average='weighted')
    recall = recall_score(all_labels, all_predictions, average='weighted')
    f1 = f1_score(all_labels, all_predictions, average='weighted')

    return accuracy, precision, recall, f1, all_labels, all_predictions

### Run against test dataset

In [12]:
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

accuracy, precision, recall, f1, all_labels, all_predictions = calculate_metrics(test_loader, cnn, device)

print(f'Test accuracy: {accuracy:.4f}%')
print(f'Test precision: {precision:.4f}')
print(f'Test recall: {recall:.4f}')
print(f'Test F1-Score: {f1:.4f}')

Test accuracy: 87.2000%
Test precision: 0.8723
Test recall: 0.8720
Test F1-Score: 0.8720


### Augment data

In [13]:
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(90),
    transforms.RandomRotation(20),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

augmented_train_data = ImageFolder(root=TRAIN_PATH, transform=train_transforms)


### Train on augmented data and validate on test dataset

In [15]:
augmented_cnn = AlexNet()
training_loop(augmented_cnn, augmented_train_data, 25)

accuracy, precision, recall, f1, all_labels, all_predictions = calculate_metrics(test_loader, augmented_cnn, device)

print(f'Test accuracy: {accuracy:.4f}%')
print(f'Test precision: {precision:.4f}')
print(f'Test recall: {recall:.4f}')
print(f'Test F1-Score: {f1:.4f}')

Epoch [1/25], Loss: 1.2420, Validation Accuracy: 72.41%
Epoch [2/25], Loss: 0.4332, Validation Accuracy: 79.70%
Epoch [3/25], Loss: 0.4231, Validation Accuracy: 83.81%
Epoch [4/25], Loss: 0.4331, Validation Accuracy: 83.91%
Epoch [5/25], Loss: 0.3723, Validation Accuracy: 84.90%
Epoch [6/25], Loss: 0.3809, Validation Accuracy: 84.12%
Epoch [7/25], Loss: 0.3579, Validation Accuracy: 82.87%
Epoch [8/25], Loss: 0.3589, Validation Accuracy: 84.75%
Epoch [9/25], Loss: 0.3570, Validation Accuracy: 86.15%
Epoch [10/25], Loss: 0.3786, Validation Accuracy: 84.12%
Epoch [11/25], Loss: 0.3587, Validation Accuracy: 85.89%
Epoch [12/25], Loss: 0.3487, Validation Accuracy: 86.88%
Epoch [13/25], Loss: 0.3352, Validation Accuracy: 86.00%
Epoch [14/25], Loss: 0.3928, Validation Accuracy: 84.17%
Epoch [15/25], Loss: 0.3612, Validation Accuracy: 86.73%
Epoch [16/25], Loss: 0.3963, Validation Accuracy: 80.53%
Epoch [17/25], Loss: 0.3726, Validation Accuracy: 84.38%
Epoch [18/25], Loss: 0.3406, Validation 

  _warn_prf(average, modifier, msg_start, len(result))
