In [5]:
import numpy as np
import pandas as pd
import torch
import torchvision
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
from PIL import Image

In [65]:
# import the data into pandas dataframes
# 2 dataframes: train and test
train_df = pd.read_csv('/work/20250412-194207/train-metadata.csv')
test_df = pd.read_csv('/work/20250412-194207/test-metadata.csv')

# split train into train + val
# A dedicated, seeded generator keeps the train/validation membership identical
# across kernel restarts, so validation numbers stay comparable between runs.
SPLIT_SEED = 42
split_rng = np.random.default_rng(SPLIT_SEED)
perm = split_rng.permutation(len(train_df))
# 80% train, 20% validation
split_idx = int(0.8*len(train_df))
# val_df must be sliced first: the next line rebinds train_df to the 80% subset.
val_df = train_df.iloc[perm[split_idx:]]
train_df = train_df.iloc[perm[:split_idx]]

In [67]:
# define transforms for the images
image_scale = 128
chosen_transforms = transforms.Compose([
    # Pass an explicit (height, width): a bare int only rescales the shorter
    # edge and keeps the aspect ratio, so non-square inputs would come out at
    # different sizes and could not be stacked into a batch or fed to a
    # network that assumes image_scale x image_scale inputs.
    transforms.Resize((image_scale, image_scale)),
    # PIL image -> float tensor in [0, 1], channels first
    transforms.ToTensor()
])


In [69]:
class CustomDataset(torch.utils.data.Dataset):
    """Map-style dataset that loads one JPEG per metadata row.

    Each row of ``dataframe`` must provide an ``isic_id`` column; the image is
    read from ``<image_dir>/<isic_id>.jpg``. When the frame also has a
    ``target`` column its value is returned as the label.

    Args:
        dataframe: pandas DataFrame with one row per image.
        transformations: callable applied to the loaded PIL image.
        image_dir: directory containing the ``.jpg`` files. Defaults to the
            training image directory, which is what the original
            implementation always used.
    """

    DEFAULT_IMAGE_DIR = '/work/20250412-194207/train-image/image/'
    # Label returned for frames that carry no 'target' column.
    UNLABELED = -1

    def __init__(self, dataframe, transformations, image_dir=DEFAULT_IMAGE_DIR):
        self.df = dataframe
        self.transformations = transformations
        self.image_dir = image_dir
        # Decide once whether labels exist instead of failing on every lookup.
        self.has_labels = 'target' in dataframe.columns

    def __len__(self):
        """Return the number of rows (and therefore images) in the frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the ``(image_data, target)`` tuple for positional index ``idx``."""
        # Fetch the row once; positional lookup because the frame may have
        # been shuffled or sliced and its index labels are then arbitrary.
        row = self.df.iloc[idx]
        image_file = self.image_dir.rstrip('/') + '/' + row['isic_id'] + '.jpg'

        # The context manager closes the file handle (Image.open is lazy and
        # otherwise leaves it open); convert() returns a fully loaded copy, and
        # forces 3 channels so greyscale/RGBA files still match the model input.
        with Image.open(image_file) as raw_image:
            image_data = raw_image.convert('RGB')
        image_data = self.transformations(image_data)

        # get the label (placeholder when the frame has no 'target' column)
        target = row['target'] if self.has_labels else self.UNLABELED

        return image_data, target

In [77]:
# Build one CustomDataset per split; all three share the same transform pipeline.
# NOTE(review): as constructed here every split, including test, reads its
# images from CustomDataset's train-image directory -- confirm that is intended.
train_dataset, val_dataset, test_dataset = (
    CustomDataset(split_frame, chosen_transforms)
    for split_frame in (train_df, val_df, test_df)
)

In [79]:
# use a dataloader object for efficient memory use
from torch.utils.data import DataLoader
BATCH_SIZE = 32
N_WORKERS = 4

# Only the training loader shuffles: reshuffling each epoch keeps mini-batch
# composition from repeating, while evaluation loaders keep a fixed order so
# predictions can be matched back to dataframe rows.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=N_WORKERS)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=N_WORKERS)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=N_WORKERS)

In [87]:
import torch.nn.functional as F

class CNN(nn.Module):
    """Binary image classifier: seven conv/pool stages followed by an MLP head.

    Expects 3-channel 128x128 inputs. Each conv stage is a 5x5 convolution
    (padding 2, so spatial size is preserved), ReLU, then 2x2 max-pooling,
    which halves the spatial size:

        128x128x3 -> 64x64x8 -> 32x32x16 -> 16x16x32 -> 8x8x64
                  -> 4x4x128 -> 2x2x256 -> 1x1x512

    The 512 features are then passed through 512 -> 1024 -> 256 -> 64 -> 16 -> 1
    with ReLU between layers and a sigmoid on the final unit.
    """

    def __init__(self):
        super(CNN, self).__init__()

        self.Conv1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=5, padding=2)
        self.Conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=5, padding=2)
        self.Conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, padding=2)
        self.Conv4 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2)
        self.Conv5 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5, padding=2)
        self.Conv6 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=5, padding=2)
        self.Conv7 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=5, padding=2)

        self.fc1 = nn.Linear(in_features=512, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=256)
        self.fc3 = nn.Linear(in_features=256, out_features=64)
        self.fc4 = nn.Linear(in_features=64, out_features=16)
        self.fc5 = nn.Linear(in_features=16, out_features=1)

        # Stateless, so one pooling module is shared by every conv stage.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        """Return one probability per input image.

        Args:
            x: float tensor of shape (batch, 3, 128, 128), or a single
                unbatched image of shape (3, 128, 128).

        Returns:
            Tensor of shape (batch,) with values in [0, 1]; an unbatched
            input yields shape (1,).
        """
        # Treat a lone image as a batch of one so the flatten below is uniform.
        if x.dim() == 3:
            x = x.unsqueeze(0)

        # send the image through the Conv layers
        out = x
        for conv in (self.Conv1, self.Conv2, self.Conv3, self.Conv4,
                     self.Conv5, self.Conv6, self.Conv7):
            out = self.pool(F.relu(conv(out)))

        # flatten everything except the batch dimension: (batch, 512, 1, 1) ->
        # (batch, 512). A plain view(-1) would also merge the batch axis and
        # hand fc1 a batch*512 vector instead of 512 features per sample.
        out = out.flatten(start_dim=1)

        # send through feedforward layers
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = F.relu(self.fc3(out))
        out = F.relu(self.fc4(out))
        out = F.sigmoid(self.fc5(out))

        # Drop the trailing singleton so predictions are (batch,), the same
        # shape as a batch of scalar labels.
        return out.squeeze(-1)

In [89]:
# define the model
# CNN() takes no constructor arguments; the instance is used as created here
# (no .to(device) call is made in this cell).
model = CNN()

In [95]:
# Loss and optimiser for training `model`.
# BCELoss scores probabilities against 0/1 targets; Adam updates every model parameter.
LEARNING_RATE = 0.001
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [None]:
import time

# just 1 epoch for training because we have ~320000 images in the training dataset
N_EPOCHS = 1
# how often should we print?
# 320000/32 (batch) = 10000, so 10 prints is a frequency of 1000
FREQ = 1000

# make sure the model is in training mode before optimising
model.train()

# train the model!
for epoch in range(N_EPOCHS):
    running_loss = 0.0
    start_time = time.time()

    for idx, (inputs, labels) in enumerate(train_loader):
        # zero the gradient
        optimizer.zero_grad()

        # forward (assignment -- `==` would only compare against an undefined name)
        # reshape(-1) keeps predictions 1-D so they line up with the labels
        # whether the model emits (batch,) or (batch, 1).
        y_pred = model(inputs).reshape(-1)
        # loss
        # labels are ints but our predictions are floats
        loss = criterion(y_pred, labels.float().reshape(-1))
        # backpropagation
        loss.backward()
        optimizer.step()

        # add loss, print at FREQ
        running_loss += loss.item()
        # Parentheses are required: `idx+1 % FREQ` parses as `idx + (1 % FREQ)`,
        # which is never 0, so the report would never be printed.
        if (idx + 1) % FREQ == 0:
            print(f"elapsed time: {time.time()-start_time:.3f}, average loss: {running_loss/FREQ}")
            start_time = time.time()
            running_loss = 0.0

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=79063c31-68e7-42ba-ac09-1fa3948bed4b' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>