In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import torch
from torch.utils.data import DataLoader
from torchvision.transforms import v2
from PIL import Image
from torch import nn, optim

from src.modeling.models import DenseNet, Inception, ResNet
from src.dataset import utils
from src.dataset.harvard_skin_cancer import HarvardSkinCancerDatasset

In [3]:
df_train = pd.read_csv('artifacts/df_train.csv')
df_valid = pd.read_csv('artifacts/df_valid.csv')
df_test = pd.read_csv('artifacts/df_test.csv')

## Model Creation

In [11]:
model = ResNet(n_classes = df_train['lesion_type_id'].nunique(), extract_features=True)

device = model.device

optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()

## Preparing DataLoaders

In [5]:
means, stds = utils.extract_mean_and_std_from_images(img_paths=df_train['path'].drop_duplicates())
display(means)
display(stds)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8912/8912 [00:20<00:00, 437.54it/s]


[0.7579861, 0.54822993, 0.57350165]

[0.14163895, 0.15279482, 0.17041847]

In [6]:
transforms = v2.Compose([
    v2.Resize((model.input_size, model.input_size)),
    v2.RandomHorizontalFlip(),
    v2.RandomVerticalFlip(),
    v2.RandomRotation(20),
    v2.ColorJitter(brightness=0.1, contrast=0.1, hue=0.1),
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=means, std=stds),
])

transforms

Compose(
      Resize(size=[224, 224], interpolation=InterpolationMode.BILINEAR, antialias=True)
      RandomHorizontalFlip(p=0.5)
      RandomVerticalFlip(p=0.5)
      RandomRotation(degrees=[-20.0, 20.0], interpolation=InterpolationMode.NEAREST, expand=False, fill=0)
      ColorJitter(brightness=(0.9, 1.1), contrast=(0.9, 1.1), hue=(-0.1, 0.1))
      ToImage()
      ToDtype(scale=True)
      Normalize(mean=[0.7579861, 0.54822993, 0.57350165], std=[0.14163895, 0.15279482, 0.17041847], inplace=False)
)

In [9]:
train_loader = DataLoader(
    dataset=HarvardSkinCancerDatasset(df=df_train, transforms=transforms),
    batch_size=64,
    shuffle=True,
    num_workers=16,
)

valid_loader = DataLoader(
    dataset=HarvardSkinCancerDatasset(df=df_valid, transforms=transforms),
    batch_size=64,
    num_workers=16,
)

test_loader = DataLoader(
    dataset=HarvardSkinCancerDatasset(df=df_test, transforms=transforms),
    batch_size=64,
    num_workers=16,
)

## Training

In [None]:
model.train(
    train_loader=train_loader,
    valid_loader=valid_loader,
    criterion=criterion,
    optimizer=optimizer,
    n_epochs=10,
)

[epoch 0], [iter 100 / 613], [train loss 1.02526], [train acc 0.63297]
[epoch 0], [iter 200 / 613], [train loss 0.85438], [train acc 0.69094]
[epoch 0], [iter 300 / 613], [train loss 0.76106], [train acc 0.71995]
[epoch 0], [iter 400 / 613], [train loss 0.70060], [train acc 0.74273]
[epoch 0], [iter 500 / 613], [train loss 0.65201], [train acc 0.75994]
[epoch 0], [iter 600 / 613], [train loss 0.61823], [train acc 0.77193]
EPOCH VALIDATION: [epoch = 0] acc = 0.8543002136752137 loss = 0.35803675485981834
[epoch 1], [iter 100 / 613], [train loss 0.39738], [train acc 0.84859]
[epoch 1], [iter 200 / 613], [train loss 0.38840], [train acc 0.85398]
[epoch 1], [iter 300 / 613], [train loss 0.37956], [train acc 0.85776]
[epoch 1], [iter 400 / 613], [train loss 0.37277], [train acc 0.85961]
[epoch 1], [iter 500 / 613], [train loss 0.36531], [train acc 0.86303]
[epoch 1], [iter 600 / 613], [train loss 0.35401], [train acc 0.86701]
EPOCH VALIDATION: [epoch = 1] acc = 0.9040242165242165 loss = 0.30