In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
from datetime import datetime

import pandas as pd
import torch
from PIL import Image
from torch.utils.data import DataLoader
from torchvision.transforms import v2
from torch import nn, optim

from src.dataset import utils
from src.dataset.harvard_skin_cancer import HarvardSkinCancerDatasset
from src.modeling.models import DenseNet, Inception, ResNet

In [3]:
df_train = pd.read_csv('artifacts/df_train.csv')
df_valid = pd.read_csv('artifacts/df_valid.csv')
df_test = pd.read_csv('artifacts/df_test.csv')

## Model Creation

In [4]:
model = ResNet(n_classes = df_train['lesion_type_id'].nunique(), extract_features=True)

optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()

## Preparing DataLoaders

In [5]:
means, stds = utils.extract_mean_and_std_from_images(img_paths=df_train['path'].drop_duplicates())
display(means)
display(stds)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8912/8912 [00:20<00:00, 437.28it/s]


[0.7579861, 0.54822993, 0.57350165]

[0.14163895, 0.15279482, 0.17041847]

In [6]:
transforms = v2.Compose([
    v2.Resize((model.input_size, model.input_size)),
    v2.RandomHorizontalFlip(),
    v2.RandomVerticalFlip(),
    v2.RandomRotation(20),
    v2.ColorJitter(brightness=0.1, contrast=0.1, hue=0.1),
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=means, std=stds),
])

transforms

Compose(
      Resize(size=[224, 224], interpolation=InterpolationMode.BILINEAR, antialias=True)
      RandomHorizontalFlip(p=0.5)
      RandomVerticalFlip(p=0.5)
      RandomRotation(degrees=[-20.0, 20.0], interpolation=InterpolationMode.NEAREST, expand=False, fill=0)
      ColorJitter(brightness=(0.9, 1.1), contrast=(0.9, 1.1), hue=(-0.1, 0.1))
      ToImage()
      ToDtype(scale=True)
      Normalize(mean=[0.7579861, 0.54822993, 0.57350165], std=[0.14163895, 0.15279482, 0.17041847], inplace=False)
)

In [7]:
train_loader = DataLoader(
    dataset=HarvardSkinCancerDatasset(df=df_train, transforms=transforms),
    batch_size=64,
    shuffle=True,
    num_workers=16,
)

valid_loader = DataLoader(
    dataset=HarvardSkinCancerDatasset(df=df_valid, transforms=transforms),
    batch_size=64,
    num_workers=16,
)

test_loader = DataLoader(
    dataset=HarvardSkinCancerDatasset(df=df_test, transforms=transforms),
    batch_size=64,
    num_workers=16,
)

## Training

In [8]:
torch.cuda.empty_cache()

In [9]:
model.train(
    train_loader=train_loader,
    valid_loader=valid_loader,
    criterion=criterion,
    optimizer=optimizer,
    n_epochs=10,
)

[epoch 0], [iter 100 / 613], [train loss 1.01566], [train acc 0.63984]
[epoch 0], [iter 200 / 613], [train loss 0.85367], [train acc 0.69125]
[epoch 0], [iter 300 / 613], [train loss 0.77069], [train acc 0.71818]
[epoch 0], [iter 400 / 613], [train loss 0.71222], [train acc 0.73871]
[epoch 0], [iter 500 / 613], [train loss 0.66323], [train acc 0.75628]
[epoch 0], [iter 600 / 613], [train loss 0.63032], [train acc 0.76747]
MODEL_EVALUATION: acc = 0.8560363247863249 loss = 0.3773735927210914
[epoch 1], [iter 100 / 613], [train loss 0.41401], [train acc 0.84688]
[epoch 1], [iter 200 / 613], [train loss 0.40121], [train acc 0.84992]
[epoch 1], [iter 300 / 613], [train loss 0.38926], [train acc 0.85448]
[epoch 1], [iter 400 / 613], [train loss 0.38224], [train acc 0.85590]
[epoch 1], [iter 500 / 613], [train loss 0.37378], [train acc 0.85903]
[epoch 1], [iter 600 / 613], [train loss 0.36404], [train acc 0.86273]
MODEL_EVALUATION: acc = 0.8803418803418803 loss = 0.330578593744172
[epoch 2], 

In [11]:
model.evaluate(loader=test_loader, criterion=criterion)

MODEL_EVALUATION: acc = 0.9038194444444445 loss = 0.2955888542864058


(0.2955888542864058, 0.9038194444444445)