In [None]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload




In [None]:
import os
import sys
from datetime import datetime
from PIL import Image
import cv2
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader
import lightning as pl
from torch.utils.tensorboard import SummaryWriter
from torchmetrics.classification import Accuracy, AUROC, F1Score

sys.path.append('..')

from src.data.generate import generate_half_circle_image
from src.data.utils import get_image_paths
from src.data.dataset import HalfCircleBinaryClfDataset
from src.data.transforms import TRAIN_TRANSFORMS, TEST_TRANSFORMS
from src.modeling.model import HCCLF



In [2]:
device = torch.device('mps' if torch.mps.is_available() else 'cpu')
device

device(type='mps')

In [3]:
images_filepaths = get_image_paths(directory="/Users/alexandrepoupeau/Documents/work/code/halfcircle-clf/data/")
train_images_filepaths, test_images_filepaths = train_test_split(images_filepaths, test_size=0.2)
train_images_filepaths, val_images_filepaths = train_test_split(train_images_filepaths, test_size=0.2)

train_ds = HalfCircleBinaryClfDataset(images_filepaths=train_images_filepaths, transform=TRAIN_TRANSFORMS)
val_ds = HalfCircleBinaryClfDataset(images_filepaths=val_images_filepaths, transform=TEST_TRANSFORMS)
test_ds = HalfCircleBinaryClfDataset(images_filepaths=test_images_filepaths, transform=TEST_TRANSFORMS)

In [None]:
train_loader = DataLoader(train_ds, batch_size=64, shuffle=False, num_workers=4, persistent_workers=True, prefetch_factor=64)
val_loader = DataLoader(val_ds, batch_size=64, shuffle=False, num_workers=4, persistent_workers=True)
test_loader = DataLoader(test_ds, batch_size=64, shuffle=False, num_workers=2, persistent_workers=False)

In [5]:
test_ds[0][0].dtype

torch.float32

In [6]:
test_ds[6][1]

1.0

In [7]:
model = HCCLF(lr=1e-3).to(device)

In [8]:
optimizer = model.configure_optimizers()
loss_fn = torch.nn.BCELoss()

In [9]:
def train_one_epoch(epoch_index, tb_writer):
    threshold = 0.5
    running_loss = 0.
    running_auroc = 0.
    running_accuracy = 0.
    running_f1 = 0.
    last_loss = 0.
    step_logs = 100

    # Here, we use enumerate(training_loader) instead of
    # iter(training_loader) so that we can track the batch
    # index and do some intra-epoch reporting
    for i, data in enumerate(train_loader):
        # Every data instance is an input + label pair
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.unsqueeze(1).to(torch.float32).to(device)

        # Zero your gradients for every batch!
        optimizer.zero_grad()

        # Make predictions for this batch
        outputs = model(inputs)
        preds = (outputs >= threshold).float()

        # Compute the loss and its gradients
        loss = loss_fn(outputs, labels)
        loss.backward()

        # Adjust learning weights
        optimizer.step()

        # Gather data and report
        running_loss += loss.item()
        auroc_value = AUROC("binary").to(device)(outputs, labels)
        running_auroc += auroc_value
        accuracy_value = Accuracy("binary").to(device)(preds, labels)
        running_accuracy += accuracy_value
        f1_value = F1Score("binary").to(device)(preds, labels)
        running_f1 += f1_value

        if i % step_logs == step_logs - 1:
            last_loss = running_loss / step_logs # loss per batch
            last_accuracy = running_accuracy / step_logs
            last_f1 = running_f1 / step_logs
            last_auroc = running_auroc / step_logs

            print('  batch {} loss: {} acc: {} f1: {} auroc: {}'.format(i + 1, last_loss, last_accuracy, last_f1, last_auroc))
            tb_x = epoch_index * len(train_loader) + i + 1
            tb_writer.add_scalar('Loss/train', last_loss, tb_x)

            running_loss = 0.
            running_accuracy = 0.
            running_f1 = 0.
            running_auroc = 0.

    return last_loss, last_accuracy, last_f1, last_auroc

In [None]:
# Initializing in a separate cell so we can easily add more epochs to the same run
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter('../logs/runs/fashion_trainer_{}'.format(timestamp))
epoch_number = 0

EPOCHS = 5

best_vloss = 1_000_000.

for epoch in range(EPOCHS):
    print('EPOCH {}:'.format(epoch_number + 1))

    # Make sure gradient tracking is on, and do a pass over the data
    model.train(True)
    avg_loss, avg_acc, avg_f1, avg_auroc = train_one_epoch(epoch_number, writer)


    running_vloss = 0.0
    running_vacc = 0.0
    running_vf1 = 0.
    running_vauroc = 0.

    # Set the model to evaluation mode, disabling dropout and using population
    # statistics for batch normalization.
    model.eval()

    # Disable gradient computation and reduce memory consumption.
    with torch.no_grad():
        for i, vdata in enumerate(val_loader):
            vinputs, vlabels = vdata
            vinputs = vinputs.to(device)
            vlabels = vlabels.unsqueeze(1).to(torch.float32).to(device)
            voutputs = model(vinputs)
            vloss = loss_fn(voutputs, vlabels)
            auroc_value = AUROC("binary").to(device)(voutputs, vlabels)
            running_vauroc += auroc_value
            accuracy_value = Accuracy("binary").to(device)(voutputs, vlabels)
            running_vacc += accuracy_value
            f1_value = F1Score("binary").to(device)(voutputs, vlabels)
            running_vf1 += f1_value
            running_vloss += vloss

    avg_vloss = running_vloss / (i + 1)
    avg_vacc = running_vacc / (i + 1)
    avg_vf1 = running_vf1 / (i + 1)
    avg_vauroc = running_vauroc / (i + 1)
    print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))
    print('ACC train {} valid {}'.format(avg_acc, avg_vacc))
    print('F1SCORE train {} valid {}'.format(avg_f1, avg_vf1))
    print('AUROC train {} valid {}'.format(avg_auroc, avg_vauroc))

    # Log the running loss averaged per batch
    # for both training and validation
    writer.add_scalars('Training vs. Validation Loss',
                    { 'Training' : avg_loss, 'Validation' : avg_vloss },
                    epoch_number + 1)
    writer.flush()

    # Track best performance, and save the model's state
    if avg_vloss < best_vloss:
        best_vloss = avg_vloss
        model_path = '../models/model_{}_{}.pt'.format(timestamp, epoch_number)
        torch.save(model.state_dict(), model_path)

    epoch_number += 1

EPOCH 1:




  batch 100 loss: 0.4777050167694688 acc: 0.7626562714576721 f1: 0.5982999205589294 auroc: 0.8762351274490356
  batch 200 loss: 0.11137648088857531 acc: 0.9626562595367432 f1: 0.9585438370704651 auroc: 0.9903776049613953
  batch 300 loss: 0.07534187356010079 acc: 0.9748437404632568 f1: 0.9718748331069946 auroc: 0.9939463138580322
  batch 400 loss: 0.06219064167700708 acc: 0.9789062738418579 f1: 0.9759185314178467 auroc: 0.9964492917060852
LOSS train 0.06219064167700708 valid 0.027745738625526428
ACC train 0.9789062738418579 valid 0.9935024976730347
F1SCORE train 0.9759185314178467 valid 0.9925121068954468
AUROC train 0.9964492917060852 valid 0.999951183795929
EPOCH 2:
  batch 100 loss: 0.049636640921235084 acc: 0.983593761920929 f1: 0.9820197224617004 auroc: 0.9979079365730286
  batch 200 loss: 0.05258568457560614 acc: 0.9820312261581421 f1: 0.9800695776939392 auroc: 0.9981149435043335
  batch 300 loss: 0.030194255633978172 acc: 0.989062488079071 f1: 0.9878215789794922 auroc: 0.9994137

In [11]:
running_vloss = 0.0
running_vacc = 0.0
running_vf1 = 0.
running_vauroc = 0.

model.eval()

# Disable gradient computation and reduce memory consumption.
with torch.no_grad():
    for i, vdata in enumerate(test_loader):
        vinputs, vlabels = vdata
        vinputs = vinputs.to(device)
        vlabels = vlabels.unsqueeze(1).to(torch.float32).to(device)
        voutputs = model(vinputs)
        vloss = loss_fn(voutputs, vlabels)
        auroc_value = AUROC("binary").to(device)(voutputs, vlabels)
        running_vauroc += auroc_value
        accuracy_value = Accuracy("binary").to(device)(voutputs, vlabels)
        running_vacc += accuracy_value
        f1_value = F1Score("binary").to(device)(voutputs, vlabels)
        running_vf1 += f1_value
        running_vloss += vloss

avg_vloss = running_vloss / (i + 1)
avg_vacc = running_vacc / (i + 1)
avg_vf1 = running_vf1 / (i + 1)
avg_vauroc = running_vauroc / (i + 1)
print('LOSS test {}'.format(avg_vloss))
print('ACC test {}'.format(avg_vacc))
print('F1SCORE test {}'.format(avg_vf1))
print('AUROC test {}'.format(avg_vauroc))

LOSS test 0.0015394717920571566
ACC test 1.0
F1SCORE test 1.0
AUROC test 1.0
