In [1]:
import numpy
import torch
import torch.utils.data
import torch.optim
import torchvision as vision
import torchvision.transforms.v2 as v2
import tqdm
import matplotlib.pyplot as pyplot

import models

# 1. Classifier Model (2-layer CNN)

## Load Dataset

In [2]:
BATCH_SIZE = 2000
TRAIN_SET_SIZE = 0.8

def target_transform(label: int) -> torch.Tensor:
    """Transform label into one-hot encoded."""
    l = torch.zeros(10)
    l[label] = 1
    return l


dataset = vision.datasets.ImageFolder(
    root="dataset",
    transform=vision.transforms.Compose([
        v2.PILToTensor(),
        v2.ToDtype(torch.float32, scale=True),
    ]),
    target_transform=target_transform)

train_set, test_set = torch.utils.data.random_split(dataset, [TRAIN_SET_SIZE, 1 - TRAIN_SET_SIZE])
train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=BATCH_SIZE, shuffle=False)

print(f"The dataset has {len(dataset)} samples.")
print(f"The train set has {len(train_set)} samples.")
print(f"The test set has {len(test_set)} samples.")

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'dataset'

## Define Training Hyperparameters

In [12]:
cls = models.Conv2Layers(n_classes=10, image_size=(64, 64)).cuda()

criterion = torch.nn.BCELoss(reduction="mean")
optimizer = torch.optim.Adam(cls.parameters(), lr=1e-3)
    
loss_hist = []

# Train

In [None]:
EPOCHS = 50

for epoch in range(EPOCHS):
    total_loss = 0.0
    for inputs, labels in tqdm.tqdm(train_loader):
        inputs = inputs.cuda()
        labels = labels.cuda()
        y = cls(inputs)
        loss = criterion(y, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    loss_hist.append(total_loss)

pyplot.plot(loss_hist)

## Evaluation

In [None]:
tp = 0
tn = 0
fp = 0
fn = 0

for inputs, labels in tqdm.tqdm(test_loader):
    inputs = inputs.cuda()
    labels = labels.cuda()
    y = cls(inputs)
    acc = torch.argmax(y, dim=1) == torch.argmax(labels, dim=1)
    y = torch.argmax(y, dim=1).to(bool)
    labels = torch.argmax(labels, dim=1).to(bool)
    tp += ((y == True) & (labels == True)).sum()
    tn += ((y == False) & (labels == False)).sum()
    fp += ((y == True) & (labels == False)).sum()
    fn += ((y == False) & (labels == True)).sum()

accuracy = (tp + tn) / (tp + tn + fp + fn)
precision = tp / (tp + fp)
recall = tp / (tp + fn)

print(f"Accuracy = {accuracy}")
print(f"Precision = {precision}")
print(f"Recall = {recall}")

## Save Model

In [None]:
torch.save(
    {
        "model_state_dict": cls.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
    },
    "outputs/cls_state.pt")

# 2. Segmentation Model (U-Net)

## Load Dataset

In [None]:
BATCH_SIZE = 2000
TRAIN_SET_SIZE = 0.8

def target_transform_2(label: int) -> torch.Tensor:
    """Transform label into segmentation matrix."""
    l = torch.zeros((10, 64, 64))
    l[label] = torch.ones((64, 64))
    return l


dataset = vision.datasets.ImageFolder(
    root="dataset",
    transform=vision.transforms.Compose([
        v2.PILToTensor(),
        v2.ToDtype(torch.float32, scale=True),
    ]),
    target_transform=target_transform_2)

train_set, test_set = torch.utils.data.random_split(dataset, [TRAIN_SET_SIZE, 1 - TRAIN_SET_SIZE])
train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=BATCH_SIZE, shuffle=False)

print(f"The dataset has {len(dataset)} samples.")
print(f"The train set has {len(train_set)} samples.")
print(f"The test set has {len(test_set)} samples.")

The dataset has 27000 samples.
The train set has 21601 samples.
The test set has 5399 samples.


## Define Training Hyperparameters

In [13]:
def gaussian_filter(kernel_size, sigma=1, mean=0):
    x, y = numpy.meshgrid(numpy.linspace(-1, 1, kernel_size), numpy.linspace(-1, 1, kernel_size))
    dst = numpy.sqrt(x**2 + y**2)
    gauss = numpy.exp(-((dst - mean)**2 / (2.0 * sigma**2)))
    return gauss

loss_filter = torch.Tensor(gaussian_filter(64)).cuda()

In [4]:
unet = models.UNet(n_classes=10).cuda()

criterion2 = torch.nn.BCELoss(reduction="none")
optimizer2 = torch.optim.Adam(unet.parameters(), lr=1e-3)

loss_hist = []

# Train

In [None]:
EPOCHS = 50

for epoch in range(EPOCHS):
    total_loss = 0.0
    for inputs, labels in tqdm.tqdm(train_loader):
        inputs = inputs.cuda()
        labels = labels.cuda()
        y = unet(inputs)
        loss = criterion2(y, labels)
        loss = (loss * loss_filter).mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    loss_hist.append(total_loss)

pyplot.plot(loss_hist)

## Evaluation

In [11]:
tp = 0
tn = 0
fp = 0
fn = 0

for inputs, labels in tqdm.tqdm(test_loader):
    inputs = inputs.cuda()
    labels = labels.cuda()
    y = unet(inputs)
    y = torch.argmax(y, dim=1).to(bool)
    labels = torch.argmax(labels, dim=1).to(bool)
    tp += ((y == True) & (labels == True)).sum()
    tn += ((y == False) & (labels == False)).sum()
    fp += ((y == True) & (labels == False)).sum()
    fn += ((y == False) & (labels == True)).sum()

accuracy = (tp + tn) / (tp + tn + fp + fn)
precision = tp / (tp + fp)
recall = tp / (tp + fn)

print(f"Accuracy = {accuracy}")
print(f"Precision = {precision}")
print(f"Recall = {recall}")

100%|██████████| 3/3 [00:35<00:00, 11.75s/it]

Accuracy = 0.8680691123008728
Precision = 0.9329958558082581
Recall = 0.9177623987197876





## Save Model

In [None]:
torch.save(
    {
        "model_state_dict": unet.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
    },
    "outputs/unet_state.pt")