### Import Libraries

In [2]:
import torch
from torch.utils.data import DataLoader, random_split

from torchvision import datasets, transforms
from torchvision.models import AlexNet

from model_runner import ModelRunner

from utils import get_mean_and_std

### Test for CUDA
Make sure to install the correct CUDA version and packages, see: https://pytorch.org/get-started/locally/

In [3]:
# check if CUDA is available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device}')

Using cuda


In [4]:
import gc

gc.collect()

torch.cuda.empty_cache()

### Load Images

#### Load the dataset and dataloader
We do this with a basic transformer which doesn't do anything except converting the images to tensors.

In [5]:
images_path = 'data/images'
basic_transforms = transforms.Compose([
    transforms.ToTensor(),
])

untransformed_dataset = datasets.ImageFolder(root=images_path, transform=basic_transforms)

In [6]:
mean, std= get_mean_and_std(untransformed_dataset)
print(f"Untransformed images, mean is {mean}, std is {std}")

==> Computing mean and std..
Untransformed images, mean is tensor([0.4587, 0.5568, 0.4739]), std is tensor([0.3499, 0.2272, 0.2809])


#### Show some images and labels.

In [19]:
enhanced_transforms = transforms.Compose([
    # transforms.Resize((224, 224)),  # Resize images to 224x224 for AlexNet
    transforms.RandomCrop(32, padding=4),
    # transforms.Resize((100, 100)),  # Resize to match the model's input size
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Dataset with the correctly transformed images
dataset = datasets.ImageFolder(root=images_path, transform=enhanced_transforms)

In [20]:
print(f"Dataset has classes: {dataset.classes} (length: {len(dataset.classes)}) Dataset length: is {len(dataset)}")

mean_updated, std_updated= get_mean_and_std(dataset)
print(f"Transformed images, mean is {mean_updated}, std is {std_updated}")

Dataset has classes: ['Black bishop', 'Black king', 'Black knight', 'Black pawn', 'Black queen', 'Black rook', 'White bishop', 'White king', 'White knight', 'White pawn', 'White queen', 'White rook'] (length: 12) Dataset length: is 300
==> Computing mean and std..
Transformed images, mean is tensor([-0.0534, -0.0052, -0.0156]), std is tensor([0.3581, 0.3869, 0.3755])


### Split Train and Test
We split test 0.2, to train 0.8.

In [21]:
ratio = 0.8
train_size = int(ratio * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

#### Create the dataloaders
We set a feasible batch size (Amount of images is 300).
It is important to create the dataloaders after the split!

In [22]:
print(f"Train set length: {len(train_dataset)}, testset length: {len(test_dataset)}")

Train set length: 240, testset length: 60


In [23]:
# Train set is 240 images, test set is 60 images. Batch size to 20
batch_size = 20

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

### Do the training with AlexNet

In [21]:
# Define the network: AlexNet, num_classes is the number of classes in the dataset (from the dataset directly)
net = AlexNet(num_classes=len(dataset.classes))
net.to(device)

# specify loss function
criterion = torch.nn.CrossEntropyLoss()

# Optimizer and momentum
optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Number of epochs
num_epochs = 40

runner = ModelRunner(net=net, trainloader=train_dataloader, testloader=test_dataloader, device=device, optimizer=optimizer, criterion=criterion)

for epoch in range(num_epochs):
    runner.train(epoch)
    runner.test(epoch)

RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


### Do the training with ResNet18

In [24]:
import resnet

# Define the network: ResNet101
net = resnet.ResNet18()
net.to(device)

# specify loss function
criterion = torch.nn.CrossEntropyLoss()

# Optimizer and momentum
optimizer = torch.optim.Adam(net.parameters(), lr=0.01)

# Number of epochs
num_epochs = 100

runner = ModelRunner(net=net, trainloader=train_dataloader, testloader=test_dataloader, device=device, optimizer=optimizer, criterion=criterion)

for epoch in range(num_epochs):
    runner.train(epoch)
    runner.test(epoch)


Epoch: 0
Train accuracy: 11.25
Test accuracy: 11.666666666666666
SAVING! Previous best accuracy: 0. New best accuracy: 11.666666666666666

Epoch: 1
Train accuracy: 6.666666666666667
Test accuracy: 10.0

Epoch: 2
Train accuracy: 8.75
Test accuracy: 11.666666666666666

Epoch: 3
Train accuracy: 12.916666666666666
Test accuracy: 6.666666666666667

Epoch: 4
Train accuracy: 9.166666666666666
Test accuracy: 6.666666666666667

Epoch: 5
Train accuracy: 12.5
Test accuracy: 11.666666666666666

Epoch: 6
Train accuracy: 9.583333333333334
Test accuracy: 13.333333333333334
SAVING! Previous best accuracy: 11.666666666666666. New best accuracy: 13.333333333333334

Epoch: 7
Train accuracy: 14.166666666666666
Test accuracy: 10.0

Epoch: 8
Train accuracy: 11.25
Test accuracy: 11.666666666666666

Epoch: 9
Train accuracy: 14.166666666666666
Test accuracy: 16.666666666666668
SAVING! Previous best accuracy: 13.333333333333334. New best accuracy: 16.666666666666668

Epoch: 10
Train accuracy: 8.75
Test accurac