In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from typing import List
import torch.nn as nn
import torch.nn.functional as F
from tqdm.auto import tqdm
tqdm.pandas()

from cfr10_dataset import Cifar10Dataset, load_data, collate_fn

In [None]:
# Locations of the CIFAR-10 training labels CSV and training image directory,
# relative to the notebook's working directory.
data_dir = '../prac3/cifar-10/'
train_val_csv = os.path.join(data_dir, 'trainLabels.csv')
train_imgs_dir = os.path.join(data_dir, 'train')

# Load the labels table.
df = pd.read_csv(train_val_csv)

# Preview the first five rows.
df.head(5)

In [None]:
# Inspect a single sample: build the path to the PNG whose file name is the
# image id (here id 1) inside the training image directory, and display it.
img_id = 1
img_fname = os.path.join(train_imgs_dir, f'{img_id}.png')
img_fname

In [None]:
# Open the sample image file with PIL.
img = Image.open(img_fname)

In [None]:
# Shape of the image once converted to a NumPy array.
np.array(img).shape

In [None]:
# Display the result of resizing to 256x256 for easier viewing. The return
# value is only shown, not assigned, so `img` is not rebound by this cell.
img.resize((256,256))

In [None]:
# Same shape check as before, re-run after the resize() cell whose result was
# not stored back into `img`.
np.array(img).shape

In [None]:
# Convert the image to an array once and reuse it, instead of converting it
# three separate times, then show its dtype and value range.
img_arr = np.array(img)
img_arr.dtype, img_arr.min(), img_arr.max()

### Main pipeline

In [None]:
class ClfModel(torch.nn.Module):
    """Small LeNet-style convolutional classifier.

    Two conv -> ReLU -> 2x2 max-pool stages followed by three fully connected
    layers. ``fc1`` expects ``16 * 5 * 5`` features, which is what the conv
    stack produces for 32x32 inputs (32 -> 28 -> 14 -> 10 -> 5); other spatial
    sizes will fail at ``fc1``.

    Args:
        in_channels: Number of channels of the input images. Defaults to 3.
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, in_channels: int = 3, num_classes: int = 10):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return raw (un-normalised) class logits for a batch of images.

        Args:
            x: Tensor of shape ``(batch, in_channels, 32, 32)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No softmax here: the output is returned as plain logits.
        return self.fc3(x)


In [None]:
def init_model(lr):
    """Create a fresh classifier together with its loss and optimizer.

    Args:
        lr: Learning rate handed to the Adam optimizer.

    Returns:
        Tuple ``(model, criterion, optimizer)``: a new ``ClfModel``, a
        ``CrossEntropyLoss`` instance, and an Adam optimizer over the model's
        parameters.
    """
    net = ClfModel()
    loss_fn = torch.nn.CrossEntropyLoss()
    adam = torch.optim.Adam(params=net.parameters(), lr=lr)
    return net, loss_fn, adam

def validate(model, criterion, loader):
    """Compute the mean per-batch loss of ``model`` over ``loader``.

    The model is evaluated in eval mode with gradients disabled. The
    train/eval mode the model had on entry is restored before returning, so
    calling this in the middle of training does not leave the model in eval
    mode as a hidden side effect.

    Args:
        model: Module invoked as ``model(images)``.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        loader: Sized iterable yielding ``(fnames, images, labels)`` batches.

    Returns:
        Sum of the batch losses divided by ``len(loader)``.
    """
    was_training = model.training
    model.eval()
    total_loss = 0
    try:
        with torch.no_grad():
            for _fnames, images, labels in tqdm(loader, desc="val"):
                outputs = model(images)
                total_loss += criterion(outputs, labels).item()
    finally:
        # Put the model back in whatever mode the caller had it in.
        model.train(was_training)
    return total_loss / len(loader)

def train(model, criterion, optimizer, loader, val_loader, epochs):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Args:
        model: Module invoked as ``model(images)``.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        optimizer: Optimizer over ``model``'s parameters.
        loader: Sized iterable yielding ``(fnames, images, labels)`` training
            batches.
        val_loader: Loader passed to ``validate`` after every epoch.
        epochs: Number of passes over ``loader``.

    Returns:
        Tuple ``(train_losses, val_losses)`` with one mean per-batch loss per
        epoch in each list.
    """
    train_losses = []
    val_losses = []
    for epoch in range(epochs):
        # Validation runs the model in eval mode, so re-enable training mode
        # at the start of every epoch rather than relying on whatever mode the
        # previous epoch's validation left behind.
        model.train()
        total_loss = 0
        for _fnames, images, labels in tqdm(loader, desc="train"):
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        epoch_loss = total_loss / len(loader)
        train_losses.append(epoch_loss)

        val_loss = validate(model, criterion, val_loader)
        val_losses.append(val_loss)

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {epoch_loss}, Val Loss: {val_loss}")

    return train_losses, val_losses

In [None]:
def run(train_val_csv, train_imgs_dir, test_imgs_dir, batch_size, lr, epochs, num_workers, num_samples):
    """Load the data, build the model, train it and report the validation loss.

    Args:
        train_val_csv: Path to the labels CSV, forwarded to ``load_data``.
        train_imgs_dir: Training image directory, forwarded to ``load_data``.
        test_imgs_dir: Test image directory, forwarded to ``load_data``.
        batch_size: Batch size, forwarded to ``load_data``.
        lr: Learning rate, forwarded to ``init_model``.
        epochs: Number of training epochs.
        num_workers: Forwarded to ``load_data``.
        num_samples: Forwarded to ``load_data``.

    Returns:
        Tuple ``(train_losses, val_losses)`` as returned by ``train``.
    """
    # The test loader is returned by load_data but not used in this pipeline.
    train_loader, val_loader, _test_loader = load_data(
        train_val_csv, train_imgs_dir, test_imgs_dir, batch_size, num_workers, num_samples
    )
    model, criterion, optimizer = init_model(lr)
    train_losses, val_losses = train(model, criterion, optimizer, train_loader, val_loader, epochs)

    if val_losses:
        # train() validated after the final epoch and the model has not been
        # updated since, so reuse that value instead of making another full
        # pass over the validation set.
        val_loss = val_losses[-1]
    else:
        # No epochs were run: evaluate the untrained model once.
        val_loss = validate(model, criterion, val_loader)
    print(f"Validation loss: {val_loss}")
    return train_losses, val_losses

In [None]:
# Experiment configuration. Paths are relative to the notebook's working directory.
data_dir = '../prac3/cifar-10/'
train_val_csv = os.path.join(data_dir, 'trainLabels.csv')
train_imgs_dir = os.path.join(data_dir, 'train')
test_imgs_dir = os.path.join(data_dir, 'test')
batch_size = 32
lr = 0.001
epochs = 50
num_workers = 0
num_samples = -1  # forwarded to load_data; presumably -1 means "use everything" -- TODO confirm
seed = 0

# Seed the RNGs so weight initialisation is repeatable under Restart & Run All.
# NumPy is seeded as well in case load_data relies on its global RNG (not
# visible from this notebook).
torch.manual_seed(seed)
np.random.seed(seed)

# Keep the per-epoch losses instead of letting the cell dump the raw lists.
train_losses, val_losses = run(train_val_csv, train_imgs_dir, test_imgs_dir, batch_size, lr, epochs, num_workers, num_samples)

# Loss curves: one point per epoch for training and validation.
fig, ax = plt.subplots()
ax.plot(range(1, len(train_losses) + 1), train_losses, label="train")
ax.plot(range(1, len(val_losses) + 1), val_losses, label="val")
ax.set(xlabel="epoch", ylabel="mean batch loss", title="Training vs. validation loss")
ax.legend()
plt.show()