In [1]:
# import time
# time.sleep(5000)

In [1]:
competition_name = "deep-learning-spring-2025-project-1"

### Import packages

In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torchvision import transforms
import pickle
import os

from dataloaders import download_kaggle_dataset, get_train_dataloaders, get_test_dataloader

### Load CIFAR-10 dataset

In [3]:
# download_kaggle_dataset()

In [4]:
# Data augmentation and normalization
transform = transforms.Compose([
    transforms.ToPILImage(),  # Convert numpy array to PIL Image
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness = 0.1,contrast = 0.1,saturation = 0.1),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomAdjustSharpness(sharpness_factor = 2,p = 0.2),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    #transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
    transforms.RandomErasing(p=0.2,scale=(0.02, 0.1),value=1.0, inplace=False)
])

In [5]:
batch_size = 256

train_loader, valid_loader = get_train_dataloaders(
    transform,
    subset_percent=1, 
    train_percent=0.8,
    batch_size=batch_size
)

Image shape: torch.Size([3, 32, 32])
Label: 9
Number of training data: 40000
Number of validation data: 10000


## Train the model

First, configure the device

In [6]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)

Using device: cuda


### Set the hyperparameters

In [7]:
num_classes = 10
num_epochs = 40
learning_rate = 0.01
weight_decay = 0.001
momentum = 0.9

### Setup model

In [8]:
from model import ResNet18

In [10]:
model = ResNet18().to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), 
                            lr=learning_rate, 
                            weight_decay=weight_decay, 
                            momentum=momentum)

# Train the model
total_step = len(train_loader)
print(total_step)

# See the total number of trainable parameters
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total Trainable Parameters: {total_params}")

157
Total Trainable Parameters: 4903242


### Training

In [11]:
import gc

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        del images, labels, outputs
        torch.cuda.empty_cache()
        gc.collect()

        if i % 1000 == 0:
            print ('Epoch [{}/{}], Loss: {:.4f}'
                            .format(epoch+1, num_epochs, loss.item()))
    
    # Validation
    with torch.no_grad():
      correct = 0
      total = 0
      for images, labels in valid_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        del images, labels, outputs
    
    print('Accuracy of the network on the {} validation images: {} %'.format(5000, 100 * correct / total))

Epoch [1/40], Loss: 2.3902
Accuracy of the network on the 5000 validation images: 50.3 %
Epoch [2/40], Loss: 1.3660
Accuracy of the network on the 5000 validation images: 56.9 %
Epoch [3/40], Loss: 1.2717
Accuracy of the network on the 5000 validation images: 63.72 %
Epoch [4/40], Loss: 0.8305
Accuracy of the network on the 5000 validation images: 65.63 %
Epoch [5/40], Loss: 0.7951
Accuracy of the network on the 5000 validation images: 68.05 %
Epoch [6/40], Loss: 0.6168
Accuracy of the network on the 5000 validation images: 68.31 %
Epoch [7/40], Loss: 0.4294
Accuracy of the network on the 5000 validation images: 67.84 %
Epoch [8/40], Loss: 0.3282
Accuracy of the network on the 5000 validation images: 67.97 %
Epoch [9/40], Loss: 0.2493
Accuracy of the network on the 5000 validation images: 69.91 %
Epoch [10/40], Loss: 0.1405
Accuracy of the network on the 5000 validation images: 69.82 %
Epoch [11/40], Loss: 0.0758
Accuracy of the network on the 5000 validation images: 69.88 %
Epoch [12/

### Checkpoint model

In [12]:
from datetime import datetime

In [13]:
formatted_timestamp = datetime.now().strftime("%Y_%m_%d__%H_%M_%S")
torch.save(model.state_dict(), f'checkpoints/model_{formatted_timestamp}.pth')

### Load Checkpoint

In [15]:
from model import ResNet18
model = ResNet18().to(device)

In [16]:
# Load the best checkpoint
checkpoint = torch.load('checkpoints/model_2025_03_07__23_21_39.pth')
model.load_state_dict(checkpoint)

<All keys matched successfully>

In [17]:
# Validation
with torch.no_grad():
  correct = 0
  total = 0
  for images, labels in valid_loader:
    images = images.to(device)
    labels = labels.to(device)
    outputs = model(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
    del images, labels, outputs

print('Accuracy of the network on the {} validation images: {} %'.format(5000, 100 * correct / total))

Accuracy of the network on the 5000 validation images: 72.48 %


### Testing

In [18]:
test_loader = get_test_dataloader(transform)

In [19]:
# Generate submission file
model.eval()
predictions = []
with torch.no_grad():
    for images in test_loader:
        images = images.to(device) 
        outputs = model(images) 
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())

submission = pd.DataFrame({'ID': np.arange(len(predictions)), 'Labels': predictions})
submission.to_csv('submission.csv', index=False)
print("Submission file saved.")

Submission file saved.


In [23]:
# import kaggle
# kaggle.api.competition_submit(
#     file_name="submission.csv",
#     message="test",
#     competition=competition_name
# )