In [None]:
# import time
# time.sleep(5000)

In [1]:
# Competition identifier; referenced by the (currently commented-out) Kaggle
# submission call (`competition=competition_name`) later in this notebook.
competition_name = "deep-learning-spring-2025-project-1"

### Import packages

In [1]:
%load_ext autoreload
%autoreload 2

import os
from datetime import datetime

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torchvision import transforms

from data_loader import get_cifar10_dataloaders, get_test_dataloader
from model import ResNet18
from trainer import train_model



### Load CIFAR-10 dataset

In [2]:
# Per-channel mean and std of CIFAR-10 used for input normalization
cifar10_mean = (0.4914, 0.4822, 0.4465)
cifar10_std = (0.2023, 0.1994, 0.2010)

# Preprocessing pipeline, applied in order:
#   1. random 32x32 crop after padding by 4 pixels
#   2. random horizontal flip
#   3. conversion to a tensor
#   4. per-channel normalization
pipeline_steps = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(cifar10_mean, cifar10_std),
]
transform = transforms.Compose(pipeline_steps)

In [3]:
# Settings forwarded to the project's CIFAR-10 loader factory.
# NOTE(review): subset_percent=0.0002 is a smoke-test sized setting -- the
# recorded run of this cell reported 9 training and 1 validation samples.
# Presumably it is the fraction of the dataset to keep; confirm in data_loader.
loader_config = dict(
    subset_percent=0.0002,
    valid_size=0.1,
    batch_size=128,
    num_workers=4,
)
train_loader, valid_loader = get_cifar10_dataloaders(transform, **loader_config)

Image shape: torch.Size([3, 32, 32])
Label: 8
Number of training data: 9
Number of validation data: 1


## Train the model

First, select the compute device: the GPU if CUDA is available, otherwise the CPU.

In [4]:
# Pick the compute device: CUDA when PyTorch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Using device:", device)

Using device: cpu


### Setup model

In [5]:
# --- Training hyperparameters ---
num_epochs = 1
learning_rate = 0.01
momentum = 0.9
weight_decay = 0.001

# Fresh network instance, placed on the selected device
model = ResNet18().to(device)

# Objective: cross-entropy. Optimizer: SGD with momentum and weight decay.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=momentum,
    weight_decay=weight_decay,
)

# Count the elements of every parameter tensor that requires gradients
total_params = sum(
    tensor.numel()
    for tensor in filter(lambda t: t.requires_grad, model.parameters())
)
print(f"Total Trainable Parameters: {total_params}")

Total Trainable Parameters: 4903242


In [6]:
# Hand the model, data loaders, loss and optimizer to the project's training routine.
# NOTE(review): log_interval=1 presumably logs every step (the recorded run
# printed a loss line for step 1/1) -- confirm against trainer.train_model.
train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    valid_loader=valid_loader,
    num_epochs=num_epochs,
    device=device,
    log_interval=1,
)

Epoch [1/1], Step [1/1], Loss: 2.3161
Validation Loss: 2.3088, Validation Accuracy: 0.00%
Training complete!


### Checkpoint model

In [None]:
# Save the model's state_dict under a timestamped filename so successive runs
# do not overwrite each other's checkpoints.
# torch.save does not create missing parent directories, so make sure the
# target folder exists first (no-op when it is already there).
os.makedirs('checkpoints', exist_ok=True)

formatted_timestamp = datetime.now().strftime("%Y_%m_%d__%H_%M_%S")
latest_chk_fn = f'checkpoints/model_{formatted_timestamp}.pth'
torch.save(model.state_dict(), latest_chk_fn)

### Load Checkpoint

In [None]:
# Rebuild the network on the selected device and restore the weights that
# were written to `latest_chk_fn` by the checkpoint cell.
model = ResNet18().to(device)

# Load the latest checkpoint.
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved on a GPU can still be loaded on a CPU-only machine (and vice versa).
checkpoint = torch.load(latest_chk_fn, map_location=device)
model.load_state_dict(checkpoint)

<All keys matched successfully>

In [25]:
# from trainer import evaluate_model
# evaluate_model(model, valid_loader, criterion, device)

### Run model on test data

In [None]:
# Generate submission file with test data
test_loader = get_test_dataloader()

model.eval()
predictions = []

with torch.no_grad():
    for images, in test_loader:
        outputs = model(images) 
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())

submission = pd.DataFrame({'ID': np.arange(len(predictions)), 'Labels': predictions})
submission.to_csv('submission.csv', index=False)
print("submission file saved.")

In [23]:
# import kaggle
# kaggle.api.competition_submit(
#     file_name="submission.csv",
#     message="test",
#     competition=competition_name
# )