In [5]:
# import time
# time.sleep(5000)

In [1]:
competition_name = "deep-learning-spring-2025-project-1"

### Import packages

In [13]:
%load_ext autoreload
%autoreload 2

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
import optuna
import numpy as np
import pandas as pd
from datetime import datetime
import os

from data_loader import get_cifar10_dataloaders, get_test_dataloader
from trainer import train_model
# from model import ResNet18

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Configure the device

In [14]:
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)

Using device: cuda


In [17]:
# Define the objective function for Optuna
def objective(trial):
    # Suggest hyperparameters
    num_epochs = trial.suggest_int("num_epochs", 10, 50)
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128, 256, 512])
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "SGD", "RMSprop"])
    momentum = trial.suggest_uniform("momentum", 0.5, 0.9) if optimizer_name == "SGD" else None

    # Suggest data transformations
    transform = transforms.Compose([
        # add random crop and padding
        transforms.RandomHorizontalFlip(trial.suggest_float("h_flip", 0.0, 1.0)),
        transforms.RandomRotation(trial.suggest_int("rotation", 0, 30)),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))  # Normalize with mean and std of CIFAR-10
    ])
    
    train_loader, valid_loader = get_cifar10_dataloaders(
        transform,
        subset_percent=1.0, 
        valid_size=0.1,
        batch_size=batch_size,
        num_workers=4
    )

    # Define model
    model = models.resnet18(pretrained=False)
    model.fc = nn.Linear(model.fc.in_features, 10)  # CIFAR-10 has 10 classes
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    model_name = "resnet18"

    # Print the current hyperparameters, transformations, and model name
    print("-" * 50)
    print(f"Trial {trial.number}:")
    print(f"Model: {model_name}")
    print(f"Epochs: {num_epochs}")
    print(f"Batch Size: {batch_size}")
    print(f"Learning Rate: {learning_rate}")
    print(f"Optimizer: {optimizer_name}")
    if optimizer_name == "SGD":
        print(f"Momentum: {momentum}")
    print(f"Transformations: {transform}")
    print("- " * 25)

    # Define optimizer
    if optimizer_name == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    elif optimizer_name == "SGD":
        optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
    else:
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)

    criterion = nn.CrossEntropyLoss()

    # Training
    best_val_accuracy = train_model(
        trial, model, train_loader, criterion, optimizer, 
        valid_loader=valid_loader, num_epochs=num_epochs, device=device)
    
    # Checkpoint the model with the best validation accuracy
    model_filename = f"model_trial_{trial.number}_val_acc_{best_val_accuracy:.4f}.pth"
    study_name = trial.study.study_name
    checkpoint_dir = f"checkpoints_{study_name}"
    model_path = os.path.join(checkpoint_dir, model_filename)
    
    # Create a directory for checkpoints if it doesn't exist
    os.makedirs(checkpoint_dir, exist_ok=True)
    
    # Save the model state_dict
    torch.save(model.state_dict(), model_path)
    print(f"Model checkpoint saved to {model_path}")

    return best_val_accuracy

In [None]:
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
study_name = f"study_{timestamp}"

study = optuna.create_study(direction="maximize", study_name=study_name)
study.optimize(objective, n_trials=20)

print("Best trial:", study.best_trial.number)
print("Best hyperparameters:", study.best_params)
print("Best validation accuracy:", study.best_value)

[I 2025-03-09 12:31:02,851] A new study created in memory with name: study_2025-03-09_12-31-02


Image shape: torch.Size([3, 32, 32])
Label: 1
Number of training data: 45000
Number of validation data: 5000
--------------------------------------------------
Trial 0:
Model: resnet18
Epochs: 24
Batch Size: 64
Learning Rate: 0.001101960409207437
Optimizer: Adam
Transformations: Compose(
    RandomHorizontalFlip(p=0.7723181767062807)
    RandomRotation(degrees=[-23.0, 23.0], interpolation=nearest, expand=False, fill=0)
    ToTensor()
    Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.201))
)
- - - - - - - - - - - - - - - - - - - - - - - - - 
  Epoch [1/24], Batch [100/704], Loss: 1.5933
  Epoch [1/24], Batch [200/704], Loss: 1.5934
  Epoch [1/24], Batch [300/704], Loss: 1.6876
  Epoch [1/24], Batch [400/704], Loss: 1.2799
  Epoch [1/24], Batch [500/704], Loss: 1.7427
  Epoch [1/24], Batch [600/704], Loss: 1.3450
  Epoch [1/24], Batch [700/704], Loss: 1.2652
  Validation Accuracy after Epoch 1: 0.5380
  Epoch [2/24], Batch [100/704], Loss: 1.1431
  Epoch [2/24], Batch [

### Load Checkpoint

In [11]:
model = models.resnet18(pretrained=False)
model.fc = nn.Linear(model.fc.in_features, 10)  # CIFAR-10 has 10 classes
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Load the latest checkpoint
checkpoint_dir = f"checkpoints_{study_name}"
checkpoint = torch.load(f"{checkpoint_dir}/model_trial_0_val_acc_0.0000.pth")
model.load_state_dict(checkpoint)

<All keys matched successfully>

In [12]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))  # Normalize with mean and std of CIFAR-10
])

from trainer import evaluate_model
_, valid_loader = get_cifar10_dataloaders(
    transform,
    subset_percent=0.0002, 
    valid_size=0.1,
    batch_size=128,
    num_workers=4
)

acc = evaluate_model(model, valid_loader, device)
print("Acc:", acc)

Image shape: torch.Size([3, 32, 32])
Label: 8
Number of training data: 9
Number of validation data: 1
Acc: 0.0


### Run model on test data

In [None]:
# Generate submission file with test data
test_loader = get_test_dataloader()

model.eval()
predictions = []

with torch.no_grad():
    for images, in test_loader:
        outputs = model(images) 
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())

submission = pd.DataFrame({'ID': np.arange(len(predictions)), 'Labels': predictions})
submission.to_csv('submission.csv', index=False)
print("submission file saved.")

In [23]:
# import kaggle
# kaggle.api.competition_submit(
#     file_name="submission.csv",
#     message="test",
#     competition=competition_name
# )