In [None]:
# import time
# time.sleep(5000)

In [1]:
# Kaggle competition slug. In this notebook it is only referenced by the
# (currently commented-out) kaggle.api.competition_submit call in the last cell.
competition_name = "deep-learning-spring-2025-project-1"

### Import packages

In [10]:
%load_ext autoreload
%autoreload 2

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
import optuna
import numpy as np
import pandas as pd
import os

from data_loader import get_cifar10_dataloaders, get_test_dataloader
from trainer import train_model
# from model import ResNet18

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Configure the device

In [11]:
# Select the compute device: use the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Using device:", device)

Using device: cpu


In [12]:
# Define the objective function for Optuna
def objective(trial):
    """Train one ResNet-18 configuration and return its validation score.

    Samples the optimiser settings and augmentation strengths from ``trial``,
    trains a randomly initialised torchvision ResNet-18 via ``train_model``,
    writes the resulting weights to ``checkpoints/`` and returns the score so
    Optuna can maximise it.

    Args:
        trial: Optuna trial used to sample hyperparameters; it is also passed
            through to ``train_model``.

    Returns:
        Whatever ``train_model`` returns, used here as the best validation
        accuracy of the run.
    """
    # Suggest hyperparameters
    # NOTE: the (1, 1) range pins every trial to a single epoch.
    num_epochs = trial.suggest_int("num_epochs", 1, 1)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64])
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "SGD", "RMSprop"])
    # Momentum is only sampled for SGD. suggest_float replaces the deprecated
    # suggest_uniform and samples from the same uniform [0.5, 0.9] range.
    momentum = trial.suggest_float("momentum", 0.5, 0.9) if optimizer_name == "SGD" else None

    # Suggest data transformations
    transform = transforms.Compose([
        # TODO: add random crop and padding
        transforms.RandomHorizontalFlip(trial.suggest_float("h_flip", 0.0, 1.0)),
        transforms.RandomRotation(trial.suggest_int("rotation", 0, 30)),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))  # Normalize with mean and std of CIFAR-10
    ])

    # NOTE(review): subset_percent=0.0002 is a smoke-test setting -- the recorded
    # run reports 9 training samples and 1 validation sample, so the returned
    # accuracy is not meaningful until this is raised.
    # NOTE(review): the same augmenting transform is handed over for both
    # loaders; confirm in data_loader whether validation data is augmented too.
    train_loader, valid_loader = get_cifar10_dataloaders(
        transform,
        subset_percent=0.0002,
        valid_size=0.1,
        batch_size=batch_size,
        num_workers=4
    )

    # Define model
    # Called without a weights argument so the network is randomly initialised
    # on every torchvision version (avoids the deprecated `pretrained=` keyword).
    model = models.resnet18()
    model.fc = nn.Linear(model.fc.in_features, 10)  # CIFAR-10 has 10 classes
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    model_name = "resnet18"

    # Print the current hyperparameters, transformations, and model name
    print("-" * 50)
    print(f"Trial {trial.number}:")
    print(f"Model: {model_name}")
    print(f"Epochs: {num_epochs}")
    print(f"Batch Size: {batch_size}")
    print(f"Learning Rate: {learning_rate}")
    print(f"Optimizer: {optimizer_name}")
    if optimizer_name == "SGD":
        print(f"Momentum: {momentum}")
    print(f"Transformations: {transform}")
    print("- " * 25)

    # Define optimizer
    if optimizer_name == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    elif optimizer_name == "SGD":
        optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
    else:
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)

    criterion = nn.CrossEntropyLoss()

    # Training
    best_val_accuracy = train_model(
        trial, model, train_loader, criterion, optimizer,
        valid_loader=valid_loader, num_epochs=num_epochs, device=device)

    # Create the checkpoint directory before building the target path
    checkpoint_dir = "checkpoints"
    os.makedirs(checkpoint_dir, exist_ok=True)

    # The file name encodes the trial number and its score so the checkpoint
    # can be located again from the study results.
    model_filename = f"model_trial_{trial.number}_val_acc_{best_val_accuracy:.4f}.pth"
    model_path = os.path.join(checkpoint_dir, model_filename)

    # Save the model state_dict
    # NOTE(review): this stores the weights as they are once train_model returns;
    # whether they correspond to the best-scoring epoch depends on train_model.
    torch.save(model.state_dict(), model_path)
    print(f"Model checkpoint saved to {model_path}")

    return best_val_accuracy

In [None]:
# Run the hyperparameter search; higher validation accuracy is better.
study = optuna.create_study(direction="maximize")
study.optimize(func=objective, n_trials=2)

# Summarise the winning trial.
print(f"Best trial: {study.best_trial.number}")
print(f"Best hyperparameters: {study.best_params}")
print(f"Best validation accuracy: {study.best_value}")

[I 2025-03-09 11:50:13,514] A new study created in memory with name: no-name-c6521b0d-deab-4f8f-8c26-360cf0bd73a7
  momentum = trial.suggest_uniform("momentum", 0.5, 0.9) if optimizer_name == "SGD" else None


Image shape: torch.Size([3, 32, 32])
Label: 8
Number of training data: 9
Number of validation data: 1
--------------------------------------------------
Trial 0:
Model: resnet18
Epochs: 1
Batch Size: 32
Learning Rate: 0.0012632204925797196
Optimizer: SGD
Momentum: 0.5259893129532928
Transformations: Compose(
    RandomHorizontalFlip(p=0.2667636437690398)
    RandomRotation(degrees=[-18.0, 18.0], interpolation=nearest, expand=False, fill=0)
    ToTensor()
    Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.201))
)
- - - - - - - - - - - - - - - - - - - - - - - - - 


[I 2025-03-09 11:50:37,597] Trial 0 finished with value: 0.0 and parameters: {'num_epochs': 1, 'batch_size': 32, 'learning_rate': 0.0012632204925797196, 'optimizer': 'SGD', 'momentum': 0.5259893129532928, 'h_flip': 0.2667636437690398, 'rotation': 18}. Best is trial 0 with value: 0.0.


  Validation Accuracy after Epoch 1: 0.0000
Trial 0 complete. Best Validation Accuracy: 0.0000

Model checkpoint saved to checkpoints/model_trial_0_val_acc_0.0000.pth
Image shape: torch.Size([3, 32, 32])
Label: 8
Number of training data: 9
Number of validation data: 1
--------------------------------------------------
Trial 1:
Model: resnet18
Epochs: 1
Batch Size: 32
Learning Rate: 0.0004578838787322066
Optimizer: SGD
Momentum: 0.7206165683835364
Transformations: Compose(
    RandomHorizontalFlip(p=0.7429052973489064)
    RandomRotation(degrees=[-4.0, 4.0], interpolation=nearest, expand=False, fill=0)
    ToTensor()
    Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.201))
)
- - - - - - - - - - - - - - - - - - - - - - - - - 


[I 2025-03-09 11:51:01,274] Trial 1 finished with value: 0.0 and parameters: {'num_epochs': 1, 'batch_size': 32, 'learning_rate': 0.0004578838787322066, 'optimizer': 'SGD', 'momentum': 0.7206165683835364, 'h_flip': 0.7429052973489064, 'rotation': 4}. Best is trial 0 with value: 0.0.


  Validation Accuracy after Epoch 1: 0.0000
Trial 1 complete. Best Validation Accuracy: 0.0000

Model checkpoint saved to checkpoints/model_trial_1_val_acc_0.0000.pth
Best trial: FrozenTrial(number=0, state=TrialState.COMPLETE, values=[0.0], datetime_start=datetime.datetime(2025, 3, 9, 11, 50, 13, 514904), datetime_complete=datetime.datetime(2025, 3, 9, 11, 50, 37, 596469), params={'num_epochs': 1, 'batch_size': 32, 'learning_rate': 0.0012632204925797196, 'optimizer': 'SGD', 'momentum': 0.5259893129532928, 'h_flip': 0.2667636437690398, 'rotation': 18}, user_attrs={}, system_attrs={}, intermediate_values={0: 0.0}, distributions={'num_epochs': IntDistribution(high=1, log=False, low=1, step=1), 'batch_size': CategoricalDistribution(choices=(16, 32, 64)), 'learning_rate': FloatDistribution(high=0.01, log=True, low=0.0001, step=None), 'optimizer': CategoricalDistribution(choices=('Adam', 'SGD', 'RMSprop')), 'momentum': FloatDistribution(high=0.9, log=False, low=0.5, step=None), 'h_flip': Fl

### Load Checkpoint

In [None]:
# model = ResNet18().to(device)
# # Load the latest checkpoint
# checkpoint = torch.load(latest_chk_fn)
# model.load_state_dict(checkpoint)

<All keys matched successfully>

In [None]:
# from trainer import evaluate_model
# evaluate_model(model, valid_loader, device)

### Run model on test data

In [None]:
# Generate submission file with test data
test_loader = get_test_dataloader()

# Rebuild the network from the best trial's checkpoint so this cell does not
# rely on a `model` variable left over from an earlier kernel session (the
# model created inside `objective` is local to that function). The file name
# mirrors the pattern `objective` uses when it saves a checkpoint.
best_checkpoint_path = os.path.join(
    "checkpoints",
    f"model_trial_{study.best_trial.number}_val_acc_{study.best_value:.4f}.pth",
)
model = models.resnet18()
model.fc = nn.Linear(model.fc.in_features, 10)  # same 10-class head as in `objective`
model.load_state_dict(torch.load(best_checkpoint_path, map_location=device))
model.to(device)
model.eval()

predictions = []

with torch.no_grad():
    # Each batch is unpacked as a 1-tuple holding only the images (no labels).
    for images, in test_loader:
        # Inputs must live on the same device as the model.
        outputs = model(images.to(device))
        predicted = outputs.argmax(dim=1)
        predictions.extend(predicted.cpu().tolist())

# IDs are the running index of the predictions, in loader order.
submission = pd.DataFrame({'ID': np.arange(len(predictions)), 'Labels': predictions})
submission.to_csv('submission.csv', index=False)
print("submission file saved.")

In [23]:
# import kaggle
# kaggle.api.competition_submit(
#     file_name="submission.csv",
#     message="test",
#     competition=competition_name
# )