# CIFAR-10 Hyperparameter Tuning with Ray Tune

This notebook demonstrates how to use Ray Tune to optimize the hyperparameters of a CIFAR-10 classifier, reusing the existing training helpers from the local `common` module.

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import common
import ray

## 1. Load CIFAR-10 Dataset

In [8]:
# Preprocessing: convert each image to a tensor, then normalize every RGB
# channel with mean 0.5 and std 0.5.
normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
transform = transforms.Compose([transforms.ToTensor(), normalize])

# Both splits share the same root directory, download flag, and preprocessing.
cifar_kwargs = dict(root='./data', download=True, transform=transform)
trainset = torchvision.datasets.CIFAR10(train=True, **cifar_kwargs)
testset = torchvision.datasets.CIFAR10(train=False, **cifar_kwargs)

# Start Ray (a repeated call in the same kernel is tolerated) and place the
# datasets in the object store, so references rather than the datasets
# themselves are passed around; this avoids large actor serialization.
ray.init(ignore_reinit_error=True)
trainset_ref, testset_ref = ray.put(trainset), ray.put(testset)

2025-12-16 21:20:50,181	INFO worker.py:1855 -- Calling ray.init() again after it has already been called.


## 2. Define the Model (same as cifar_regular.ipynb)

In [10]:
class Net(nn.Module):
    """Small CNN: two conv + ReLU + max-pool stages followed by one linear layer.

    ``forward`` sends the flattened convolutional features directly into
    ``fc4``, which produces 10 outputs per sample. ``fc4`` expects
    ``64 * 5 * 5`` input features, which is what the conv/pool stack yields
    for 32x32 inputs.

    NOTE(review): ``fc1``, ``fc2`` and ``fc3`` are constructed (so they are
    registered as parameters and show up in ``state_dict()``) but ``forward``
    never calls them. Confirm whether bypassing them is intentional.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before so that seeded
        # weight initialization and state_dict key order stay unchanged.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)

        flat_features = 64 * 5 * 5
        self.fc1 = nn.Linear(flat_features, 120)  # not used by forward
        self.fc2 = nn.Linear(120, 84)             # not used by forward
        self.fc3 = nn.Linear(84, 10)              # not used by forward
        self.fc4 = nn.Linear(flat_features, 10)

    def forward(self, x):
        """Run both conv stages, flatten per sample, and apply ``fc4``."""
        stage1 = self.pool(F.relu(self.conv1(x)))
        stage2 = self.pool(F.relu(self.conv2(stage1)))
        # Keep the batch dimension; collapse everything after it.
        return self.fc4(stage2.flatten(start_dim=1))

## 3. Run Hyperparameter Search

This will try different combinations of:
- Learning rates (1e-4 to 1e-1)
- Batch sizes (32, 64, 128, 256)
- Optimizers (SGD, Adam)
- Momentum values (0.8 to 0.99)
- Weight decay (1e-5 to 1e-2)

In [11]:
# Launch the Ray Tune hyperparameter search for Net.
# The search budget is collected in one place and unpacked into the call.
search_budget = {
    "num_samples": 20,     # number of different configurations to try
    "max_num_epochs": 6,   # maximum epochs per trial
    "gpus_per_trial": 0,   # set to 1 if you have a GPU
}
results = common.tune_hyperparameters(Net, trainset_ref, testset_ref, **search_budget)

0,1
Current time:,2025-12-16 21:51:50
Running for:,00:30:51.57
Memory:,11.3/16.0 GiB

Trial name,status,loc,batch_size,lr,momentum,optimizer,weight_decay
train_tune_06a59_00000,PENDING,,64,0.0112082,0.901514,sgd,5.01552e-05
train_tune_06a59_00001,PENDING,,32,0.000141254,0.819222,sgd,0.00383685
train_tune_06a59_00002,PENDING,,128,0.0577233,0.850219,sgd,1.18507e-05
train_tune_06a59_00003,PENDING,,64,0.0205148,0.887425,adam,0.00610642
train_tune_06a59_00004,PENDING,,256,0.00089599,0.954429,sgd,0.000742339
train_tune_06a59_00005,PENDING,,32,0.000565417,0.979436,sgd,0.000605215
train_tune_06a59_00006,PENDING,,128,0.000619493,0.916088,sgd,0.00331863
train_tune_06a59_00007,PENDING,,128,0.00156748,0.866355,sgd,3.36094e-05
train_tune_06a59_00008,PENDING,,128,0.000339513,0.821342,adam,0.00232802
train_tune_06a59_00009,PENDING,,32,0.0548016,0.88233,adam,0.00522649


2025-12-16 21:51:50,774	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/Users/sajishunnarah/ray_results/train_tune_2025-12-16_21-20-59' in 0.0039s.
2025-12-16 21:51:50,790	INFO tune.py:1041 -- Total run time: 1851.60 seconds (1851.56 seconds for the tuning loop).
Resume experiment with: Tuner.restore(path="/Users/sajishunnarah/ray_results/train_tune_2025-12-16_21-20-59", trainable=...)
- train_tune_06a59_00000: FileNotFoundError('Could not fetch metrics for train_tune_06a59_00000: both result.json and progress.csv were not found at /Users/sajishunnarah/ray_results/train_tune_2025-12-16_21-20-59/train_tune_06a59_00000_0_batch_size=64,lr=0.0112,momentum=0.9015,optimizer=sgd,weight_decay=0.0001_2025-12-16_21-20-59')
- train_tune_06a59_00001: FileNotFoundError('Could not fetch metrics for train_tune_06a59_00001: both result.json and progress.csv were not found at /Users/sajishunnarah/ray_results/train_tune_2025-12-16_21-20-59/train_tune_06a59_000

RuntimeError: No best trial found for the given metric: accuracy. This means that no trial has reported this metric, or all values reported for this metric are NaN. To not ignore NaN values, you can set the `filter_nan_and_inf` arg to False.

## 4. Analyze Results

In [None]:
# Pick the trial with the highest reported "accuracy"
best_result = results.get_best_result(metric="accuracy", mode="max")

# Build the full report first, then emit it with a single print call
report_lines = [
    "Best Hyperparameters:",
    f"  Learning Rate: {best_result.config['lr']:.6f}",
    f"  Batch Size: {best_result.config['batch_size']}",
    f"  Optimizer: {best_result.config['optimizer']}",
    f"  Momentum: {best_result.config['momentum']:.4f}",
    f"  Weight Decay: {best_result.config['weight_decay']:.6f}",
    f"\nBest Validation Accuracy: {best_result.metrics['accuracy']:.4f}",
]
print("\n".join(report_lines))

In [None]:
# Collect the tuning results into a dataframe
import pandas as pd
df = results.get_dataframe()

# Columns shown in the summary: the sampled hyperparameters plus the metrics
summary_columns = [
    'config/lr',
    'config/batch_size',
    'config/optimizer',
    'config/momentum',
    'config/weight_decay',
    'accuracy',
    'loss',
]

# Rank from highest to lowest accuracy and print the first 10 rows
df_sorted = df.sort_values(by='accuracy', ascending=False)
print("\nTop 10 Configurations:")
print(df_sorted.loc[:, summary_columns].head(10))

## 5. Train Final Model with Best Hyperparameters

Now use the best hyperparameters to train your final model:

In [None]:
# Start from a freshly constructed network for the final training run
final_model = Net()

best_config = best_result.config

# Translate the best trial's config into common.train's keyword arguments
final_train_kwargs = dict(
    learning_rate=best_config['lr'],
    batch_size=int(best_config['batch_size']),
    optimizer=best_config['optimizer'],
    momentum=best_config['momentum'],
    weight_decay=best_config['weight_decay'],
    num_epochs=10,  # Train longer for final model
)
common.train(final_model, trainset, testset, **final_train_kwargs)

## 6. Save Your Best Model

In [None]:
# Persist the trained model's state_dict (not the module object itself)
trained_weights = final_model.state_dict()
torch.save(trained_weights, 'cifar_net_tuned.pth')
print("Model saved to cifar_net_tuned.pth")