In [None]:
%pip install torch torchvision optuna "optuna[visualization]" plotly 

In [None]:
%pip install --upgrade nbformat

In [None]:
pip install scikit-learn

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [None]:
import torch
import optuna
# Print two MPS (Apple Metal) diagnostics, one per line:
#   1. is_available() - expected True on supported macOS + Apple Silicon
#   2. is_built()     - expected True when this PyTorch build includes MPS
print(
    torch.backends.mps.is_available(),
    torch.backends.mps.is_built(),
    sep="\n",
)


In [None]:
import torch

# Choose the compute device used by every later cell.
# Priority: Apple MPS (unchanged behaviour on Apple Silicon), then CUDA so the
# notebook also uses an NVIDIA GPU when one is present, and finally the CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print("Using device:", device)

In [4]:
# Mean / std handed to Normalize for the single grayscale channel.
# NOTE(review): these look like the commonly quoted MNIST statistics —
# confirm before reusing them with another dataset.
NORMALIZE_MEAN = (0.1307,)
NORMALIZE_STD = (0.3081,)

# Images are converted to tensors first and then normalised.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD),
    ]
)

# Both splits live under ./data (downloaded on demand) and share one transform.
mnist_kwargs = dict(root='./data', download=True, transform=transform)
train_dataset = datasets.MNIST(train=True, **mnist_kwargs)
val_dataset = datasets.MNIST(train=False, **mnist_kwargs)

# For simplicity the whole MNIST test split (train=False) is used as the
# "validation" set; it is not split any further.
# In a real scenario, you might keep a separate validation set
# or use cross-validation techniques.
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=1000)

In [5]:
# -------------------------------------------------------------------------
# 2. Model Definition
# -------------------------------------------------------------------------
class SimpleNet(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        input_dim: Number of features per sample once the input is flattened.
        hidden_dim: Number of units in the hidden layer.
        output_dim: Number of values produced per sample.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Flatten each sample of ``x`` and return the output of the second layer.

        Args:
            x: Batch tensor whose first dimension is the batch size,
                e.g. (B, 1, 28, 28).

        Returns:
            Tensor of shape (B, output_dim).
        """
        # Flatten the input from (B, 1, 28, 28) to (B, 784).
        # reshape() gives the same result as view() for contiguous input but
        # also accepts non-contiguous tensors (e.g. after transpose/permute),
        # where view() would raise a RuntimeError.
        x = x.reshape(x.size(0), -1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x


In [6]:
# -------------------------------------------------------------------------
# 3. Training & Validation Functions
# -------------------------------------------------------------------------
def train_one_epoch(model, dataloader, criterion, optimizer, device):
    """Run one training pass over ``dataloader`` and return the mean batch loss.

    Args:
        model: Module to train; switched to training mode by this function.
        dataloader: Iterable yielding ``(images, labels)`` batches. It does
            not need to support ``len()``; batches are counted while iterating.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: Optimizer updating ``model``'s parameters.
        device: Device each batch is moved to before the forward pass.

    Returns:
        float: Sum of the per-batch loss values divided by the number of
        batches, or ``0.0`` when ``dataloader`` yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0  # counted in-loop so a missing/zero len() cannot break the average

    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        num_batches += 1

    # Nothing was trained on: report 0.0 instead of dividing by zero.
    if num_batches == 0:
        return 0.0
    return total_loss / num_batches

In [7]:
def validate(model, val_loader, device):
    """Return the classification accuracy of ``model`` on ``val_loader`` in percent.

    Args:
        model: Module to evaluate; switched to eval mode by this function.
        val_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        float: ``100 * correct / total`` over all samples seen, or ``0.0``
        when ``val_loader`` yields no samples.
    """
    model.eval()
    correct = 0
    total = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Predicted class = index of the largest output along dim 1.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # No samples were evaluated: avoid dividing by zero.
    if total == 0:
        return 0.0
    return 100 * correct / total

In [8]:
def train_and_evaluate(hparams):
    """Train a fresh SimpleNet with ``hparams`` and return its validation accuracy.

    Uses the module-level ``device``, ``train_dataset`` and ``val_loader``.

    hparams: dictionary containing hyperparameters such as:
        {
          'learning_rate': float,
          'hidden_size': int,
          'batch_size': int,
          'epochs': int
        }

    Returns:
        The value returned by ``validate`` for the model after the last
        epoch (or for the untrained model when ``epochs`` is 0).
    """

    # Create the model
    model = SimpleNet(input_dim=784,
                      hidden_dim=hparams['hidden_size'],
                      output_dim=10).to(device)

    # Prepare DataLoader with the chosen batch size
    train_loader = DataLoader(train_dataset,
                              batch_size=hparams['batch_size'],
                              shuffle=True)

    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=hparams['learning_rate'])

    # Training loop: one validation pass per epoch, logged alongside the loss.
    final_val_acc = None
    for epoch in range(hparams['epochs']):
        train_loss = train_one_epoch(model, train_loader, criterion, optimizer, device)
        final_val_acc = validate(model, val_loader, device)
        print(f"Epoch [{epoch+1}/{hparams['epochs']}], "
              f"Train Loss: {train_loss:.4f}, Validation Accuracy: {final_val_acc:.2f}%")

    # The model is unchanged since the last epoch's validation, so that value
    # is reused instead of running a second, identical pass over val_loader.
    # Only when no epoch ran at all do we still need to evaluate once.
    if final_val_acc is None:
        final_val_acc = validate(model, val_loader, device)
    return final_val_acc

In [9]:
def objective(trial):
    """Optuna objective: train a SimpleNet with sampled hyperparameters.

    Samples a learning rate (log-uniform in [1e-4, 1e-1]), a hidden size and
    a batch size from ``trial``, trains for a fixed number of epochs on the
    module-level ``train_dataset`` and reports the validation accuracy on
    ``val_loader`` to Optuna after every epoch so the trial can be pruned.

    Args:
        trial: The ``optuna`` trial used to suggest values and report progress.

    Returns:
        The value returned by ``validate`` after the last epoch (the quantity
        the study optimizes).

    Raises:
        optuna.exceptions.TrialPruned: When ``trial.should_prune()`` is true
            after an epoch.
    """
    # --------------------------------------
    # 1. Suggest hyperparameters
    # --------------------------------------
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-1, log=True)
    hidden_size = trial.suggest_categorical("hidden_size", [128, 256, 512])
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
    epochs = 3  # Or make this a trial suggestion if you want to optimize epochs as well

    # --------------------------------------
    # 2. Build model, data loader, loss and optimizer
    # --------------------------------------
    model = SimpleNet(input_dim=784, hidden_dim=hidden_size, output_dim=10).to(device)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # --------------------------------------
    # 3. Training loop
    # --------------------------------------
    final_val_acc = None
    for epoch in range(epochs):
        train_one_epoch(model, train_loader, criterion, optimizer, device)
        final_val_acc = validate(model, val_loader, device)
        # Report intermediate results to Optuna so the pruner can compare trials
        trial.report(final_val_acc, step=epoch)

        # If the performance is getting worse or stuck, we can use
        # Optuna's "pruning" to stop early. This is optional but useful
        # to speed up the search.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    # --------------------------------------
    # 4. Final validation accuracy
    # --------------------------------------
    # The model has not changed since the last epoch's validation, so that
    # value is returned directly; a fresh pass is only needed if no epoch ran.
    if final_val_acc is None:
        final_val_acc = validate(model, val_loader, device)
    return final_val_acc


In [None]:
if __name__ == "__main__":
    import optuna.visualization as viz

    # Search budget and seed, kept together so they are easy to tune.
    N_TRIALS = 5
    SEED = 42

    # Seed torch (weight init, DataLoader shuffling) and the Optuna sampler so
    # reruns are repeatable as far as the compute backend allows.
    torch.manual_seed(SEED)

    # Create a study object (Optuna tries to maximize or minimize an objective)
    # We use "maximize" because we're trying to maximize validation accuracy.
    # The pruner is set explicitly: Optuna's default MedianPruner waits for 5
    # completed trials before pruning, so with only 5 trials the pruning hooks
    # in `objective` would never trigger.
    study = optuna.create_study(
        direction="maximize",
        sampler=optuna.samplers.TPESampler(seed=SEED),
        pruner=optuna.pruners.MedianPruner(n_startup_trials=2),
    )

    # Launch the hyperparameter search.
    # n_trials determines how many sets of hyperparameters to try.
    study.optimize(objective, n_trials=N_TRIALS, timeout=None)

    # Show the standard Optuna diagnostic plots, in the same order as before.
    for make_plot in (
        viz.plot_optimization_history,
        viz.plot_parallel_coordinate,
        viz.plot_slice,
        viz.plot_param_importances,
    ):
        fig = make_plot(study)
        fig.show()

    # Print results
    print("Number of finished trials:", len(study.trials))
    print("Best trial:")
    best_trial = study.best_trial

    print(f"  Value (Accuracy): {best_trial.value}")
    print("  Params:")
    for key, value in best_trial.params.items():
        print(f"    {key}: {value}")
