In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets, transforms
import itertools
import os


In [None]:
class Net(nn.Module):
    """Fully connected classifier built from Linear/ReLU blocks.

    The stack is: one ``Linear(input_size, hidden_size)`` + ReLU block,
    then ``num_layers - 1`` further ``Linear(hidden_size, hidden_size)`` + ReLU
    blocks, then a final ``Linear(hidden_size, output_size)`` with no
    activation after it.

    Args:
        input_size: Number of features in each (already flattened) input row.
        hidden_size: Width of every hidden layer.
        num_layers: Number of hidden Linear/ReLU blocks; values below 1 still
            produce the single input block.
        output_size: Number of values produced per input row.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        # Input block first, so module indices (and state_dict keys) start at 0.
        stack = [nn.Linear(input_size, hidden_size), nn.ReLU()]
        # Remaining hidden blocks all map hidden_size -> hidden_size.
        for _ in range(1, num_layers):
            stack += [nn.Linear(hidden_size, hidden_size), nn.ReLU()]
        # Output projection; raw scores are returned without an activation.
        stack.append(nn.Linear(hidden_size, output_size))
        self.network = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.network(x)


In [None]:
def create_and_train_model(hyperparams, train_loader, val_loader, model_save_path):
    """Train a ``Net`` classifier, print per-epoch metrics, and save its weights.

    Args:
        hyperparams: Mapping with the keys 'input_size', 'hidden_size',
            'num_layers', 'output_size', 'learning_rate' and 'num_epochs'.
            A 'batch_size' entry is not read here: the batch size is whatever
            the supplied loaders were built with.
        train_loader: Iterable of ``(inputs, labels)`` batches used for training.
        val_loader: Iterable of ``(inputs, labels)`` batches used only to report
            accuracy after each epoch.
        model_save_path: Destination handed to ``torch.save`` for the model's
            ``state_dict``.

    Returns:
        The trained ``Net`` instance (callers that ignore the return value are
        unaffected).

    Raises:
        KeyError: If a required hyperparameter key is missing.
    """
    # Unpack hyperparameters
    input_size = hyperparams['input_size']
    hidden_size = hyperparams['hidden_size']
    num_layers = hyperparams['num_layers']
    output_size = hyperparams['output_size']
    learning_rate = hyperparams['learning_rate']
    num_epochs = hyperparams['num_epochs']

    # Initialize the model, loss function, and optimizer
    model = Net(input_size, hidden_size, num_layers, output_size)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        # ---- Training pass ----
        model.train()
        total_loss = 0.0
        num_batches = 0
        for inputs, labels in train_loader:
            # Flatten everything after the batch dimension instead of assuming
            # 28*28, so the function follows the configured input_size.
            inputs = inputs.reshape(inputs.size(0), -1)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1
        # Count batches ourselves so an empty loader reports NaN rather than
        # raising ZeroDivisionError.
        avg_loss = total_loss / num_batches if num_batches else float('nan')

        # ---- Validation pass ----
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.reshape(inputs.size(0), -1)
                predicted = model(inputs).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        # No validation samples -> accuracy is undefined, not a crash.
        accuracy = 100 * correct / total if total else float('nan')

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Validation Accuracy: {accuracy:.2f}%")

    # Save the trained model
    torch.save(model.state_dict(), model_save_path)
    return model


In [None]:
# Search space: each key maps to the list of candidate values to try.
# Single-element lists are fixed settings rather than searched dimensions.
hyperparams_options = {
    'input_size': [28*28],               # flattened 28x28 image
    'hidden_size': [64, 128],
    'num_layers': [1, 2],
    'output_size': [10],                 # one score per class
    'learning_rate': [0.01, 0.001],
    'num_epochs': [5],
    'batch_size': [64, 128]
}

# Expand the search space into its full Cartesian product: one dict per
# combination, with the last key varying fastest.
keys = tuple(hyperparams_options)
values = tuple(hyperparams_options.values())
hyperparams_combinations = [
    dict(zip(keys, combo)) for combo in itertools.product(*values)
]


In [None]:
# Shared preprocessing for both splits: convert each image to a tensor, then
# normalize with the single-channel mean (0.1307) and std (0.3081) given below.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Fetch MNIST into ./mnist_data (downloading if needed): the training split
# first, then the test split, both using the same transform.
train_dataset, test_dataset = [
    datasets.MNIST(root='mnist_data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
]


In [None]:
# Directory to save models
model_dir = 'saved_models'
os.makedirs(model_dir, exist_ok=True)

# Hold out 10% of the training set for validation. The split is created here,
# from a seeded generator, so this cell works on a fresh kernel and every
# hyperparameter combination is scored on the same validation samples.
val_size = len(train_dataset) // 10
train_subset, val_subset = torch.utils.data.random_split(
    train_dataset,
    [len(train_dataset) - val_size, val_size],
    generator=torch.Generator().manual_seed(0),
)

for idx, hyperparams in enumerate(hyperparams_combinations):
    print(f"\nTraining model {idx+1}/{len(hyperparams_combinations)} with hyperparameters: {hyperparams}")
    model_save_path = os.path.join(model_dir, f'model_{idx}.pth')
    # Build the loaders per run so the 'batch_size' hyperparameter actually
    # takes effect; create_and_train_model expects loaders, not raw tensors.
    run_train_loader = DataLoader(train_subset, batch_size=hyperparams['batch_size'], shuffle=True)
    run_val_loader = DataLoader(val_subset, batch_size=hyperparams['batch_size'])
    create_and_train_model(hyperparams, run_train_loader, run_val_loader, model_save_path)


In [None]:
# Example of loading a model for analysis
def load_model(model_path, hyperparams):
    """Rebuild a ``Net`` from ``hyperparams`` and restore its saved weights.

    Args:
        model_path: Path to a ``state_dict`` file readable by ``torch.load``.
        hyperparams: Mapping providing 'input_size', 'hidden_size',
            'num_layers' and 'output_size'; it must describe the same
            architecture the weights were saved from.

    Returns:
        The restored model, switched to evaluation mode.
    """
    arch_keys = ('input_size', 'hidden_size', 'num_layers', 'output_size')
    restored = Net(*(hyperparams[key] for key in arch_keys))
    # NOTE: torch.load deserializes the file; only load checkpoints you trust.
    state = torch.load(model_path)
    restored.load_state_dict(state)
    # eval() returns the module itself, so it can be returned directly.
    return restored.eval()

# Restore one grid-search run. The index selects both the checkpoint file and
# the hyperparameter dict needed to rebuild the matching architecture.
model_idx = 0  # Change as needed
model_path = os.path.join(model_dir, f'model_{model_idx}.pth')
loaded_model = load_model(
    model_path=model_path,
    hyperparams=hyperparams_combinations[model_idx],
)

# Perform evaluation or analysis with loaded_model


In [None]:
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np

# Creating data indices for training and validation splits:
dataset_size = len(train_dataset)
indices = list(range(dataset_size))
split = int(np.floor(0.1 * dataset_size))  # 10% for validation
# Shuffle with a dedicated seeded generator so the train/validation split is
# identical on every run and the global NumPy RNG state is left untouched.
np.random.default_rng(42).shuffle(indices)

# The first `split` shuffled indices form the validation set; the rest train.
train_indices, val_indices = indices[split:], indices[:split]

# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

# Create DataLoaders. Both read from train_dataset; the samplers keep the two
# index sets disjoint. The batch size is the first candidate in the grid.
train_loader = DataLoader(train_dataset, batch_size=hyperparams_options['batch_size'][0], sampler=train_sampler)
val_loader = DataLoader(train_dataset, batch_size=hyperparams_options['batch_size'][0], sampler=val_sampler)
