In [1]:
import copy
from itertools import product

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import load_cifar10

In [2]:
# Build the three data loaders through the project-local `load_cifar10` helper (its code is not shown here).
# NOTE(review): presumably these are the train / validation / test splits, in that order — confirm against load_cifar10.
train_loader, val_loader, test_loader = load_cifar10.DatasetandLoader()

Data loaded succesfully! as <class 'torch.Tensor'>
Training data shape: torch.Size([40000, 3, 32, 32])


In [3]:
class MLP(nn.Module):
    """Fully connected feed-forward network with a configurable hidden stack.

    The network is a chain of ``Linear -> activation`` pairs, one pair per entry
    of ``neuron_structure``, followed by a final ``Linear`` layer that maps to
    ``num_classes`` outputs. No activation is applied after the final layer.

    Parameters
    ----------
    input_size : int
        Number of input features expected by the first linear layer.
    neuron_structure : iterable of int
        Width of each hidden layer, in order. An empty iterable yields a single
        linear layer from ``input_size`` to ``num_classes``.
    num_classes : int
        Number of output units.
    activation : type or callable or nn.Module, optional
        Non-linearity placed after every hidden linear layer. Either a factory
        (e.g. ``nn.Tanh``) that is called once per hidden layer, or an
        ``nn.Module`` instance (e.g. ``nn.Tanh()``), which is deep-copied per
        layer so that layers never share activation state. Defaults to
        ``nn.ReLU``, which reproduces the previous hard-coded behaviour.
    """

    def __init__(self, input_size, neuron_structure, num_classes, activation=nn.ReLU):
        super().__init__()
        layers = []
        prev_size = input_size

        # One Linear + activation pair per requested hidden width
        for neurons in neuron_structure:
            layers.append(nn.Linear(prev_size, neurons))
            layers.append(self._make_activation(activation))
            prev_size = neurons

        # Output layer: projects the last hidden width onto the classes
        layers.append(nn.Linear(prev_size, num_classes))

        self.network = nn.Sequential(*layers)

    @staticmethod
    def _make_activation(activation):
        """Return a fresh activation module from a factory or a template instance."""
        if isinstance(activation, nn.Module):
            # Copy so a parametrised activation (e.g. PReLU) is not shared across layers
            return copy.deepcopy(activation)
        return activation()

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the final linear layer's output."""
        return self.network(x)
    


In [4]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [5]:
# Network dimensions: flattened 3x32x32 images in, 10 CIFAR-10 classes out
input_size = 3 * 32 * 32  # 3072 features per image
hidden_layers = [512, 256, 128]  # widths of the three hidden layers
num_classes = 10

# Build the network, move it to the selected device and show its layout
model = MLP(
    input_size=input_size,
    neuron_structure=hidden_layers,
    num_classes=num_classes,
)
model = model.to(device)
print(model)

# Training objective and parameter update rule
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

MLP(
  (network): Sequential(
    (0): Linear(in_features=3072, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=128, bias=True)
    (5): ReLU()
    (6): Linear(in_features=128, out_features=10, bias=True)
  )
)


In [6]:
def train(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over every batch in ``train_loader``.

    Parameters
    ----------
    model : nn.Module
        Network to optimise; it is put into training mode.
    train_loader : iterable
        Yields ``(images, labels)`` batches. Each image batch is reshaped to
        ``(batch, -1)`` before being fed to the model.
    criterion : callable
        Loss function called as ``criterion(outputs, labels)``; it must return
        a scalar tensor.
    optimizer : torch.optim.Optimizer
        Optimiser stepped once per batch.
    device : torch.device
        Device the batches are moved to before the forward pass.

    Returns
    -------
    float
        Average of the per-batch loss values seen during the pass.

    Raises
    ------
    ValueError
        If ``train_loader`` yields no batches.
    """
    model.train()
    running_loss = 0.0
    num_batches = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Flatten (batch, ...) to (batch, features). reshape copies when the
        # tensor is not contiguous (e.g. permuted or channels_last input),
        # where .view would raise.
        images = images.reshape(images.size(0), -1)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        # Explicit error instead of a bare ZeroDivisionError
        raise ValueError("train_loader yielded no batches; cannot compute an average loss")

    return running_loss / num_batches

def evaluate(model, test_loader, criterion, device):
    """Score ``model`` on every batch in ``test_loader`` without updating it.

    Parameters
    ----------
    model : nn.Module
        Network to score; it is put into evaluation mode.
    test_loader : iterable
        Yields ``(images, labels)`` batches. Each image batch is reshaped to
        ``(batch, -1)`` before being fed to the model.
    criterion : callable
        Loss function called as ``criterion(outputs, labels)``; it must return
        a scalar tensor.
    device : torch.device
        Device the batches are moved to before the forward pass.

    Returns
    -------
    tuple of (float, float)
        ``(average per-batch loss, accuracy in percent)``. A prediction is the
        index of the largest output along dimension 1.

    Raises
    ------
    ValueError
        If ``test_loader`` yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    test_loss = 0.0
    num_batches = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)

            # reshape (unlike .view) also accepts non-contiguous batches
            images = images.reshape(images.size(0), -1)

            outputs = model(images)
            loss = criterion(outputs, labels)
            test_loss += loss.item()
            num_batches += 1

            # Predicted class = position of the largest output per sample
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    if total == 0:
        # Explicit error instead of a bare ZeroDivisionError
        raise ValueError("test_loader yielded no samples; cannot compute loss or accuracy")

    accuracy = 100 * correct / total
    return test_loss / num_batches, accuracy



In [7]:
num_epochs = 1  # Change as needed

for epoch in range(num_epochs):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    # These metrics are computed on val_loader, so they are named and
    # reported as validation metrics rather than test metrics.
    val_loss, val_accuracy = evaluate(model, val_loader, criterion, device)

    print(f"Epoch [{epoch+1}/{num_epochs}] | "
          f"Train Loss: {train_loss:.4f} | "
          f"Val Loss: {val_loss:.4f} | "
          f"Val Accuracy: {val_accuracy:.2f}%")


Epoch [1/1] | Train Loss: 1.6965 | Test Loss: 1.5682 | Test Accuracy: 43.78%


# Grid search over the architecture search space

In [8]:
# Hyperparameter grid (`product` is imported in the notebook's top import cell)
n_hidden_layers = [1, 2, 4]  # Number of hidden layers
n_neurons_x_layer = [50, 200, 1000]  # Neurons per layer
learning_rate = [10**-3, 10**-4, 10**-5]  # Learning rates

# Every (depth, width, learning-rate) combination, in grid order
architectures = list(product(n_hidden_layers, n_neurons_x_layer, learning_rate))
print('Total architectures:', len(architectures))


Total architectures: 27


In [9]:
# Set device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Lookup table of available activation modules
activation_functions = {
    'relu': nn.ReLU(),
    'tanh': nn.Tanh(),
    'sigmoid': nn.Sigmoid()
}

activation = 'relu'

# Choose activation function (default to ReLU)
activation_fn = activation_functions.get(activation, nn.ReLU())
# NOTE: `activation_fn` is not passed to the model constructed below, so this
# selection currently has no effect on the networks being trained.

# Loop-invariant settings, defined once instead of once per architecture
num_epochs = 1
criterion = nn.CrossEntropyLoss()

# One record per architecture combination
results = []

for i, (n_layers, neurons_per_layer, lr) in enumerate(architectures):
    print('Testing architecture:', i+1, '/', len(architectures))
    print(f"\nTraining MLP with {n_layers} layers, {neurons_per_layer} neurons per layer, LR={lr}")

    # Same width repeated for every hidden layer
    hidden_layers = [neurons_per_layer] * n_layers

    # Fresh model and optimizer for each configuration
    model = MLP(input_size=32*32*3, neuron_structure=hidden_layers, num_classes=10).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Honour num_epochs (previously assigned but ignored: exactly one pass ran)
    for epoch in range(num_epochs):
        train_loss = train(model, train_loader, criterion, optimizer, device)

    # NOTE(review): configurations are scored on test_loader, so the
    # best-configuration choice made from `results` is based on test data;
    # val_loader is available if a separate selection set is wanted.
    test_loss, test_accuracy = evaluate(model, test_loader, criterion, device)

    # Store results
    results.append({
        'n_layers': n_layers,
        'neurons_per_layer': neurons_per_layer,
        'learning_rate': lr,
        'test_accuracy': test_accuracy
    })


Testing architecture: 1 / 27

Training MLP with 1 layers, 50 neurons per layer, LR=0.001
Testing architecture: 2 / 27

Training MLP with 1 layers, 50 neurons per layer, LR=0.0001
Testing architecture: 3 / 27

Training MLP with 1 layers, 50 neurons per layer, LR=1e-05
Testing architecture: 4 / 27

Training MLP with 1 layers, 200 neurons per layer, LR=0.001
Testing architecture: 5 / 27

Training MLP with 1 layers, 200 neurons per layer, LR=0.0001
Testing architecture: 6 / 27

Training MLP with 1 layers, 200 neurons per layer, LR=1e-05
Testing architecture: 7 / 27

Training MLP with 1 layers, 1000 neurons per layer, LR=0.001
Testing architecture: 8 / 27

Training MLP with 1 layers, 1000 neurons per layer, LR=0.0001
Testing architecture: 9 / 27

Training MLP with 1 layers, 1000 neurons per layer, LR=1e-05
Testing architecture: 10 / 27

Training MLP with 2 layers, 50 neurons per layer, LR=0.001
Testing architecture: 11 / 27

Training MLP with 2 layers, 50 neurons per layer, LR=0.0001
Testin

In [10]:
# Pick the record with the highest test accuracy (first one wins on ties)
best_architecture = max(results, key=lambda record: record['test_accuracy'])

print("\nBest Architecture:", best_architecture, sep="\n")


Best Architecture:
{'n_layers': 1, 'neurons_per_layer': 1000, 'learning_rate': 0.0001, 'test_accuracy': 46.99}


# Analyze results

In [11]:
def process_architecture_results(df, metric):
    """
    Creates unique IDs for neural network architectures and ranks them by a specified metric.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing neural network architecture configurations.
        Must have columns 'n_layers', 'neurons_per_layer' and 'learning_rate',
        plus the column named by ``metric``.

    metric : str
        The column name of the metric to use for ranking.
        Higher values are assumed to be better.

    Returns:
    --------
    pandas.DataFrame
        A copy of the input DataFrame, sorted by ``metric`` in descending order
        with a fresh 0-based index, and with two new columns:
        - 'ID': '<n_layers>_<neurons_per_layer>_<learning_rate>' as a string
        - 'Ranking': 1-based position after sorting (1 = highest metric);
          rows with equal metric values keep their input order.

    Raises:
    -------
    KeyError
        If any required column is missing from ``df``.
    """
    # Fail early with one message listing every missing column
    required = ['n_layers', 'neurons_per_layer', 'learning_rate', metric]
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f"DataFrame is missing required column(s): {missing}")

    # Create a copy to avoid modifying the original DataFrame
    result_df = df.copy()

    # Create an ID for each architecture
    result_df['ID'] = (result_df['n_layers'].astype(str) + '_' +
                       result_df['neurons_per_layer'].astype(str) + '_' +
                       result_df['learning_rate'].astype(str))

    # Sort by metric score; a stable sort makes the order of ties deterministic
    result_df = result_df.sort_values(
        by=metric, ascending=False, kind='stable'
    ).reset_index(drop=True)

    # Add ranking
    result_df['Ranking'] = result_df.index + 1

    return result_df

In [17]:
# Tabulate the grid-search records and rank them by test accuracy
# (pandas is imported as `pd` in the notebook's top import cell)
results_df = process_architecture_results(pd.DataFrame(results), 'test_accuracy')
results_df

Unnamed: 0,n_layers,neurons_per_layer,learning_rate,test_accuracy,ID,Ranking
0,1,1000,0.0001,46.99,1_1000_0.0001,1
1,4,1000,0.0001,46.56,4_1000_0.0001,2
2,2,1000,0.0001,46.45,2_1000_0.0001,3
3,1,200,0.0001,45.13,1_200_0.0001,4
4,2,200,0.0001,44.7,2_200_0.0001,5
5,4,200,0.001,44.55,4_200_0.001,6
6,2,200,0.001,44.52,2_200_0.001,7
7,2,1000,0.001,44.4,2_1000_0.001,8
8,1,200,0.001,44.36,1_200_0.001,9
9,2,50,0.001,44.3,2_50_0.001,10


In [18]:
# Load the one-epoch results, align the column names with this notebook's
# naming, then add IDs and rankings based on the test score
oe_results = process_architecture_results(
    pd.read_excel('One_Epoch_Results.xlsx').rename(
        columns={'n_neurons': 'neurons_per_layer', 'hidden_layers': 'n_layers'}
    ),
    'test_score',
)
oe_results

Unnamed: 0,n_layers,neurons_per_layer,learning_rate,train_time,train_score,val_score,test_score,confusion_matrices,ID,Ranking
0,4,1000,0.0001,30.728001,0.515425,0.4654,0.4739,[[568 31 30 24 11 42 19 50 163 48]\n [...,4_1000_0.0001,1
1,2,1000,0.0001,20.250511,0.5178,0.4628,0.4706,[[524 78 51 47 31 24 15 59 102 55]\n [...,2_1000_0.0001,2
2,4,1000,0.001,30.415259,0.4686,0.4498,0.4535,[[543 56 80 24 32 27 36 28 76 84]\n [...,4_1000_0.001,3
3,1,1000,0.0001,14.929885,0.495275,0.4504,0.4519,[[481 72 79 24 26 34 10 45 125 90]\n [...,1_1000_0.0001,4
4,2,200,0.001,3.198517,0.4796,0.443,0.4478,[[444 36 60 25 55 28 29 43 144 122]\n [...,2_200_0.001,5
5,2,1000,0.001,20.052437,0.48255,0.4511,0.4468,[[538 97 63 15 35 17 3 46 115 57]\n [...,2_1000_0.001,6
6,4,200,0.001,3.545218,0.471625,0.4434,0.4446,[[454 51 82 23 29 27 28 47 222 23]\n [...,4_200_0.001,7
7,2,200,0.0001,3.232022,0.446875,0.425,0.4272,[[442 73 99 24 14 19 27 47 193 48]\n [...,2_200_0.0001,8
8,1,1000,0.001,16.621384,0.48675,0.4263,0.426,[[311 51 118 35 39 37 27 48 265 55]\n [...,1_1000_0.001,9
9,1,50,0.001,1.178625,0.4512,0.4194,0.4254,[[485 36 58 51 50 24 19 40 166 57]\n [...,1_50_0.001,10


In [19]:
# Load the early-stopping results, align the column names with this notebook's
# naming, then add IDs and rankings based on the test score
early_stop_results = process_architecture_results(
    pd.read_excel("50 Epochs wo Increase_Results.xlsx").rename(
        columns={'n_neurons': 'neurons_per_layer', 'hidden_layers': 'n_layers'}
    ),
    'test_score',
)

In [20]:
# Join this notebook's ranking with the one-epoch ranking on the shared architecture ID
merged_results = results_df.merge(
    oe_results,
    on='ID',
    suffixes=('_torch', '_one_epoch'),
)
display(merged_results[['Ranking_torch', 'Ranking_one_epoch', 'test_accuracy', 'test_score']])

# Same comparison against the early-stopping ranking
merged_results = results_df.merge(
    early_stop_results,
    on='ID',
    suffixes=('_torch', '_early_stop'),
)
display(merged_results[['Ranking_torch', 'Ranking_early_stop', 'test_accuracy', 'test_score']])

Unnamed: 0,Ranking_torch,Ranking_one_epoch,test_accuracy,test_score
0,1,4,46.99,0.4519
1,2,1,46.56,0.4739
2,3,2,46.45,0.4706
3,4,14,45.13,0.4167
4,5,8,44.7,0.4272
5,6,7,44.55,0.4446
6,7,5,44.52,0.4478
7,8,6,44.4,0.4468
8,9,12,44.36,0.4216
9,10,15,44.3,0.4149


Unnamed: 0,Ranking_torch,Ranking_early_stop,test_accuracy,test_score
0,1,4,46.99,0.481
1,2,1,46.56,0.492
2,3,2,46.45,0.4862
3,4,13,45.13,0.4566
4,5,17,44.7,0.4458
5,6,10,44.55,0.4636
6,7,12,44.52,0.4608
7,8,11,44.4,0.4618
8,9,15,44.36,0.4524
9,10,25,44.3,0.4078
