In [None]:
import os 
import time
import pickle
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from itertools import product

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torchvision.transforms as transforms
import torchvision

from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix,accuracy_score



In [2]:
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

# Load CIFAR-10 dataset

The CIFAR-10 dataset consists of images that are 32x32 pixels in size and have 3 color channels (Red, Green, Blue)  
Each image in the CIFAR-10 dataset is represented as a 1D array of length 3072 (32 * 32 * 3).
The first 1024 values correspond to the Red channel, the next 1024 values correspond to the Green channel, and the last 1024 values correspond to the Blue channel.

The common format for image data in deep learning frameworks like PyTorch is:

Shape: (num_samples, channels, height, width)

In [3]:
# Initialize variables
X_train, y_train = [], []
X_test, y_test = [], []

# Load all the paths of the pickle files
cifar_path = "CIFAR-10"
files_path = os.listdir(cifar_path)

# Load training data
for file in files_path: 
  filepath = os.path.join(cifar_path, file)
  if file.startswith("data_batch"):
    temp_dict = unpickle(filepath)
    X_train.extend(temp_dict[b'data'])
    y_train.extend(temp_dict[b'labels'])
    print(len(X_train), len(y_train))

# Load testing data
for file in files_path:
    filepath = os.path.join(cifar_path, file)
    if file.startswith("test_batch"):
        temp_dict = unpickle(filepath)
        X_test.append(temp_dict[b'data'])
        y_test.extend(temp_dict[b'labels'])



# Turn into numpy array 
X_train, y_train = np.array(X_train), np.array(y_train)
X_test, y_test = np.array(X_test), np.array(y_test)


# Reshape the data
# X_train = np.vstack(X_train).reshape(-1, 3, 32, 32) #-1 is the number of samples/images, 3 is the channnels, 32 is the height and 32 is the width
X_train = np.vstack(X_train)

# X_test = np.vstack(X_test).reshape(-1, 3, 32, 32)
X_test= np.vstack(X_test)

# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


print(X_train.shape, y_train.shape)

10000 10000
20000 20000
30000 30000
40000 40000
50000 50000
(50000, 3072) (50000,)


In [None]:
# Create X_val set
X_train, X_val, y_train, y_val = train_test_split(X_train_scaled, y_train, test_size=0.2, random_state=13)
X_train.shape, X_val.shape

((40000, 3072), (10000, 3072))

In [5]:
# import matplotlib.pyplot as plt

# # Select an image index
# index = 13

# # Get the image
# image = X_train[index]

# # Plot it
# plt.figure(figsize=(1, 1))  
# plt.imshow(image.reshape(0, 2, 3, 1))
# plt.axis("off")
# plt.show()


For PyTorch: Image data in the format (num_samples, channels, height, width) for training and inference.  
For Visualization: transpose(1, 2, 0) is required to convert the image to the format (height, width, channels) when using plt.imshow().

# Search Space

In [33]:
def fibonacci(n):
    fib_sequence = [0, 1]
    while True:
        next_fib = fib_sequence[-1] + fib_sequence[-2]
        if next_fib > n:
            break
        fib_sequence.append(next_fib)
    
    return fib_sequence[2:]  # Exclude 0 and 1


In [46]:
n_hidden_layers = fibonacci(5)
n_neurons_x_layer = [50, 100, 200, 500, 1000]
learning_rate = [10**-3, 10**-4, 10**-5]
activation = 'relu'
solver='adam'

# se hacen todas las combinaciones
configurations = list(product(n_hidden_layers, n_neurons_x_layer, learning_rate))
print('Total de configuraciones:', len(configurations))

configuration_results = {
    'hidden_layers': [],
    'n_neurons': [],
    'learning_rate': [],
    'train_time': [],
    'train_score': [],
    'val_score': [],  # stores only the last validation score after each epoch? 
    'test_score': [],  # stores only the last validation score after each epoch? 
    'confusion_matrices': [],
}



Total de configuraciones: 60


In [37]:
for i, (h, n, lr) in enumerate(configurations):
    print(i + 1, '/', len(configurations))
    print('Hidden Layers: {}, # Neurons: {}, Learning rate: {}'.format(h, n, lr))
    # definir estructura de neurona
    neuron_structure = (np.ones(h) * n).astype(int)

    # Entrenar NN
    mlp = MLPClassifier(
        hidden_layer_sizes=(neuron_structure),
        activation='relu',
        solver='adam',
        learning_rate_init=lr
    )

    # Start timing
    start_time = time.time()
    
    # Train for one epoch
    mlp.partial_fit(X_train, y_train, classes=np.unique(y_train))

    # Calculate time taken
    total_train_time = time.time() - start_time
    # Calculate accuracies
    train_accuracy = mlp.score(X_train, y_train)
    val_accuracy = mlp.score(X_val, y_val)
    y_val_pred = mlp.predict(X_val)  # Predict once

    # Confusion matrix
    best_cm = confusion_matrix(y_val, y_val_pred)

    # Calculate test accuracy
    test_accuracy = mlp.score(X_test, y_test)

    # Print results
    print(f"Training Time: {total_train_time:.4f} seconds")
    # print(f"Training Accuracy: {train_accuracy:.4f}")
    print(f"Validation Accuracy: {val_accuracy:.4f}")
    # print(f"Test Accuracy: {test_accuracy:.4f}")
    # print("Confusion Matrix:\n", best_cm)

    # Se almacenan los resultados en el dict
    configuration_results['hidden_layers'].append(h)
    configuration_results['n_neurons'].append(n)
    configuration_results['learning_rate'].append(lr)
    configuration_results['train_time'].append(total_train_time)
    configuration_results['train_score'].append(train_accuracy)
    configuration_results['val_score'].append(val_accuracy)
    configuration_results['test_score'].append(test_accuracy)
    configuration_results['confusion_matrices'].append(best_cm)

configurations_df = pd.DataFrame(configuration_results)
configurations_df.to_excel('One_Epoch_Results.xlsx', index=False)

1 / 75
Hidden Layers: 1, # Neurons: 50, Learning rate: 0.001
Training Time: 1.1319 seconds
Validation Accuracy: 0.4211
2 / 75
Hidden Layers: 1, # Neurons: 50, Learning rate: 0.0001
Training Time: 1.1173 seconds
Validation Accuracy: 0.3777
3 / 75
Hidden Layers: 1, # Neurons: 50, Learning rate: 1e-05
Training Time: 1.1136 seconds
Validation Accuracy: 0.2277
4 / 75
Hidden Layers: 1, # Neurons: 100, Learning rate: 0.001
Training Time: 1.8529 seconds
Validation Accuracy: 0.4241
5 / 75
Hidden Layers: 1, # Neurons: 100, Learning rate: 0.0001
Training Time: 1.8071 seconds
Validation Accuracy: 0.3980
6 / 75
Hidden Layers: 1, # Neurons: 100, Learning rate: 1e-05
Training Time: 1.7794 seconds
Validation Accuracy: 0.2546
7 / 75
Hidden Layers: 1, # Neurons: 200, Learning rate: 0.001
Training Time: 3.0976 seconds
Validation Accuracy: 0.4303
8 / 75
Hidden Layers: 1, # Neurons: 200, Learning rate: 0.0001
Training Time: 3.2105 seconds
Validation Accuracy: 0.4150
9 / 75
Hidden Layers: 1, # Neurons: 200,

In [None]:
configurations_df.to_excel('One_Epoch_Results.xlsx', index=False)

In [42]:
configurations_df = pd.DataFrame(configuration_results)
configurations_df['ID'] = configurations_df['hidden_layers'].astype(str) + '_' + configurations_df['n_neurons'].astype(str) + '_' + configurations_df['learning_rate'].astype(str)

In [43]:
configurations_df = configurations_df.sort_values(by='val_score', ascending=False).reset_index(drop=True).reset_index()
configurations_df

Unnamed: 0,index,hidden_layers,n_neurons,learning_rate,train_time,train_score,val_score,test_score,confusion_matrices,ID
0,0,3,1000,0.00010,32.867550,0.517250,0.4662,0.2838,"[[552, 44, 26, 38, 21, 28, 11, 63, 167, 36], [...",3_1000_0.0001
1,1,5,1000,0.00010,44.357958,0.503650,0.4659,0.1903,"[[541, 83, 37, 23, 57, 25, 14, 46, 111, 49], [...",5_1000_0.0001
2,2,2,1000,0.00010,21.166707,0.522450,0.4629,0.1879,"[[587, 52, 43, 32, 32, 12, 19, 45, 115, 49], [...",2_1000_0.0001
3,3,2,1000,0.00100,20.380999,0.489125,0.4611,0.2809,"[[493, 33, 74, 36, 22, 20, 16, 21, 129, 142], ...",2_1000_0.001
4,4,3,500,0.00010,11.186117,0.485825,0.4604,0.2647,"[[524, 60, 60, 38, 15, 15, 20, 46, 136, 72], [...",3_500_0.0001
...,...,...,...,...,...,...,...,...,...,...
70,70,2,50,0.00001,1.335903,0.207000,0.2055,0.1676,"[[485, 136, 122, 0, 22, 9, 16, 5, 164, 27], [1...",2_50_1e-05
71,71,8,100,0.00001,2.594995,0.159600,0.1583,0.1085,"[[237, 2, 164, 39, 0, 496, 0, 0, 0, 48], [83, ...",8_100_1e-05
72,72,3,50,0.00001,1.295890,0.154225,0.1503,0.1301,"[[234, 10, 136, 6, 6, 218, 1, 276, 90, 9], [36...",3_50_1e-05
73,73,5,50,0.00001,1.292592,0.143000,0.1422,0.1033,"[[390, 45, 150, 14, 0, 167, 0, 0, 219, 1], [87...",5_50_1e-05


In [48]:
configurations_df[['n_neurons', 'val_score', 'test_score']].groupby('n_neurons').mean().sort_values(by='val_score', ascending=False)

Unnamed: 0_level_0,val_score,test_score
n_neurons,Unnamed: 1_level_1,Unnamed: 2_level_1
1000,0.422007,0.23334
500,0.405153,0.2122
200,0.376753,0.194453
100,0.345633,0.204487
50,0.307347,0.193653


In [28]:
previous_results = pd.read_excel("Resultados por redes neuronales.xlsx")
previous_results['ID'] = previous_results['hidden_layers'].astype(str) + '_' + previous_results['n_neurons'].astype(str) + '_' + previous_results['learning_rate'].astype(str)
previous_results = previous_results.sort_values(by='val_score', ascending=False).reset_index(drop=True).reset_index()

In [29]:
previous_results

Unnamed: 0,index,hidden_layers,n_neurons,learning_rate,train_time,train_score,val_score,test_score,confusion_matrices,val_accuracy_over_epochs,ID
0,0,2,1000,0.0001,736.960688,1.0,0.4878,0.4862,[[197 36 14 29 13 10 41 56 43 59]\n [...,"[0.4156, 0.4394, 0.4518, 0.4658, 0.4708, 0.475...",2_1000_0.0001
1,1,1,1000,1e-05,1119.730063,0.983867,0.4822,0.486,[[181 41 14 30 13 15 43 53 41 67]\n [...,"[0.2848, 0.3324, 0.3556, 0.3758, 0.3894, 0.399...",1_1000_1e-05
2,2,2,1000,1e-05,824.78024,0.991067,0.4812,0.4762,[[179 39 13 30 18 19 41 52 55 52]\n [...,"[0.3122, 0.353, 0.3866, 0.4032, 0.4154, 0.4216...",2_1000_1e-05
3,3,4,1000,0.0001,1456.216042,1.0,0.4808,0.492,[[203 35 16 22 12 14 44 57 37 58]\n [...,"[0.4066, 0.4392, 0.4546, 0.466, 0.48, 0.4698, ...",4_1000_0.0001
4,4,1,1000,0.0001,835.536551,1.0,0.4744,0.481,[[194 38 16 26 13 12 32 53 44 70]\n [...,"[0.3994, 0.4242, 0.4378, 0.4482, 0.4588, 0.455...",1_1000_0.0001
5,5,1,200,1e-05,160.686873,0.744,0.468,0.4748,[[173 44 15 36 10 27 29 53 53 58]\n [...,"[0.2282, 0.266, 0.2894, 0.311, 0.3238, 0.339, ...",1_200_1e-05
6,6,2,200,1e-05,164.971378,0.773933,0.4642,0.4696,[[158 36 16 36 13 25 47 54 43 70]\n [...,"[0.205, 0.257, 0.287, 0.311, 0.3276, 0.344, 0....",2_200_1e-05
7,7,2,1000,0.001,396.014971,0.986133,0.4592,0.4618,[[171 36 20 34 12 15 46 66 37 61]\n [...,"[0.382, 0.4424, 0.445, 0.4652, 0.4582, 0.4508,...",2_1000_0.001
8,8,4,200,1e-05,190.784323,0.8446,0.4576,0.4652,[[176 41 11 33 13 16 41 51 47 69]\n [...,"[0.219, 0.256, 0.2778, 0.297, 0.3174, 0.3336, ...",4_200_1e-05
9,9,4,200,0.001,64.326505,0.982467,0.4544,0.4636,[[160 40 12 44 10 21 37 51 64 59]\n [...,"[0.3972, 0.4146, 0.4358, 0.4368, 0.4486, 0.45,...",4_200_0.001


In [32]:
# Merge both to check positions
merged_results = pd.merge(configurations_df, previous_results, on='ID', suffixes=('_new', '_old'))
merged_results['val_score_diff'] = merged_results['val_score_new'] - merged_results['val_score_old']
merged_results['position_diff'] = merged_results['index_new'] - merged_results['index_old']
display(merged_results[['index_new', 'index_old', 'ID']])

Unnamed: 0,index_new,index_old,ID
0,0,3,4_1000_0.0001
1,1,0,2_1000_0.0001
2,2,7,2_1000_0.001
3,3,4,1_1000_0.0001
4,4,16,1_1000_0.001
5,5,12,4_1000_0.001
6,6,9,4_200_0.001
7,7,13,2_200_0.001
8,8,14,2_200_0.0001
9,9,18,4_200_0.0001


In [None]:
# # Convert to PyTorch tensors
# X_train_tensor = torch.tensor(X_train, dtype=torch.float32) / 255.0  # Normalize to [0, 1]
# y_train_tensor = torch.tensor(y_train, dtype=torch.long)

# X_test_tensor = torch.tensor(X_test, dtype=torch.float32) / 255.0  # Normalize to [0, 1]
# y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# # Create DataLoader for training and testing
# train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
# train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
# test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [None]:
# class SimpleDenseNN(nn.Module):
#     def __init__(self, num_layers, neurons_per_layer):
#         super(SimpleDenseNN, self).__init__()
        
#         # Create a list to hold the layers
#         layers = []
        
#         # Input size for the first layer
#         input_size = 3 * 32 * 32  # Assuming input images are flattened (3 channels, 32x32 pixels)
        
#         # Create the specified number of layers
#         for i in range(num_layers):
#             layers.append(nn.Linear(input_size, neurons_per_layer))  # Add a dense layer
#             layers.append(nn.ReLU())  # Add ReLU activation
#             input_size = neurons_per_layer  # Update input size for the next layer
        
#         # Add the output layer
#         layers.append(nn.Linear(neurons_per_layer, 10))  # Assuming 10 output classes for CIFAR-10
        
#         # Combine all layers into a sequential model
#         self.model = nn.Sequential(*layers)

#     def forward(self, x):
#         x = x.view(x.size(0), -1)  # Flatten the input tensor
#         return self.model(x)  # Forward pass through the model

# # Example usage
# num_layers = 3  # Desired number of layers
# neurons_per_layer = 128  # Desired number of neurons per layer
# model = SimpleDenseNN(num_layers, neurons_per_layer)

# # Print the model architecture
# # print(model)

# # Create an instance of the model
# model = SimpleDenseNN(num_layers=3, neurons_per_layer=2)

# # Define a loss function and optimizer
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# # Training loop
# num_epochs = 5  # Number of epochs to train
# for epoch in range(num_epochs):
#     running_loss = 0.0
#     for i, data in enumerate(train_loader, 0):
#         inputs, labels = data
#         optimizer.zero_grad()   # Zero the parameter gradients
#         outputs = model(inputs) # Forward pass
#         loss = criterion(outputs, labels) # Compute loss
#         loss.backward()         # Backward pass
#         optimizer.step()        # Optimize the weights
#         running_loss += loss.item()
        
#         if i % 100 == 99:    # Print every 100 mini-batches
#             print(f'Epoch {epoch + 1}, Batch {i + 1}, Loss: {running_loss / 100:.3f}')
#             running_loss = 0.0

# print('Finished Training')

# # Save the model (optional)
# torch.save(model.state_dict(), 'simple_cnn.pth')

# # Prediction on the test set
# correct = 0
# total = 0
# with torch.no_grad():  # No need to track gradients during evaluation
#     for data in test_loader:
#         images, labels = data
#         outputs = model(images)  # Forward pass
#         _, predicted = torch.max(outputs.data, 1)  # Get the predicted class
#         total += labels.size(0)  # Update total count
#         correct += (predicted == labels).sum().item()  # Update correct count

# print(f'Accuracy of the model on the 10000 test images: {100 * correct / total:.2f}%')

# # Print predicted labels
# outputs = model(images)
# _, predicted = torch.max(outputs, 1)
# print('Predicted labels:', predicted.numpy())
# print('True labels:', labels.numpy())

Epoch 1, Batch 100, Loss: 2.340
Epoch 1, Batch 200, Loss: 2.338
Epoch 1, Batch 300, Loss: 2.338
Epoch 1, Batch 400, Loss: 2.323
Epoch 1, Batch 500, Loss: 2.327
Epoch 1, Batch 600, Loss: 2.323
Epoch 2, Batch 100, Loss: 2.311
Epoch 2, Batch 200, Loss: 2.314
Epoch 2, Batch 300, Loss: 2.308
Epoch 2, Batch 400, Loss: 2.310
Epoch 2, Batch 500, Loss: 2.307
Epoch 2, Batch 600, Loss: 2.307
Epoch 3, Batch 100, Loss: 2.304
Epoch 3, Batch 200, Loss: 2.306
Epoch 3, Batch 300, Loss: 2.304
Epoch 3, Batch 400, Loss: 2.303
Epoch 3, Batch 500, Loss: 2.304
Epoch 3, Batch 600, Loss: 2.304
Epoch 4, Batch 100, Loss: 2.303
Epoch 4, Batch 200, Loss: 2.303
Epoch 4, Batch 300, Loss: 2.303
Epoch 4, Batch 400, Loss: 2.304
Epoch 4, Batch 500, Loss: 2.302
Epoch 4, Batch 600, Loss: 2.303
Epoch 5, Batch 100, Loss: 2.303
Epoch 5, Batch 200, Loss: 2.303
Epoch 5, Batch 300, Loss: 2.303
Epoch 5, Batch 400, Loss: 2.303
Epoch 5, Batch 500, Loss: 2.302
Epoch 5, Batch 600, Loss: 2.303
Finished Training
Accuracy of the model 

In [None]:

# # Define the number of layers and neurons per layer
# num_layers = 3  # Number of layers
# neurons_per_layer = 2  # Number of neurons in each layer

# # Create a list to hold the layers
# layers = []

# # Input size for the first layer
# input_size = 2

# # Create layers in a loop
# for _ in range(num_layers):
#     layer = nn.Linear(input_size, neurons_per_layer)
#     layers.append(layer)
#     # Update input_size for the next layer
#     input_size = neurons_per_layer