In [1]:
import sys
from pathlib import Path

# Make the parent of the current working directory importable (presumably
# where the local `data` and `cnn` modules imported below live).
# The membership check keeps this cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
project_dir = Path.cwd().parent
if str(project_dir) not in sys.path:
    sys.path.append(str(project_dir))

from data import Data
from cnn import CNN
import pickle

# Load Dataset

- Preprocess
- Build splits (Training/Validation/Testing):
  - 80/20/00 -> For the hyperparameter search
  - 80/10/10
  - 40/20/40
  - 10/10/80

In [2]:
def _load_split(train, val, test):
    """Build a Data object from the silhouettes file with the given train/val/test split values."""
    return Data('caltech101_silhouettes_28.mat', train_split=train, val_split=val, test_split=test)


# dataset0 has no test partition and is the one used by the hyperparameter search;
# dataset1..dataset3 are the three splits compared in the configuration search.
dataset0 = _load_split(80, 20, 0)
dataset1 = _load_split(80, 10, 10)
dataset2 = _load_split(40, 20, 40)
dataset3 = _load_split(10, 10, 80)

# Hyperparameter Search
Performed before the study of CNN configurations, searching over the following hyperparameters:

- Output layer (OL) activation function
- Cost function (CF)
- Dense Layer Size
- Learning rates

In [None]:
import itertools
import traceback
from concurrent.futures import ThreadPoolExecutor, as_completed

# Search grid for the preliminary hyperparameter search: every combination of
# the four lists below is trained once (they are fed to itertools.product
# further down in this cell).
output_activations = ['softmax', 'sigmoid']
cost_functions = ['categorical_crossentropy', 'mean_squared_error']
learning_rates = [0.01, 0.1]
dense_layer_sizes = [64, 128]
# Passed as `max_epochs` to every CNN.fit call in this notebook.
max_epochs = 20

def train_model_hyperparmeter_search(params):
    """Train one CNN on `dataset0` for a single hyperparameter combination.

    Args:
        params: 4-item sequence ``(output activation, cost function,
            dense layer size, learning rate)``.

    Returns:
        ``(oa, cf, dls, lr, val_acc, history)``, where ``val_acc`` is the
        last entry of ``history.history['val_accuracy']``. Returns ``None``
        when anything raises; the traceback is printed rather than propagated
        so one failing combination does not abort the whole search.
    """
    try:
        oa, cf, dls, lr = params
        # Fixed architecture for this search: two [64, 64] blocks with tanh hidden activation.
        model = CNN(
            output_layer_activation=oa,
            filter_sizes=[64, 64],
            dense_layer_size=dls,
            hidden_layer_activation='tanh',
        )

        print(f"Training with parameters: OA={oa}, CF={cf}, DLS={dls}, LR={lr}")

        training_log = model.fit(dataset0, cost_function=cf, max_epochs=max_epochs, learning_rate=lr)

        # Score the run by the validation accuracy of its final epoch.
        final_val_acc = training_log.history['val_accuracy'][-1]
        return (oa, cf, dls, lr, final_val_acc, training_log)
    except Exception:
        print(f"Error processing parameters {params}:")
        print(traceback.format_exc())
        return None

# Generate all parameter combinations (Cartesian product of the search grid).
# Tuple order matches what train_model_hyperparmeter_search unpacks:
# (output activation, cost function, dense layer size, learning rate).
param_combinations = list(itertools.product(
    output_activations,
    cost_functions,
    dense_layer_sizes,
    learning_rates
))

# Use ThreadPoolExecutor to parallelize: one task per combination.
hyperparameter_search_results = []

with ThreadPoolExecutor() as executor:
    # Submit all tasks
    futures = [executor.submit(train_model_hyperparmeter_search, params) for params in param_combinations]

    # Collect results as they complete. A failed run comes back as None
    # (the worker prints its traceback) and is left out of the results.
    for future in as_completed(futures):
        result = future.result()
        if result is not None:
            hyperparameter_search_results.append(result)

# Every worker swallows its own exception, so a systematic failure would
# otherwise only surface as an opaque IndexError on the `[0]` lookup below.
if not hyperparameter_search_results:
    raise RuntimeError(
        "Hyperparameter search produced no results: every training run failed "
        "(see the tracebacks printed above)."
    )

# Sort by validation accuracy (second-to-last tuple field), best first, and display.
hyperparameter_search_results = sorted(hyperparameter_search_results, key=lambda x: x[-2], reverse=True)
for result in hyperparameter_search_results:
    print(f"Params: {result[:-2]}, Validation Accuracy: {result[-2]:.4f}")

# Keep only the four hyperparameters of the top run (drop val_acc and history).
best_hyperparameters = hyperparameter_search_results[0][:-2]

In [4]:
# Persist the sorted search results so later cells can reload them from disk
# instead of re-running the training.
with open('hyperparameter_search_results.pkl', 'wb') as pickle_file:
    pickle.dump(hyperparameter_search_results, pickle_file)

In [21]:
import pickle
import pandas as pd
import os
import matplotlib.pyplot as plt

# Load hyperparameter search results.
# NOTE(review): pickle.load can execute arbitrary code; only load files that
# this notebook wrote itself.
with open('hyperparameter_search_results.pkl', 'rb') as file:
    hyperparameter_search_results = pickle.load(file)

# Define column names for the DataFrame (same order as the result tuples)
columns = ['Output Activation', 'Cost Function', 'Dense Layer Size', 'Learning Rate', 'Validation Accuracy', 'History']

# Convert the results into a pandas DataFrame
results_df = pd.DataFrame(hyperparameter_search_results, columns=columns)

# Drop the 'History' column as it's not needed for display
results_df_no_history = results_df.drop(columns=['History'])

# Find the row with the best accuracy and report it, so the cell's recorded
# output ("Best configuration for validation accuracy: ...") is reproduced
# when the cell is re-run.
best_row = results_df_no_history.loc[results_df_no_history['Validation Accuracy'].idxmax()]
print("Best configuration for validation accuracy:")
print(best_row)

# Output directory
output_dir = os.path.join(os.getcwd(), 'outputs')
os.makedirs(output_dir, exist_ok=True)
png_file_path = os.path.join(output_dir, 'hyperparameter_search_results.png')

# The figure only hosts the table, so the axes themselves are hidden
fig, ax = plt.subplots(figsize=(10, 6))
ax.axis('off')  # Turn off the axis

# Table contents: the header row followed by one row per search result
table_data = results_df_no_history.values.tolist()
table_data.insert(0, results_df_no_history.columns.tolist())  # Add headers

# Highlight the best row by changing its color
cell_colors = [['white'] * len(results_df_no_history.columns) for _ in range(len(results_df_no_history) + 1)]
best_row_index = results_df_no_history.index.get_loc(best_row.name) + 1  # Add 1 for headers
cell_colors[best_row_index] = ['#FFFF99'] * len(results_df_no_history.columns)  # Yellow highlight

# Create the table
table = ax.table(cellText=table_data,
                 cellColours=cell_colors,
                 loc='center',
                 cellLoc='center')

# Style the table
table.auto_set_font_size(False)
table.set_fontsize(8)
table.scale(1.2, 1.2)

# Save the table as a PNG file. Saving and closing go through the explicit
# figure handle rather than pyplot's implicit "current figure".
fig.savefig(png_file_path, dpi=300, bbox_inches='tight')
plt.close(fig)

print(f"Table saved as PNG at: {png_file_path}")



Best configuration for validation accuracy:
Output Activation                       sigmoid
Cost Function          categorical_crossentropy
Dense Layer Size                             64
Learning Rate                              0.01
Validation Accuracy                    0.538329
Name: 0, dtype: object
Table saved as PNG at: /Users/sheena/Library/CloudStorage/Box-Box/Sheena./Master/semester-1/CI/Labs/LAB-EXERCISE1/mai-ci-assignment-2/code/outputs/hyperparameter_search_results.png


# CNN Results

Loop over configurations:
1. Architecture:
  - 1 Block: 128 Filter Size
  - 3 Blocks: 32, 64 and 128 Filter Sizes
2. Activations:
  - Sigmoid
  - ReLU
3. Dataset Splits.

In [None]:
import itertools
import traceback
from concurrent.futures import ThreadPoolExecutor, as_completed

# Reuse the winning hyperparameters from the preliminary search:
# (output activation, cost function, dense layer size, learning rate).
oa, cf, dls, lr = best_hyperparameters[:4]

# Configuration grid: every dataset split x architecture x hidden activation.
datasets = [
    dataset1,
    dataset2,
    dataset3,
]
filter_sizes = [
    [128],
    [32, 64, 128],
]
hidden_activations = [
    'sigmoid',
    'relu',
]

def train_model_configuration_search(params):
    """Train and evaluate one CNN for a single (dataset, architecture, activation) configuration.

    The output activation, cost function, dense layer size and learning rate
    come from the module-level `oa`, `cf`, `dls` and `lr` values.

    Args:
        params: 3-item sequence ``(dataset, filter sizes, hidden-layer activation)``.

    Returns:
        ``(fs, nhl, splits, val_acc, test_acc, history)``, where ``val_acc``
        is the maximum of ``history.history['val_accuracy']`` and ``test_acc``
        is element 1 of whatever ``cnn.evaluate(dataset)`` returns. Returns
        ``None`` when anything raises; the traceback is printed rather than
        propagated so one failing configuration does not abort the search.
    """
    # Bound before the try block so the error handler can always format its
    # message. Previously a failure before `splits` was assigned (malformed
    # params, CNN constructor error) raised UnboundLocalError inside the
    # handler and hid the original exception.
    splits = None
    try:
        dataset, fs, nhl = params
        # Read the splits first so error messages can identify the dataset.
        splits = dataset.splits

        cnn = CNN(output_layer_activation=oa, filter_sizes=fs, dense_layer_size=dls, hidden_layer_activation=nhl)

        print(f"Training with parameters: FS={fs}, NHL={nhl}, Splits={splits}")

        history = cnn.fit(
            dataset,
            cost_function=cf,
            max_epochs=max_epochs,
            learning_rate=lr
        )

        # Best validation accuracy reached in any epoch.
        val_acc = max(history.history['val_accuracy'])

        # NOTE(review): index 1 is reported downstream as the testing accuracy,
        # presumably from a [loss, accuracy] pair. Confirm against CNN.evaluate.
        test_metrics = cnn.evaluate(dataset)

        return (fs, nhl, splits, val_acc, test_metrics[1], history)
    except Exception:
        print(f"Error processing parameters {params} with dataset {splits}:")
        print(traceback.format_exc())
        return None

# Cartesian product of the grid; tuple order (dataset, filter sizes, hidden
# activation) matches what train_model_configuration_search unpacks.
param_combinations = [*itertools.product(datasets, filter_sizes, hidden_activations)]

# Run every configuration on a thread pool and keep the runs that succeeded
# (a failed run is returned as None by the worker).
configuration_search_results = []
with ThreadPoolExecutor() as executor:
    futures = [
        executor.submit(train_model_configuration_search, combination)
        for combination in param_combinations
    ]

    for finished in as_completed(futures):
        outcome = finished.result()
        if outcome is None:
            continue
        configuration_search_results.append(outcome)

# Order by testing accuracy (second-to-last tuple field), best first, then report.
configuration_search_results.sort(key=lambda run: run[-2], reverse=True)
for run in configuration_search_results:
    print(f"Params: {run[:-3]}, Validation Accuracy: {run[-3]:.4f}, Testing Accuracy: {run[-2]:.4f}")

In [6]:
# Persist the sorted configuration results so the plotting and analysis cells
# can reload them from disk without retraining.
with open('configuration_search_results.pkl', 'wb') as pickle_file:
    pickle.dump(configuration_search_results, pickle_file)

In [1]:
import pickle
import matplotlib.pyplot as plt
import os

# Reload the configuration search results from disk
with open('configuration_search_results.pkl', 'rb') as results_file:
    configuration_search_results = pickle.load(results_file)

# Select the run with the highest validation accuracy (third-from-last tuple
# field); its history object is the last field.
best_result = max(configuration_search_results, key=lambda run: run[-3])
best_history = best_result[-1]

# Per-epoch curves recorded for that run
curves = best_history.history
training_accuracy, validation_accuracy = curves['accuracy'], curves['val_accuracy']
training_loss, validation_loss = curves['loss'], curves['val_loss']

# Make sure the output directory exists and decide where the figure goes
output_dir = os.path.join(os.getcwd(), 'outputs')
os.makedirs(output_dir, exist_ok=True)
output_png_path = os.path.join(output_dir, 'best_configuration_training_validation.png')

# One stacked panel per metric: (title, y-axis label, training curve, validation curve)
panels = (
    (
        'Training and Validation Accuracy',
        'Accuracy',
        (training_accuracy, 'Training Accuracy'),
        (validation_accuracy, 'Validation Accuracy'),
    ),
    (
        'Training and Validation Loss',
        'Loss',
        (training_loss, 'Training Loss'),
        (validation_loss, 'Validation Loss'),
    ),
)

plt.figure(figsize=(12, 8))
for position, (title, y_label, train_curve, val_curve) in enumerate(panels, start=1):
    plt.subplot(2, 1, position)
    for values, legend_label in (train_curve, val_curve):
        plt.plot(values, label=legend_label)
    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()

# Write the figure into the outputs directory and release it
plt.tight_layout()
plt.savefig(output_png_path, dpi=300)
plt.close()

print(f"Training and validation plots saved as PNG at: {output_png_path}")

Training and validation plots saved as PNG at: /Users/sheena/Library/CloudStorage/Box-Box/Sheena./Master/semester-1/CI/Labs/LAB-EXERCISE1/mai-ci-assignment-2/code/outputs/best_configuration_training_validation.png


# CNN Analysis

In [7]:
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Reload both result lists written by the search cells above.
hyperparameter_search_results = _load_pickle('hyperparameter_search_results.pkl')
configuration_search_results = _load_pickle('configuration_search_results.pkl')

Plots and tables comparing the configurations.

Tables with Validation Accuracies:
- Hyperparameter search
- Configuration search

Plots:
- Hyperparameter Heatmaps
- Best run Train-Validation accuracy plot (need to save all of them during the search)

# Tasks

1) Description of the runs with the different configurations that you have performed. -> Sheena
2) Explain how you have selected the rest of parameters. -> Sheena
3) Those tables that you consider necessary to describe the results obtained for the different network configurations. Explain and reason the results presented in the tables.
    - Tables -> Sheena
    - Heatmaps -> Bruno
    - Best run Train-Validation Accuracy plot -> Sheena
4) Your own conclusions with respect to the results obtained. -> Bruno