### **MNIST MLP hyperparameter tuning example using scikit-learn**

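This notebook is an example of hyperparameter tuning for an MNIST MLP: the model is built with Keras and the search is run with scikit-learn's `GridSearchCV` through the SciKeras wrapper. <br>
To run it in Colab, uncomment the first code cell so the required packages are installed.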


<a target="_blank" href="https://colab.research.google.com/github/castorgit/RL_course/blob/main/00_LunarLander-COLAB_render.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>


In [1]:
# Uncomment the lines below to install the dependencies (e.g. when running in Colab).
#!pip install numpy
# (`random` ships with Python's standard library, so it needs no pip install)
#!pip install matplotlib
#!pip install tensorflow==2.17.1
#!pip install keras==3.6.0
#!pip install pandas
#!pip install scikeras
#!pip install session-info

In [2]:
import os
# Ask TensorFlow's native (C++) logger to suppress its messages. The variable
# is set before TensorFlow is imported on the next line.
# NOTE(review): the captured output still shows "I0000 ..." lines, so not
# every native message appears to honour this setting.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf
from tensorflow.keras import models
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import Input
from tensorflow.keras.layers import ReLU, Dense, Softmax
from tensorflow.keras.models import Model
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import GridSearchCV

from sklearn.model_selection import ParameterGrid
from scikeras.wrappers import KerasClassifier

import sys
# Send Python-level stderr output to err.txt so it does not clutter the
# notebook output. The file is opened line-buffered (buffering=1) so every
# message reaches disk as soon as it is written: the handle stays open for the
# life of the kernel, and with default block buffering err.txt could remain
# empty or truncated. An explicit encoding keeps the log readable regardless
# of the platform's default.
# NOTE: from here on, anything written to sys.stderr is only visible in err.txt.
sys.stderr = open('err.txt', 'w', buffering=1, encoding='utf-8')

import tensorflow as tf
# Report how many GPUs TensorFlow can see (0 means training will run on CPU).
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


I0000 00:00:1736970527.389678   82287 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1736970527.408097   82287 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1736970527.408144   82287 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1736970527.787921   82287 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1736970527.788115   82287 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:0

#### **Hyperparameter Search Space**

In [3]:
# Search space for the grid search: every combination of the listed values is
# tried (3 hidden sizes x 2 activations x 1 optimizer x 3 batch sizes x 1 loss
# = 18 candidates).
param_grid = dict(
    hidden_size=[64, 128, 256],
    activation_pr=['relu', 'elu'],
    optimizer_pr=['adam'],
    batch_size=[32, 64, 128],
    loss_pr=['MeanSquaredError'],
)

In [4]:
# Settings that stay fixed (they are not part of the search space):
#   num_classes - the number of digits
#   num_epochs  - the epoch count
num_classes, num_epochs = 10, 5

#### **Data preparation and processing**

In [5]:
# Load the MNIST train/test split bundled with Keras.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
# Data Preprocessing
# Flatten every image into a single 28*28-long row, cast to float32 and
# divide by 255 to normalize the pixel values.
x_train = x_train.reshape(-1, 28*28).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28*28).astype('float32') / 255.0

# Convert labels to one-hot encoding. The width comes from the shared
# num_classes setting instead of a repeated literal, so the label encoding
# cannot drift from the declared number of classes.
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

#### **MLP Architecture**

In [6]:
def create_MLP(hidden_size=128, optimizer_pr='adam', loss_pr='MeanSquaredError', activation_pr='relu',
               num_classes=10, input_dim=784):
    """Build and compile a one-hidden-layer MLP classifier.

    Args:
        hidden_size: number of units in the hidden Dense layer.
        optimizer_pr: optimizer handed to ``model.compile``.
        loss_pr: loss handed to ``model.compile``.
        activation_pr: activation function of the hidden layer.
        num_classes: number of output units; defaults to 10, the value that
            was previously hard-coded.
        input_dim: length of each flattened input vector; defaults to 784,
            the value that was previously hard-coded.

    Returns:
        A compiled Keras ``Model`` whose output is a softmax over
        ``num_classes`` units and which tracks the 'accuracy' metric.
    """
    inputs = Input(shape=(input_dim,))
    hidden = Dense(hidden_size, activation=activation_pr)(inputs)
    # The output layer is linear; the softmax is applied as its own layer.
    logits = Dense(num_classes)(hidden)
    output = Softmax()(logits)
    model = Model(inputs=inputs, outputs=output)
    # model.summary()  # uncomment to print the layer table
    model.compile(optimizer=optimizer_pr, loss=loss_pr, metrics=['accuracy'])

    return model

In [7]:
# We shorten the exploration to num_epochs epochs to reduce overall exploration time.
# The builder is passed through `model=`, the current SciKeras name for the
# deprecated `build_fn=` argument. The remaining keyword values are only
# starting defaults: the grid search replaces every one that is listed in
# param_grid.
model = KerasClassifier(model=create_MLP, hidden_size=128, optimizer_pr='adam',
                        loss_pr='MeanSquaredError', activation_pr='relu', batch_size=16, epochs=num_epochs)

In [8]:
# Search over every combination in param_grid, scoring each one with
# 3-fold cross-validation on the training data.
grid = GridSearchCV(model, param_grid, cv=3)
# Keep whatever fit() returns under its own name for the reporting cell.
grid_result = grid.fit(x_train, y_train)

Epoch 1/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.8263 - loss: 0.0252
Epoch 2/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9446 - loss: 0.0086
Epoch 3/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9582 - loss: 0.0065
Epoch 4/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9670 - loss: 0.0053
Epoch 5/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9712 - loss: 0.0046
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Epoch 1/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8234 - loss: 0.0264
Epoch 2/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9448 - loss: 0.0087
Epoch 3/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 

[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8212 - loss: 0.0263
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9501 - loss: 0.0081
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9665 - loss: 0.0056
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9729 - loss: 0.0046
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9791 - loss: 0.0036
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8277 - loss: 0.0260
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9500 - loss: 0.0081
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.963

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9516 - loss: 0.0078
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9656 - loss: 0.0056
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9738 - loss: 0.0044
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9798 - loss: 0.0034
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8189 - loss: 0.0273
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9518 - loss: 0.0077
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9652 - loss: 0.0057
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.973

[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9246 - loss: 0.0116
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9428 - loss: 0.0090
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9510 - loss: 0.0078
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9616 - loss: 0.0062
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7971 - loss: 0.0291
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9278 - loss: 0.0113
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9451 - loss: 0.0088
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.952

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9437 - loss: 0.0089
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9533 - loss: 0.0075
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9616 - loss: 0.0064
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7969 - loss: 0.0297
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9274 - loss: 0.0114
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9422 - loss: 0.0092
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9529 - loss: 0.0076
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.962

In [9]:
# Headline: best cross-validated score and the parameter set that produced it
best_score, best_params = grid_result.best_score_, grid_result.best_params_
print(f"Best Accuracy: {best_score} using {best_params}")

# Detailed results: one line per parameter combination that was evaluated
cv_results = grid_result.cv_results_
means, stds, params = (
    cv_results[column] for column in ('mean_test_score', 'std_test_score', 'params')
)

for mean, std, param in zip(means, stds, params):
    summary = f"Accuracy: {mean:.4f} (+/- {std:.4f}) with params: {param}"
    print(summary)

Best Accuracy: 0.9716833333333333 using {'activation_pr': 'relu', 'batch_size': 32, 'hidden_size': 256, 'loss_pr': 'MeanSquaredError', 'optimizer_pr': 'adam'}
Accuracy: 0.9613 (+/- 0.0025) with params: {'activation_pr': 'relu', 'batch_size': 32, 'hidden_size': 64, 'loss_pr': 'MeanSquaredError', 'optimizer_pr': 'adam'}
Accuracy: 0.9688 (+/- 0.0005) with params: {'activation_pr': 'relu', 'batch_size': 32, 'hidden_size': 128, 'loss_pr': 'MeanSquaredError', 'optimizer_pr': 'adam'}
Accuracy: 0.9717 (+/- 0.0011) with params: {'activation_pr': 'relu', 'batch_size': 32, 'hidden_size': 256, 'loss_pr': 'MeanSquaredError', 'optimizer_pr': 'adam'}
Accuracy: 0.9598 (+/- 0.0012) with params: {'activation_pr': 'relu', 'batch_size': 64, 'hidden_size': 64, 'loss_pr': 'MeanSquaredError', 'optimizer_pr': 'adam'}
Accuracy: 0.9670 (+/- 0.0007) with params: {'activation_pr': 'relu', 'batch_size': 64, 'hidden_size': 128, 'loss_pr': 'MeanSquaredError', 'optimizer_pr': 'adam'}
Accuracy: 0.9697 (+/- 0.0010) wit

In [10]:
# Print the versions of the imported packages, the Jupyter stack, the Python
# interpreter and the OS used for this run, as plain text rather than HTML.
import session_info
session_info.show(html=False)

-----
keras               3.6.0
matplotlib          3.9.2
numpy               1.26.4
scikeras            0.13.0
session_info        1.0.0
sklearn             1.5.2
tensorflow          2.17.1
-----
IPython             8.28.0
jupyter_client      8.6.3
jupyter_core        5.7.2
-----
Python 3.12.3 (main, Nov  6 2024, 18:32:19) [GCC 13.2.0]
Linux-5.15.167.4-microsoft-standard-WSL2-x86_64-with-glibc2.39
-----
Session information updated at 2025-01-15 20:59
