# Hyperparameter Testing on Each Model (Q6)

This notebook trains each of the 3 models with one hyperparameter changed at a time, so that every run can be compared against the original model to see whether any of the tested hyperparameters significantly affects the model.

In [1]:
# Imports
import time

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.manifold import TSNE
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.models import Sequential 
from tensorflow.keras.optimizers import Adam, SGD

# local imports
from metrics import *
from visualization import *
from data_operations import load_data

2024-11-06 15:34:52.848748: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-06 15:34:52.878555: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-06 15:34:52.878590: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-06 15:34:52.878616: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-06 15:34:52.885127: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-06 15:34:52.885945: I tensorflow/core/platform/cpu_feature_guard.cc:182] This Tens

# Model 1  - Single Layer ANN

Used to visualize changes for hyperparameters. Compare with Model 1 Original

Hyperparameters changed: 64 -> 128 neurons in dense layer 

In [2]:
# Experiment: Single Layer ANN with the hidden layer widened from 64 to 128 neurons.
model_name = "Single Layer ANN"

# Load the training and test images/labels
tr_images, tr_labels, ts_images, ts_labels = load_data()

# Build the network one layer at a time
model = Sequential()
model.add(Flatten(input_shape=(28, 28)))    # 28x28 input flattened to a vector
model.add(Dense(128, activation='relu'))    # hidden layer: 128 neurons (changed from 64)
model.add(Dense(10))                        # 10 outputs, no activation (raw logits)

# Adam optimizer with 0.01 lr; the loss is told to expect logits because the
# final Dense layer has no softmax
model.compile(
    optimizer=Adam(learning_rate=0.01),
    loss=SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train for the configured number of epochs
epochs = 10
history, total_tr_time = train_model(
    model, tr_images, tr_labels, epochs=epochs, batch_size=32
)

# Evaluate on the test images/labels
test_acc, preds = evaluate_model(model, ts_images, ts_labels)

# Report training time and per-epoch accuracy
show_results(history, total_tr_time, epochs)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training and Evaluation Results
 Epoch Training Accuracy Validation Accuracy
     1            80.96%              81.59%
     2            84.30%              84.89%
     3            85.16%              85.17%
     4            85.62%              85.46%
     5            85.75%              85.50%
     6            86.20%              86.35%
     7            86.40%              84.67%
     8            86.52%              83.36%
     9            86.77%              84.64%
    10            86.85%              84.09%

Total Training Time for all epochs: 60.62 seconds
Total Training Accuracy (last epoch): 86.85%
Total Validation Accuracy (last epoch): 84.09%


Changing only Adam -> SGD

In [3]:
# Experiment: Single Layer ANN with the optimizer switched from Adam to SGD.
model_name = "Single Layer ANN"

# Load the training and test images/labels
tr_images, tr_labels, ts_images, ts_labels = load_data()

# Build the network one layer at a time
model = Sequential()
model.add(Flatten(input_shape=(28, 28)))    # 28x28 input flattened to a vector
model.add(Dense(64, activation='relu'))     # hidden layer: keeping 64 neurons
model.add(Dense(10))                        # 10 outputs, no activation (raw logits)

# SGD at 0.01 lr (changed from Adam); the loss is told to expect logits
# because the final Dense layer has no softmax
model.compile(
    optimizer=SGD(learning_rate=0.01),
    loss=SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train for the configured number of epochs
epochs = 10
history, total_tr_time = train_model(
    model, tr_images, tr_labels, epochs=epochs, batch_size=32
)

# Evaluate on the test images/labels
test_acc, preds = evaluate_model(model, ts_images, ts_labels)

# Report training time and per-epoch accuracy
show_results(history, total_tr_time, epochs)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training and Evaluation Results
 Epoch Training Accuracy Validation Accuracy
     1            73.59%              80.35%
     2            81.75%              81.38%
     3            83.15%              83.27%
     4            84.02%              83.35%
     5            84.56%              84.79%
     6            84.97%              84.90%
     7            85.54%              84.87%
     8            85.81%              85.50%
     9            86.13%              85.58%
    10            86.32%              85.92%

Total Training Time for all epochs: 34.88 seconds
Total Training Accuracy (last epoch): 86.32%
Total Validation Accuracy (last epoch): 85.92%


Run for 25 epochs compared to 10 

In [4]:
# Experiment: Single Layer ANN trained for 25 epochs instead of 10.
model_name = "Single Layer ANN"

# Load the training and test images/labels
tr_images, tr_labels, ts_images, ts_labels = load_data()

# Build the network one layer at a time
model = Sequential()
model.add(Flatten(input_shape=(28, 28)))    # flatten the 28x28 input
model.add(Dense(64, activation='relu'))     # hidden layer: 64 neurons
model.add(Dense(10))                        # 10 outputs = 10 classes (raw logits)

# Adam optimizer with 0.01 lr; the loss is told to expect logits because the
# final Dense layer has no softmax
model.compile(
    optimizer=Adam(learning_rate=0.01),
    loss=SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train for the configured number of epochs
epochs = 25  # changed from 10
history, total_tr_time = train_model(
    model, tr_images, tr_labels, epochs=epochs, batch_size=32
)

# Evaluate on the test images/labels
test_acc, preds = evaluate_model(model, ts_images, ts_labels)

# Report training time and per-epoch accuracy
show_results(history, total_tr_time, epochs)


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Training and Evaluation Results
 Epoch Training Accuracy Validation Accuracy
     1            80.93%              84.87%
     2            84.48%              84.24%
     3            85.21%              83.90%
     4            85.40%              85.88%
     5            85.93%              85.48%
     6            86.00%              85.34%
     7            86.39%              85.25%
     8            86.62%              85.63%
     9            86.85%              85.60%
    10            87.10%              86.07%
    11            86.85%              86.65%
    12            87.18%              85.45%
    13            87.16%              86.35%
    14            87.44%              85.24%
  

# Model 2 - Double Layer ANN

Hyperparameters changed: 128 -> 256 neurons in the first dense layer and 64 -> 128 neurons in the second dense layer

In [5]:
# Experiment: Double Layer ANN with both hidden layers doubled in width.
model_name = 'Double Layer ANN'

# Load the training and test images/labels
tr_images, tr_labels, ts_images, ts_labels = load_data()

# Build the double-layer network one layer at a time
model = Sequential()
model.add(Flatten(input_shape=(28, 28)))    # 28x28 input flattened to a vector
model.add(Dense(256, activation='relu'))    # hidden layer 1: 256 neurons (changed from 128)
model.add(Dense(128, activation='relu'))    # hidden layer 2: 128 neurons (changed from 64)
model.add(Dense(10))                        # 10 outputs, no activation (raw logits)

# Adam optimizer with 0.01 learning rate, tracking accuracy; the loss is told
# to expect logits because the final Dense layer has no softmax
model.compile(
    optimizer=Adam(learning_rate=0.01),
    loss=SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train for the configured number of epochs
epochs = 10
history, total_tr_time = train_model(
    model, tr_images, tr_labels, epochs=epochs, batch_size=32
)

# Evaluate on the test images/labels
test_accuracy, preds = evaluate_model(model, ts_images, ts_labels)

# Report training time and per-epoch accuracy
show_results(history, total_tr_time, epochs)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training and Evaluation Results
 Epoch Training Accuracy Validation Accuracy
     1            79.97%              84.05%
     2            84.12%              83.57%
     3            84.67%              85.06%
     4            85.85%              84.92%
     5            86.30%              86.09%
     6            86.33%              85.89%
     7            86.57%              85.95%
     8            86.89%              86.78%
     9            87.03%              85.78%
    10            87.31%              86.38%

Total Training Time for all epochs: 94.48 seconds
Total Training Accuracy (last epoch): 87.31%
Total Validation Accuracy (last epoch): 86.38%


Adam -> SGD optimizer

In [6]:
# Experiment: Double Layer ANN with the optimizer switched from Adam to SGD.
model_name = 'Double Layer ANN'

# Load the training and test images/labels
tr_images, tr_labels, ts_images, ts_labels = load_data()

# Build the double-layer network one layer at a time
model = Sequential()
model.add(Flatten(input_shape=(28, 28)))    # 28x28 input flattened to a vector
model.add(Dense(128, activation='relu'))    # hidden layer 1: 128 neurons
model.add(Dense(64, activation='relu'))     # hidden layer 2: 64 neurons
model.add(Dense(10))                        # 10 outputs, no activation (raw logits)

# SGD at 0.01 lr (changed from Adam); the loss is told to expect logits
# because the final Dense layer has no softmax
model.compile(
    optimizer=SGD(learning_rate=0.01),
    loss=SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train for the configured number of epochs
epochs = 10
history, total_tr_time = train_model(
    model, tr_images, tr_labels, epochs=epochs, batch_size=32
)

# Evaluate on the test images/labels
test_accuracy, preds = evaluate_model(model, ts_images, ts_labels)

# Report training time and per-epoch accuracy
show_results(history, total_tr_time, epochs)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training and Evaluation Results
 Epoch Training Accuracy Validation Accuracy
     1            73.64%              80.06%
     2            81.76%              83.24%
     3            83.66%              84.31%
     4            84.61%              83.93%
     5            85.45%              85.51%
     6            85.94%              85.72%
     7            86.42%              86.27%
     8            86.78%              86.48%
     9            87.04%              86.22%
    10            87.45%              86.82%

Total Training Time for all epochs: 44.86 seconds
Total Training Accuracy (last epoch): 87.45%
Total Validation Accuracy (last epoch): 86.82%


10-> 20 epochs

In [7]:
# Experiment: Double Layer ANN trained for 20 epochs instead of 10.
model_name = 'Double Layer ANN'

# Load the training and test images/labels
tr_images, tr_labels, ts_images, ts_labels = load_data()

# Build the double-layer network one layer at a time
model = Sequential()
model.add(Flatten(input_shape=(28, 28)))    # 28x28 input flattened to a vector
model.add(Dense(128, activation='relu'))    # hidden layer 1: 128 neurons
model.add(Dense(64, activation='relu'))     # hidden layer 2: 64 neurons
model.add(Dense(10))                        # 10 outputs, no activation (raw logits)

# Adam optimizer with 0.01 lr; the loss is told to expect logits because the
# final Dense layer has no softmax
model.compile(
    optimizer=Adam(learning_rate=0.01),
    loss=SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train for the configured number of epochs
epochs = 20  # changed to 20
history, total_tr_time = train_model(
    model, tr_images, tr_labels, epochs=epochs, batch_size=32
)

# Evaluate on the test images/labels
test_accuracy, preds = evaluate_model(model, ts_images, ts_labels)

# Report training time and per-epoch accuracy
show_results(history, total_tr_time, epochs)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Training and Evaluation Results
 Epoch Training Accuracy Validation Accuracy
     1            80.63%              83.22%
     2            84.03%              82.85%
     3            84.89%              82.90%
     4            85.17%              84.03%
     5            85.79%              84.78%
     6            86.23%              84.57%
     7            86.36%              84.53%
     8            86.55%              84.95%
     9            86.56%              85.24%
    10            86.84%              86.03%
    11            87.11%              84.07%
    12            87.30%              85.02%
    13            87.11%              86.09%
    14            87.14%              85.97%
    15            87.47%              85.51%
    16           

# Model 3 - KNN

changed k-neighbours to 1 from 5

In [8]:
# Experiment: KNN with k reduced from 5 to 1.
model_name = "KNN"

# Load the training and test images/labels
tr_images, tr_labels, ts_images, ts_labels = load_data()

# KNN needs one feature vector per sample, so flatten each image to 784 values
tr_images_flat = tr_images.reshape(-1, 28 * 28)
ts_images_flat = ts_images.reshape(-1, 28 * 28)

# Classifier under test
k_neighbors = 1  # number of neighbours used by k-NN
knn_model = KNeighborsClassifier(n_neighbors=k_neighbors)

# Stratified k-fold keeps the class distribution even across folds,
# which suits a multiclass problem
n_splits = 5  # number of folds
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=21)
cv_scores = []

# Time the whole cross-validation loop
time_start = time.time()
for train_index, val_index in skf.split(tr_images_flat, tr_labels):
    # Carve this fold's training and validation subsets out of the training data
    X_train = tr_images_flat[train_index]
    y_train = tr_labels[train_index]
    X_val = tr_images_flat[val_index]
    y_val = tr_labels[val_index]

    # Fit on the fold's training part
    knn_model.fit(X_train, y_train)

    # Score on the fold's validation part
    val_predictions = knn_model.predict(X_val)
    fold_accuracy = accuracy_score(y_val, val_predictions)
    cv_scores.append(fold_accuracy)
time_end = time.time()

print(f"Time to run {n_splits} folds: {time_end - time_start:.2f} seconds")

# Mean accuracy over the folds
print(f"Stratified Cross-Validation Accuracy (5-fold): {sum(cv_scores) / len(cv_scores) * 100:.2f}%")

# NOTE(review): knn_model is refit inside every fold, so the model used here is
# the one fitted on the last fold's training subset, not on the full training set.
test_preds = knn_model.predict(ts_images_flat)

# Accuracy on the test set
test_accuracy = accuracy_score(ts_labels, test_preds)

print("\nTest Set Accuracy:")
print(f"Accuracy: {test_accuracy * 100:.2f}%")

Time to run 5 folds: 109.56 seconds
Stratified Cross-Validation Accuracy (5-fold): 84.93%

Test Set Accuracy:
Accuracy: 84.27%


Change metric from Euclidean -> Chebyshev

Note: I wanted to use Manhattan or cosine distance as well, just for comparison, but due to the nature of the data the computation was taking far too long, and I could not speed it up without affecting other metrics. For the sake of this investigation, only Chebyshev and Euclidean were tested.

In [9]:
# Experiment: KNN with the distance metric switched to chebyshev.
model_name = "KNN"

# Load the training and test images/labels
tr_images, tr_labels, ts_images, ts_labels = load_data()

# KNN needs one feature vector per sample, so flatten each image to 784 values
tr_images_flat = tr_images.reshape(-1, 28 * 28)
ts_images_flat = ts_images.reshape(-1, 28 * 28)

# Classifier under test: chebyshev metric (changed from euclidean)
k_neighbors = 5  # number of neighbours used by k-NN
knn_model = KNeighborsClassifier(n_neighbors=k_neighbors, metric='chebyshev')

# Stratified k-fold keeps the class distribution even across folds,
# which suits a multiclass problem
n_splits = 5  # number of folds
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=21)
cv_scores = []

# Time the whole cross-validation loop
time_start = time.time()
for train_index, val_index in skf.split(tr_images_flat, tr_labels):
    # Carve this fold's training and validation subsets out of the training data
    X_train = tr_images_flat[train_index]
    y_train = tr_labels[train_index]
    X_val = tr_images_flat[val_index]
    y_val = tr_labels[val_index]

    # Fit on the fold's training part
    knn_model.fit(X_train, y_train)

    # Score on the fold's validation part
    val_predictions = knn_model.predict(X_val)
    fold_accuracy = accuracy_score(y_val, val_predictions)
    cv_scores.append(fold_accuracy)
time_end = time.time()

print(f"Time to run {n_splits} folds: {time_end - time_start:.2f} seconds")

# Mean accuracy over the folds
print(f"Stratified Cross-Validation Accuracy (5-fold): {sum(cv_scores) / len(cv_scores) * 100:.2f}%")

# NOTE(review): knn_model is refit inside every fold, so the model used here is
# the one fitted on the last fold's training subset, not on the full training set.
test_preds = knn_model.predict(ts_images_flat)

# Accuracy on the test set
test_accuracy = accuracy_score(ts_labels, test_preds)

print("\nTest Set Accuracy:")
print(f"Accuracy: {test_accuracy * 100:.2f}%")

Time to run 5 folds: 1062.51 seconds
Stratified Cross-Validation Accuracy (5-fold): 63.79%

Test Set Accuracy:
Accuracy: 62.92%


change weights from default "uniform" to distance

In [10]:
# Experiment: KNN with neighbour weighting switched from the default
# "uniform" to "distance".
model_name = "KNN"
# load the data
tr_images, tr_labels, ts_images, ts_labels = load_data()

# flatten images so knn can use
tr_images_flat = tr_images.reshape(-1, 28 * 28)  # Flatten to 784 features
ts_images_flat = ts_images.reshape(-1, 28 * 28)

# Define Model
k_neighbors = 5  # Number of neighbors for k-NN
# The classifier must be created in this cell; otherwise the loop below would
# silently reuse whatever knn_model an earlier cell left in the kernel.
knn_model = KNeighborsClassifier(n_neighbors=k_neighbors, weights='distance')  # changed from uniform to weighted

# stratified k-Fold is better with multiclass to even the class distribution
n_splits = 5 # fold number
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=21)
cv_scores = []  

# loop to iterate though each fold (timed as a whole)
time_start = time.time()
for train_index, val_index in skf.split(tr_images_flat, tr_labels):
    # Split the data for training and validation sets for each fold
    X_train, X_val = tr_images_flat[train_index], tr_images_flat[val_index]
    y_train, y_val = tr_labels[train_index], tr_labels[val_index]
    
    # Train the knn model on the training set 
    knn_model.fit(X_train, y_train) 
    
    # evaluate model (val set)
    val_predictions = knn_model.predict(X_val)
    fold_accuracy = accuracy_score(y_val, val_predictions)
    cv_scores.append(fold_accuracy)

time_end = time.time()
print(f"Time to run {n_splits} folds: {time_end - time_start:.2f} seconds")

# print cross-val accuracy results
print(f"Stratified Cross-Validation Accuracy (5-fold): {sum(cv_scores) / len(cv_scores) * 100:.2f}%")

# NOTE(review): knn_model is refit inside every fold, so the model used here is
# the one fitted on the last fold's training subset, not on the full training set.
# Left unchanged so the result stays comparable with the other KNN runs.
test_preds = knn_model.predict(ts_images_flat)

# Calculate metrics for the test set
test_accuracy = accuracy_score(ts_labels, test_preds)

# Print the test set accuracy
print("\nTest Set Accuracy:")
print(f"Accuracy: {test_accuracy * 100:.2f}%")

Time to run 5 folds: 108.42 seconds
Stratified Cross-Validation Accuracy (5-fold): 85.56%

Test Set Accuracy:
Accuracy: 85.15%


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=9b078dc4-e2c3-414e-bc4d-86356ec945c2' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>