In [2]:
!pip install ucimlrepo
!pip install tensorflow

In [3]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from ucimlrepo import fetch_ucirepo
from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score

In [None]:
(xTrain, yTrain), (xTest, yTest) = mnist.load_data()

In [5]:
class KerasClassifier(BaseEstimator, TransformerMixin):
  def __init__(self, model_func, **kwargs):
    self.model_func = model_func
    self.kwargs = kwargs
    self.model_ = None

  def fit(self, x, y):
    self.model_ = self.model_func(**self.kwargs)
    self.model_.fit(x, y, epochs=10, batch_size=32, verbose=0)
    return self

  def predict(self, x):
    # The argmax here converts the one-hot encoding to label format
    return np.argmax(self.model_.predict(x), axis=1)

def create_model(filters, learning_rate):
  model = models.Sequential()
  #define filters and convolutional layers here
  model.add(layers.Conv2D(filters=filters, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
  #Add a maxpooling layer
  model.add(layers.MaxPooling2D(pool_size=(2, 2)))
  #Flatten the output and give it to a fully connected layer
  model.add(layers.Flatten())
  #one hidden layer maps the flattened neurons to output, 10 classes/labels
  model.add(layers.Dense(10, activation='softmax'))
  model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])
  return model

In [62]:
filters = [16, 32]
learning_rates = [0.001, 0.01]
best_f1 = 0
best_params = None

skf = StratifiedKFold(n_splits=5)
yTrain_encoded = to_categorical(yTrain)
yTest_encoded = to_categorical(yTest)

for filters in filters:
  for lr in learning_rates:
    print(f"Testing model with {filters} filters and learning rate {lr}")
    f1_scores = []

    for train_idx, val_idx in skf.split(xTrain, yTrain):
      xTrain_fold = xTrain[train_idx]
      yTrain_fold = yTrain_encoded[train_idx]
      xVal_fold = xTrain[val_idx]
      yVal_fold = yTrain_encoded[val_idx]

      pipeline = Pipeline([
          ('classifier', KerasClassifier(create_model, filters=filters, learning_rate=lr))
      ])

      # Fit and predict for current fold
      pipeline.fit(xTrain_fold, yTrain_fold)
      yPred_fold = pipeline.predict(xVal_fold)
      # Convert y_val_fold from one-hot encoded to label format
      new_yVal_fold = np.argmax(yVal_fold, axis=1)

      f1 = f1_score(new_yVal_fold, yPred_fold, average='macro')
      f1_scores.append(f1)

    # Average F1 score for the current hyperparameters
    avg_f1 = np.mean(f1_scores)

    if avg_f1 > best_f1:
        best_f1 = avg_f1
        best_params = {'filters': filters, 'learning_rate': lr}

    print(f"Filters: {filters}, Learning rate: {lr}, Avg F1 Score: {avg_f1}")

print(f"Best F1 Score: {best_f1} with parameters {best_params}")

Testing model with 16 filters and learning rate 0.001
Filters: 16, Learning rate: 0.001, Avg F1 Score: 0.9677172478241701
Testing model with 16 filters and learning rate 0.01
Filters: 16, Learning rate: 0.01, Avg F1 Score: 0.8839648475810149
Testing model with 32 filters and learning rate 0.001
Filters: 32, Learning rate: 0.001, Avg F1 Score: 0.9695670108382286
Testing model with 32 filters and learning rate 0.01
Filters: 32, Learning rate: 0.01, Avg F1 Score: 0.8821839928790702
Best F1 Score: 0.9695670108382286 with parameters {'filters': 32, 'learning_rate': 0.001}


In [8]:
#Training with the best parameters

pipeline = Pipeline([
    ('classifier', KerasClassifier(create_model, filters=32, learning_rate=0.001))
])

# Fit
pipeline.fit(xTrain, yTrain_encoded)
# Predict on the test data
yPred = pipeline.predict(xTest)
# Convert y_test from one-hot encoded to label format
new_yTest = np.argmax(yTest_encoded, axis=1)
f1_score(new_yTest, yPred, average='macro')



0.97435846987261

In [10]:
print("The best CNN has the parameter:")
print(f"Number of filters = 32")
print(f"Number of learning rate = 0.001")
print(f"Corresponding f1score = 0.97435846987261")

The best CNN has the parameter:
Number of filters = 32
Number of learning rate = 0.001
Corresponding f1score = 0.97435846987261
