# Assignment #1: Applying kfold Cross-Validation on the mnist Dataset

In [1]:
from sklearn.datasets import fetch_openml
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input,Dense
from sklearn.metrics import classification_report

mnist=fetch_openml('mnist_784',as_frame=False)
X,y=mnist.data,mnist.target

y_5 = y =='5'

X_train,X_test,y_train,y_test = X[:60000],X[60000:], y_5[:60000],y_5[60000:]
X_trainset,X_valid,y_trainset,y_valid = X_train[:-10000],X_train[-10000:], y_train[:-10000],y_train[-10000:]

# Normalization (0 ~ 1) -> To avoid overflow
X_trainset_scaled = X_trainset / 255.0
X_valid_scaled = X_valid / 255.0
X_test_scaled = X_test / 255.0

model1 = Sequential()
input_shape = X_trainset_scaled.shape[1:]

model1.add(Dense(128,input_shape=input_shape,activation='relu'))
model1.add(Dense(128,activation='relu'))
model1.add(Dense(1,activation='sigmoid'))

model1.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model1.summary()
history1=model1.fit(X_trainset_scaled, y_trainset, epochs=10, batch_size=32, validation_data=(X_valid_scaled, y_valid))

predictions1=model1.predict(X_test_scaled)
y_hat=(predictions1>0.5).astype(int)

print(classification_report(y_test, y_hat))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.9709 - loss: 0.0884 - val_accuracy: 0.9887 - val_loss: 0.0369
Epoch 2/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9936 - loss: 0.0191 - val_accuracy: 0.9914 - val_loss: 0.0302
Epoch 3/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.9959 - loss: 0.0126 - val_accuracy: 0.9914 - val_loss: 0.0316
Epoch 4/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - accuracy: 0.9970 - loss: 0.0093 - val_accuracy: 0.9916 - val_loss: 0.0379
Epoch 5/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9970 - loss: 0.0078 - val_accuracy: 0.9932 - val_loss: 0.0286
Epoch 6/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - accuracy: 0.9980 - loss: 0.0063 - val_accuracy: 0.9930 - val_loss: 0.0377
Epoch 7/10
[1

In [4]:
from sklearn.model_selection import StratifiedKFold

X_trainset_scaled = X_train / 255.0

# Set up K-Fold
k = 6
seed=1404
skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=seed)

fold = 1
reports = []

for train_index, val_index in skf.split(X_trainset_scaled, y_train):
    print(f"🌀 Fold {fold}")

    X_train_fold, X_val_fold = X_trainset_scaled[train_index], X_trainset_scaled[val_index]
    y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

    model = Sequential([
        Dense(128, activation='relu', input_shape=input_shape),
        Dense(128, activation='relu'),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    model.fit(X_train_fold, y_train_fold,
              epochs=10,
              batch_size=32,
              verbose=0,
              validation_data=(X_val_fold, y_val_fold))

    val_preds = (model.predict(X_val_fold) > 0.5).astype(int)
    report = classification_report(y_val_fold, val_preds, output_dict=True)
    reports.append(report)

    print(fold, classification_report(y_val_fold, val_preds))
    fold += 1

🌀 Fold 1


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
1               precision    recall  f1-score   support

       False       1.00      1.00      1.00      9096
        True       0.97      0.96      0.96       904

    accuracy                           0.99     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.99      0.99      0.99     10000

🌀 Fold 2


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
2               precision    recall  f1-score   support

       False       1.00      1.00      1.00      9096
        True       0.98      0.97      0.97       904

    accuracy                           1.00     10000
   macro avg       0.99      0.98      0.99     10000
weighted avg       1.00      1.00      1.00     10000

🌀 Fold 3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
3               precision    recall  f1-score   support

       False       1.00      1.00      1.00      9096
        True       0.97      0.96      0.97       904

    accuracy                           0.99     10000
   macro avg       0.98      0.98      0.98     10000
weighted avg       0.99      0.99      0.99     10000

🌀 Fold 4


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
4               precision    recall  f1-score   support

       False       0.99      1.00      1.00      9097
        True       0.98      0.95      0.96       903

    accuracy                           0.99     10000
   macro avg       0.99      0.97      0.98     10000
weighted avg       0.99      0.99      0.99     10000

🌀 Fold 5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
5               precision    recall  f1-score   support

       False       1.00      1.00      1.00      9097
        True       0.98      0.95      0.97       903

    accuracy                           0.99     10000
   macro avg       0.99      0.98      0.98     10000
weighted avg       0.99      0.99      0.99     10000

🌀 Fold 6


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
6               precision    recall  f1-score   support

       False       1.00      1.00      1.00      9097
        True       0.99      0.95      0.97       903

    accuracy                           0.99     10000
   macro avg       0.99      0.98      0.98     10000
weighted avg       0.99      0.99      0.99     10000



# Calculation of average f1-score for class 'true'

In [15]:
f1_scores = []

for r in reports:
    f1_scores.append(r.get('True', {}).get('f1-score', 0.0))

print(f"Average F1-score (positive class - '5'): {np.mean(f1_scores):.4f}")

Average F1-score (positive class - '5'): 0.9679


# Assignment # 2: Optimization of the Neural Network for Cat vs Noncat Problem

In [16]:
import numpy as np
from utils import dataset_loader
from sklearn.model_selection import train_test_split
import tensorflow as tf
#from tensorflow.keras.utils import plot_model


X_train,y_train,X_test,y_test = dataset_loader('./')

# reshape -> flat
X_train_flat=X_train.reshape(X_train.shape[0],-1)
X_train_flat.shape
X_test_flat=X_test.reshape(X_test.shape[0],-1)
X_test_flat.shape

# normalize
X_train_scaled=X_train_flat / 255.0
X_test_scaled=X_test_flat / 255.0

input_shape=X_train_scaled.shape[1:]

# fit model (2 Hidden Layers)
model=Sequential([
    Input(shape=input_shape),
    Dense(64,activation='relu'),
    Dense(64,activation='relu'),
    Dense(1,activation='sigmoid')
])

#tf.keras.utils.plot_model(model,'test.png',show_shapes=True)
#plot_model(model,'test.png',show_shapes=True)

model.compile(
    optimizer= tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=[tf.keras.metrics.BinaryAccuracy()]
    )

history=model.fit(X_train_scaled,y_train,epochs=10,batch_size=32)
predictions=model.predict(X_test_scaled)

y_hat=(predictions>0.5).astype(int)
print(classification_report(y_test,y_hat))

Epoch 1/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 45ms/step - binary_accuracy: 0.5236 - loss: 2.1041
Epoch 2/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - binary_accuracy: 0.5105 - loss: 0.9370
Epoch 3/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - binary_accuracy: 0.6698 - loss: 0.6213
Epoch 4/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - binary_accuracy: 0.6720 - loss: 0.5983
Epoch 5/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - binary_accuracy: 0.6529 - loss: 0.6371
Epoch 6/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - binary_accuracy: 0.6954 - loss: 0.5740
Epoch 7/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - binary_accuracy: 0.6585 - loss: 0.5899
Epoch 8/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - binary_accuracy: 0.7593 - loss: 0.5209
Epoch 9/

#Trying Different Optimizers

In [21]:
from sklearn.metrics import classification_report, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
import os
import random
import numpy as np
import tensorflow as tf

# Set seeds
seed=1404
os.environ['PYTHONHASHSEED'] = '1404'
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.config.experimental.enable_op_determinism()

optimizers = [
    tf.keras.optimizers.SGD(),
    tf.keras.optimizers.RMSprop(),
    tf.keras.optimizers.Adam(),
    tf.keras.optimizers.AdamW(),
    tf.keras.optimizers.Nadam()
]

for opt in optimizers:
    print(f"\nTesting optimizer: {opt._name if hasattr(opt, '_name') else opt.__class__.__name__}")

    model = Sequential([
        Input(shape=input_shape),
        Dense(64, activation='relu'),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid')
    ])

    model.compile(
        optimizer=opt,
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=[tf.keras.metrics.BinaryAccuracy()]
    )

    model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, verbose=0)
    predictions = model.predict(X_test_scaled)
    y_hat = (predictions > 0.5).astype(int)

    f1 = f1_score(y_test, y_hat)
    print(f"F1-Score with {opt.__class__.__name__}: {f1:.4f}")


Testing optimizer: SGD
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
F1-Score with SGD: 0.7742

Testing optimizer: RMSprop
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
F1-Score with RMSprop: 0.5283

Testing optimizer: Adam
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
F1-Score with Adam: 0.8824

Testing optimizer: AdamW
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
F1-Score with AdamW: 0.1622

Testing optimizer: Nadam
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
F1-Score with Nadam: 0.5769


In [None]:
# Assignment # 3: Selecting Activation Function for the Neural Network for Cat vs Noncat Problem: Adam Optimzer with k=5 cross-validation

In [33]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import KFold
#from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input

# For reproducibility
import os, random
seed=1404
os.environ['PYTHONHASHSEED'] = '1404'
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.config.experimental.enable_op_determinism()

input_shape = X_train_scaled.shape[1:]

kf = KFold(n_splits=5, shuffle=True, random_state=seed)
#skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)

activations = ['relu', 'sigmoid', 'tanh']

for activation in activations:
    print(f"\n Evaluating activation function: {activation}")
    f1_scores = []

    for train_idx, val_idx in kf.split(X_train_scaled, y_train):
        X_tr, X_val = X_train_scaled[train_idx], X_train_scaled[val_idx]
        y_tr, y_val = y_train[train_idx], y_train[val_idx]

        model = Sequential([
            Input(shape=input_shape),
            Dense(64, activation=activation),
            Dense(64, activation=activation),
            Dense(1, activation='sigmoid')
        ])

        model.compile(
            optimizer=tf.keras.optimizers.Adam(),
            loss='binary_crossentropy',
            metrics=[tf.keras.metrics.BinaryAccuracy()]
        )

        model.fit(X_tr, y_tr, epochs=10, batch_size=32, verbose=0)
        y_pred = (model.predict(X_val) > 0.5).astype(int)
        f1 = f1_score(y_val, y_pred)
        f1_scores.append(f1)

    mean_f1 = np.mean(f1_scores)
    std_f1 = np.std(f1_scores)
    print(f"{activation} — Mean F1-score: {mean_f1:.4f} ± {std_f1:.4f}")


 Evaluating activation function: relu
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step
relu — Mean F1-score: 0.2862 ± 0.0893

 Evaluating activation function: sigmoid
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
sigmoid — Mean F1-score: 0.0000 ± 0.0000

 Evaluating activation function: tanh
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m2/2