<a href="https://colab.research.google.com/github/m-xsk/m-xsk/blob/main/CNN%26MLPOpyimizers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q tensorflow tensorflow-datasets matplotlib pandas

# Import necessary libraries
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from tensorflow.keras import layers, models, optimizers, losses
from sklearn.model_selection import train_test_split

In [None]:
# Fetch the rock_paper_scissors splits from TFDS as (image, label) pairs,
# together with the dataset metadata object.
dataset, info = tfds.load(
    'rock_paper_scissors',
    with_info=True,
    as_supervised=True,
)
train_ds_raw = dataset['train']
test_ds_raw = dataset['test']

# Side length (in pixels) that every image is resized to
IMG_SIZE = 100

# Define preprocessing function
def preprocess(image, label):
    """Resize an image to IMG_SIZE x IMG_SIZE and divide its values by 255.

    Args:
        image: Image tensor accepted by `tf.image.resize`.
            NOTE(review): presumably uint8 pixels in 0..255, so the result
            lands in [0, 1] -- confirm against the dataset.
        label: Label paired with the image; returned unchanged.

    Returns:
        Tuple `(image, label)` where the image is a float32 tensor that has
        been resized and divided by 255.0.
    """
    target_size = (IMG_SIZE, IMG_SIZE)
    resized = tf.image.resize(image, target_size)
    scaled = tf.cast(resized, tf.float32) / 255.0
    return scaled, label

# Batch size shared by every tf.data pipeline below
BATCH_SIZE = 32

# 90% / 10% train/validation sizes, counted in examples
train_size = int(0.9 * len(train_ds_raw))
val_size = len(train_ds_raw) - train_size

# Training pipeline: the first `train_size` examples, preprocessed, cached,
# shuffled with a 1000-element buffer, then batched.
train_ds = train_ds_raw.take(train_size)
train_ds = train_ds.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
train_ds = train_ds.cache().shuffle(1000)
train_ds = train_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Validation pipeline: everything after the first `train_size` examples.
# No shuffling; batches are cached after batching.
val_ds = train_ds_raw.skip(train_size)
val_ds = val_ds.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
val_ds = val_ds.batch(BATCH_SIZE).cache()
val_ds = val_ds.prefetch(tf.data.AUTOTUNE)

# Test pipeline: same stages as the validation pipeline, on the test split
test_ds = test_ds_raw.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
test_ds = test_ds.batch(BATCH_SIZE).cache()
test_ds = test_ds.prefetch(tf.data.AUTOTUNE)

# Materialise the whole preprocessed training split as NumPy arrays for the
# MLP models.
images_all, labels_all = [], []
for image, label in tfds.as_numpy(train_ds_raw.map(preprocess)):
    images_all.append(image)
    labels_all.append(label)
images_all = np.array(images_all)
labels_all = np.array(labels_all)

# Stratified 90% / 10% split of the NumPy arrays.
# NOTE(review): this is a seeded random split, whereas `train_ds` / `val_ds`
# above are split positionally with take/skip, so the NumPy validation set and
# `val_ds` do not contain the same examples.
(images_train, images_val,
 labels_train, labels_val) = train_test_split(
    images_all,
    labels_all,
    test_size=0.1,
    random_state=42,
    stratify=labels_all,
)



In [None]:
def create_mlp_model(optimizer, input_shape=(100, 100, 3), num_classes=3):
    """Build and compile a fully connected image classifier.

    The network flattens the input and applies two ReLU hidden layers
    (512 and 256 units), each followed by 30% dropout, before a softmax
    output layer.

    Args:
        optimizer: Optimizer handed to `model.compile`.
        input_shape: Shape of a single input sample, without the batch axis.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled `Sequential` model. Accuracy is registered under the
        metric name 'acc', so training histories expose 'acc' / 'val_acc'.
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer instead of passing
        # `input_shape` to Flatten; this matches how `cnn_model` declares its
        # input and is the form Keras recommends for Sequential models.
        layers.Input(shape=input_shape),
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(num_classes, activation='softmax')
    ])

    # The output layer already applies softmax, hence from_logits=False.
    model.compile(
        optimizer=optimizer,
        loss=losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=['acc']
    )

    return model


In [None]:
# One optimizer instance per strategy, keyed by display name. All use the
# constructor defaults except the momentum variant of SGD.
optimizers_dict = dict(
    SGD=optimizers.SGD(),
    SGD_Momentum=optimizers.SGD(momentum=0.9),
    Adagrad=optimizers.Adagrad(),
    RMSProp=optimizers.RMSprop(),
    Adam=optimizers.Adam(),
)


In [None]:
# Empty containers, presumably meant to be keyed by optimizer name.
# NOTE(review): neither dict is referenced by the cells visible in this
# notebook section (the training cell fills `history_mlp` instead) -- confirm
# whether they are still needed.
mlp_histories, mlp_models = {}, {}


In [None]:
# Per-optimizer training histories plus bookkeeping for the best MLP run
history_mlp = {}
best_model_mlp = None
best_optimizer_mlp = ""
best_acc_mlp = 0

# Train one freshly initialised MLP per optimizer
for optimizer_name, optimizer in optimizers_dict.items():
    print(f"Training with {optimizer_name}")

    # Optimizer objects are stateful and become tied to the model they train.
    # Rebuilding one from its config gives each model (and each re-run of this
    # cell) an unused optimizer with the same hyperparameters.
    fresh_optimizer = type(optimizer).from_config(optimizer.get_config())
    model_mlp = create_mlp_model(fresh_optimizer)

    # Train on the NumPy arrays and validate on the held-out NumPy split
    hist_mlp = model_mlp.fit(
        images_train, labels_train,
        validation_data=(images_val, labels_val),
        epochs=5, verbose=1
    )

    # Keep the full per-epoch history for later reporting
    history_mlp[optimizer_name] = hist_mlp.history

    # Rank on the final-epoch validation accuracy: that is the score of the
    # weights actually held by `model_mlp`. Ranking on the peak over all epochs
    # could select a model whose stored weights no longer reach that score.
    current_val_acc_mlp = hist_mlp.history['val_acc'][-1]

    # Remember the best-performing model seen so far
    if best_model_mlp is None or current_val_acc_mlp > best_acc_mlp:
        best_model_mlp = model_mlp
        best_acc_mlp = current_val_acc_mlp
        best_optimizer_mlp = optimizer_name

# Output the best optimizer and its validation accuracy
print(f"Best MLP optimizer: {best_optimizer_mlp} with validation accuracy: {best_acc_mlp:.4f}")


Training with SGD


  super().__init__(**kwargs)


Epoch 1/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 95ms/step - acc: 0.3352 - loss: 3.2193 - val_acc: 0.3333 - val_loss: 1.1000
Epoch 2/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 89ms/step - acc: 0.3460 - loss: 1.1025 - val_acc: 0.4405 - val_loss: 1.0917
Epoch 3/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 107ms/step - acc: 0.3813 - loss: 1.0887 - val_acc: 0.3333 - val_loss: 1.0989
Epoch 4/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 90ms/step - acc: 0.3327 - loss: 1.0982 - val_acc: 0.3333 - val_loss: 1.0988
Epoch 5/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 104ms/step - acc: 0.3373 - loss: 1.0991 - val_acc: 0.3333 - val_loss: 1.0987
Training with SGD_Momentum
Epoch 1/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 242ms/step - acc: 0.3482 - loss: 4.1561 - val_acc: 0.3333 - val_loss: 1.0991
Epoch 2/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

In [None]:
# Dump the per-epoch training/validation metrics of every MLP run
for optimizer_name, hist in history_mlp.items():
    print(f"\n--- Training History for {optimizer_name} ---")

    # One five-line report (plus a separator) per epoch
    for epoch in range(len(hist['loss'])):
        report_lines = (
            f"Epoch {epoch + 1}:",
            f"  Training Loss: {hist['loss'][epoch]:.4f}",
            f"  Training Accuracy: {hist['acc'][epoch]:.4f}",
            f"  Validation Loss: {hist['val_loss'][epoch]:.4f}",
            f"  Validation Accuracy: {hist['val_acc'][epoch]:.4f}",
            "-" * 40,
        )
        for line in report_lines:
            print(line)



--- Training History for SGD ---
Epoch 1:
  Training Loss: 1.7542
  Training Accuracy: 0.3364
  Validation Loss: 1.1000
  Validation Accuracy: 0.3333
----------------------------------------
Epoch 2:
  Training Loss: 1.0969
  Training Accuracy: 0.3576
  Validation Loss: 1.0917
  Validation Accuracy: 0.4405
----------------------------------------
Epoch 3:
  Training Loss: 1.0868
  Training Accuracy: 0.3814
  Validation Loss: 1.0989
  Validation Accuracy: 0.3333
----------------------------------------
Epoch 4:
  Training Loss: 1.0962
  Training Accuracy: 0.3439
  Validation Loss: 1.0988
  Validation Accuracy: 0.3333
----------------------------------------
Epoch 5:
  Training Loss: 1.0990
  Training Accuracy: 0.3316
  Validation Loss: 1.0987
  Validation Accuracy: 0.3333
----------------------------------------

--- Training History for SGD_Momentum ---
Epoch 1:
  Training Loss: 2.2864
  Training Accuracy: 0.3289
  Validation Loss: 1.0991
  Validation Accuracy: 0.3333
----------------

In [None]:
def cnn_model(optimizer, input_shape=(100, 100, 3), num_classes=3):
    """Build and compile a small convolutional image classifier.

    The network stacks three Conv2D(3x3, ReLU) + MaxPooling2D stages with
    32, 64 and 128 filters, then flattens into Dense(128) and Dense(64) ReLU
    layers and a softmax output layer.

    Args:
        optimizer: Optimizer handed to `model.compile`.
        input_shape: Shape of a single input image, without the batch axis.
            Defaults to the previously hard-coded (100, 100, 3).
        num_classes: Number of units in the softmax output layer. Defaults to
            the previously hard-coded 3.

    Returns:
        The compiled `Sequential` model. Accuracy is registered under the
        metric name 'accuracy', so training histories expose
        'accuracy' / 'val_accuracy'.
    """
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=input_shape),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])

    # The output layer already applies softmax, hence from_logits=False.
    model.compile(
        optimizer=optimizer,
        loss=losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=['accuracy']
    )
    return model

In [None]:
# Separate set of optimizer instances for the CNN runs, keyed by display name.
# All use the constructor defaults except the momentum variant of SGD.
optimizer_list = dict(
    SGD=tf.keras.optimizers.SGD(),
    SGD_Momentum=tf.keras.optimizers.SGD(momentum=0.9),
    Adagrad=tf.keras.optimizers.Adagrad(),
    RMSProp=tf.keras.optimizers.RMSprop(),
    Adam=tf.keras.optimizers.Adam(),
)


In [None]:
# Per-optimizer training histories plus bookkeeping for the best CNN run
history_cnn = {}
best_cnn_model = None
best_cnn_optimizer = ""
best_cnn_acc = 0

# Validation deliberately reuses the `val_ds` pipeline built in the data
# preparation cell (`train_ds_raw.skip(train_size)`), which is disjoint from
# `train_ds` (`train_ds_raw.take(train_size)`). Carving a validation set out of
# `train_ds_raw.take(...)` here would pick examples that `train_ds` also
# trains on (leaking training data into validation), and reassigning
# `train_ds_raw` would shrink the raw split on every re-run of this cell.

# Train one freshly initialised CNN per optimizer
for name, opt in optimizer_list.items():
    print(f"\n🔧 Training CNN model with {name} optimizer...")

    # Optimizer objects are stateful and become tied to the model they train.
    # Rebuilding one from its config gives each model (and each re-run of this
    # cell) an unused optimizer with the same hyperparameters.
    fresh_opt = type(opt).from_config(opt.get_config())
    model_cnn = cnn_model(fresh_opt)

    hist = model_cnn.fit(train_ds, validation_data=val_ds, epochs=5, verbose=1)

    # Keep the full per-epoch history for later reporting
    history_cnn[name] = hist.history

    # Rank on the final-epoch validation accuracy: that is the score of the
    # weights actually held by `model_cnn`. Ranking on the peak over all epochs
    # could select a model whose stored weights no longer reach that score.
    current_val_acc = hist.history['val_accuracy'][-1]
    if best_cnn_model is None or current_val_acc > best_cnn_acc:
        best_cnn_model = model_cnn
        best_cnn_acc = current_val_acc
        best_cnn_optimizer = name

# Output the best optimizer and model
print(f"\n✅ Best CNN optimizer: {best_cnn_optimizer} with Validation Accuracy: {best_cnn_acc:.4f}")


🔧 Training CNN model with SGD optimizer...
Epoch 1/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 549ms/step - accuracy: 0.4178 - loss: 1.0779 - val_accuracy: 0.4524 - val_loss: 1.0280
Epoch 2/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 533ms/step - accuracy: 0.5592 - loss: 0.9713 - val_accuracy: 0.7579 - val_loss: 0.7669
Epoch 3/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 541ms/step - accuracy: 0.6662 - loss: 0.7657 - val_accuracy: 0.7183 - val_loss: 0.6231
Epoch 4/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 534ms/step - accuracy: 0.8175 - loss: 0.5095 - val_accuracy: 0.9444 - val_loss: 0.2248
Epoch 5/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 592ms/step - accuracy: 0.9376 - loss: 0.2260 - val_accuracy: 0.9643 - val_loss: 0.1358

🔧 Training CNN model with SGD_Momentum optimizer...
Epoch 1/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 555ms/step - accur

In [None]:
# Dump the per-epoch training/validation metrics of every CNN run
print("\n📊 Training History for Each CNN Optimizer:\n")

for optimizer_name, history in history_cnn.items():
    print(f"🔧 Optimizer: {optimizer_name}")
    epoch_count = len(history['loss'])
    for epoch in range(epoch_count):
        # Pull this epoch's four metrics in reporting order
        train_loss, train_acc, val_loss, val_acc = (
            history[key][epoch]
            for key in ('loss', 'accuracy', 'val_loss', 'val_accuracy')
        )
        epoch_line = (
            f"  Epoch {epoch+1}: "
            f"Train Loss = {train_loss:.4f}, "
            f"Train Acc = {train_acc:.4f}, "
            f"Val Loss = {val_loss:.4f}, "
            f"Val Acc = {val_acc:.4f}"
        )
        print(epoch_line)
    print("-" * 60)



📊 Training History for Each CNN Optimizer:

🔧 Optimizer: SGD
  Epoch 1: Train Loss = 1.0584, Train Acc = 0.4612, Val Loss = 1.0280, Val Acc = 0.4524
  Epoch 2: Train Loss = 0.9294, Train Acc = 0.5785, Val Loss = 0.7669, Val Acc = 0.7579
  Epoch 3: Train Loss = 0.7034, Train Acc = 0.7094, Val Loss = 0.6231, Val Acc = 0.7183
  Epoch 4: Train Loss = 0.4273, Train Acc = 0.8607, Val Loss = 0.2248, Val Acc = 0.9444
  Epoch 5: Train Loss = 0.1980, Train Acc = 0.9458, Val Loss = 0.1358, Val Acc = 0.9643
------------------------------------------------------------
🔧 Optimizer: SGD_Momentum
  Epoch 1: Train Loss = 1.0408, Train Acc = 0.4581, Val Loss = 0.9289, Val Acc = 0.5119
  Epoch 2: Train Loss = 0.3803, Train Acc = 0.8558, Val Loss = 0.0592, Val Acc = 0.9881
  Epoch 3: Train Loss = 0.0463, Train Acc = 0.9868, Val Loss = 0.0177, Val Acc = 0.9921
  Epoch 4: Train Loss = 0.0097, Train Acc = 0.9974, Val Loss = 0.0097, Val Acc = 0.9960
  Epoch 5: Train Loss = 0.0024, Train Acc = 0.9996, Val Los

In [None]:
from tabulate import tabulate

# Function to extract final epoch metrics
def extract_final_metrics(history, is_cnn=False):
    """Collect the last-epoch metrics of every optimizer's training run.

    Args:
        history: Mapping of optimizer name -> history dict, where each history
            dict maps a metric name to a list of per-epoch values.
        is_cnn: Selects the accuracy key names: 'accuracy' / 'val_accuracy'
            when True, 'acc' / 'val_acc' when False.

    Returns:
        A list with one row per entry of `history`, in iteration order. Each
        row is `[name, loss, accuracy, val_loss, val_accuracy]` taken from the
        final element of the corresponding metric lists.

    Raises:
        KeyError: If a history dict lacks one of the expected metric keys.
        IndexError: If a metric list is empty.
    """
    if is_cnn:
        acc_key, val_acc_key = 'accuracy', 'val_accuracy'
    else:
        acc_key, val_acc_key = 'acc', 'val_acc'
    metric_keys = ('loss', acc_key, 'val_loss', val_acc_key)

    return [
        [opt_name, *(run[key][-1] for key in metric_keys)]
        for opt_name, run in history.items()
    ]

# Final-epoch metrics per optimizer for each model family
mlp_metrics = extract_final_metrics(history_mlp, is_cnn=False)
cnn_metrics = extract_final_metrics(history_cnn, is_cnn=True)

# Prefix every row with the model family it belongs to
mlp_rows = [["MLP"] + list(row) for row in mlp_metrics]
cnn_rows = [["CNN"] + list(row) for row in cnn_metrics]

# MLP rows first, then CNN rows
full_results = [*mlp_rows, *cnn_rows]

# Column titles, in the same order as the row fields
headers = [
    "Model",
    "Optimizer",
    "Train Loss",
    "Train Acc",
    "Validation Loss",
    "Validation Acc",
]

# Render as a grid table with four decimal places
print(tabulate(full_results, headers=headers, floatfmt=".4f", tablefmt="grid"))


+---------+--------------+--------------+-------------+-------------------+------------------+
| Model   | Optimizer    |   Train Loss |   Train Acc |   Validation Loss |   Validation Acc |
| MLP     | SGD          |       1.0990 |      0.3316 |            1.0987 |           0.3333 |
+---------+--------------+--------------+-------------+-------------------+------------------+
| MLP     | SGD_Momentum |       1.0997 |      0.3338 |            1.0987 |           0.3333 |
+---------+--------------+--------------+-------------+-------------------+------------------+
| MLP     | Adagrad      |       0.8330 |      0.6182 |            0.6771 |           0.8413 |
+---------+--------------+--------------+-------------+-------------------+------------------+
| MLP     | RMSProp      |       1.0987 |      0.3276 |            1.0986 |           0.3333 |
+---------+--------------+--------------+-------------+-------------------+------------------+
| MLP     | Adam         |       1.0215 |      0.4