<a href="https://colab.research.google.com/github/prottushee/IDS_CICIoT2023/blob/main/MeanTeacher(CICIot2023).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the dataset CSV is later read from
# /content/drive/MyDrive/... (this import only exists on Google Colab).
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl.metadata (5.2 kB)
Collecting tensorboard~=2.19.0 (from tensorflow)
  Downloading tensorboard-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Collecting tensorflow-io-gcs-filesystem>=0.23.1 (from tensorflow)
  Downloading tensorflow_io_gcs_filesystem-0.37.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (14 kB)
Collecting wheel<1.0,>=0.23.0 (from astunparse>=1.6.0->tensorflow

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, accuracy_score

# Load saved SMOTE-balanced labeled dataset
# NOTE(review): if SMOTE was applied before this file was written, synthetic rows
# derived from validation samples can end up in the training split — confirm upstream.
df = pd.read_csv("/content/drive/MyDrive/CICIoT2023_Clients/labeled_balanced_20percent.csv")

# Re-encode labels (ensures consistency); `le` is reused later to decode class ids
le = LabelEncoder()
df['label'] = le.fit_transform(df['label'])

# Feature-label split
X = df.drop(columns=['label'])
y = df['label']

# Recalculate class weights over the full label column
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y), y=y)
class_weight_dict = dict(zip(np.unique(y), class_weights))

# Train-Test split (80/20) on the *unscaled* features. Same random_state and
# stratification as before, so the row partition itself is unchanged.
X_train_raw, X_val_raw, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Normalize features: fit the scaler on the training rows only, then apply the
# same transform to the validation rows, so validation statistics never leak
# into the features the model is trained on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_val = scaler.transform(X_val_raw)

# The raw splits are no longer needed; release them to keep peak memory down.
del X_train_raw, X_val_raw

print(f"✅ Train: {X_train.shape}, Val: {X_val.shape}")


✅ Train: (2839961, 32), Val: (709991, 32)


In [None]:
# ======================== Install keras-tuner if not installed ========================
!pip install keras-tuner --quiet

# ======================== Import libraries ========================
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras_tuner import RandomSearch
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.losses import SparseCategoricalCrossentropy

# ======================== Assume your preprocessed data is ready ========================
# Names this cell expects to already exist in the kernel:
#   X_train, y_train, X_val, y_val, class_weight_dict, le (LabelEncoder for decoding labels)
# Size of the softmax output layer = number of distinct labels in the training split.
num_classes = len(np.unique(y_train))

# ======================== Build model function for tuner ========================
def build_model(hp):
    """Construct and compile one candidate DNN for the KerasTuner search.

    Search space sampled through ``hp``:
      * ``num_layers``  - 2 to 4 hidden blocks
      * ``units_{i}``   - 64..512 (step 64) Dense units in block ``i``
      * ``dropout_{i}`` - 0.1..0.5 (step 0.1) dropout rate in block ``i``
      * ``lr``          - Adam learning rate, log-sampled in [1e-4, 1e-2]

    Every hidden block is Dense(relu) -> BatchNormalization -> Dropout.
    The input width and the output size are read from the notebook-level
    ``X_train`` and ``num_classes``.

    Returns:
        A compiled ``keras.Model`` (sparse categorical cross-entropy, accuracy metric).
    """
    n_features = X_train.shape[1]
    net_input = keras.Input(shape=(n_features,))

    hidden = net_input
    depth = hp.Int("num_layers", 2, 4)
    for layer_idx in range(depth):
        width = hp.Int(f"units_{layer_idx}", min_value=64, max_value=512, step=64)
        hidden = keras.layers.Dense(width, activation="relu")(hidden)
        hidden = keras.layers.BatchNormalization()(hidden)
        drop = hp.Float(f"dropout_{layer_idx}", min_value=0.1, max_value=0.5, step=0.1)
        hidden = keras.layers.Dropout(drop)(hidden)

    class_probs = keras.layers.Dense(num_classes, activation="softmax")(hidden)
    candidate = keras.Model(net_input, class_probs)

    # Only Adam is tuned; its learning rate is the single optimizer hyperparameter.
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    candidate.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss=SparseCategoricalCrossentropy(),
        metrics=["accuracy"],
    )
    return candidate

# ======================== Set tuner ========================
# `seed` pins the sequence of hyperparameter combinations the random search
# samples, so a fresh run explores the same trials. Weight initialisation and
# batch shuffling are still unseeded, so per-trial scores can vary slightly.
tuner = RandomSearch(
    build_model,
    objective="val_accuracy",
    max_trials=20,  # You can increase this later
    executions_per_trial=1,
    seed=42,
    directory="dnn_tuning",
    project_name="ciciot2023_tuning_adam_only"
)

# ======================== Run hyperparameter search ========================
# Each trial trains for at most 30 epochs and stops early once val_loss has not
# improved for 5 epochs (val_loss is EarlyStopping's default monitor, written
# out here because the tuner objective is val_accuracy).
tuner.search(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=30,
    batch_size=128,
    class_weight=class_weight_dict,
    callbacks=[keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)],
    verbose=2
)

# ======================== Get the best model ========================
# Take the top-ranked model from the finished search.
best_model = tuner.get_best_models(num_models=1)[0]

# Evaluate on validation set (loss plus the compiled "accuracy" metric)
val_loss, val_acc = best_model.evaluate(X_val, y_val)
print(f"\n✅ Best model validation accuracy: {val_acc * 100:.2f}%")

# ======================== Evaluation block ========================

# Make predictions: class probabilities first, then the arg-max class id per row
y_pred_prob = best_model.predict(X_val, batch_size=512)
y_pred = np.argmax(y_pred_prob, axis=1)

# Accuracy, recomputed from the hard predictions on the same validation data
# (expected to agree with val_acc printed above)
acc = accuracy_score(y_val, y_pred)
print(f"\n✅ Hyperparameter Tuned DNN Accuracy: {acc * 100:.2f}%")

# Classification report with proper label decoding:
# restrict the report to labels that occur in y_val and name them via the LabelEncoder
present_labels = np.unique(y_val)
present_class_names = [str(cls_name) for cls_name in le.inverse_transform(present_labels)]

print("\n📊 Classification Report:")
print(classification_report(y_val, y_pred, labels=present_labels, target_names=present_class_names))


Trial 2 Complete [01h 08m 12s]
val_accuracy: 0.8509164452552795

Best val_accuracy So Far: 0.8509164452552795
Total elapsed time: 01h 34m 18s

Search: Running Trial #3

Value             |Best Value So Far |Hyperparameter
3                 |3                 |num_layers
320               |320               |units_0
0.2               |0.3               |dropout_0
384               |384               |units_1
0.5               |0.1               |dropout_1
0.00025612        |0.004759          |lr
512               |64                |units_2
0.4               |0.1               |dropout_2

Epoch 1/30
22188/22188 - 485s - 22ms/step - accuracy: 0.6847 - loss: 0.8349 - val_accuracy: 0.7902 - val_loss: 0.5183
Epoch 2/30
22188/22188 - 481s - 22ms/step - accuracy: 0.7677 - loss: 0.5849 - val_accuracy: 0.8027 - val_loss: 0.4671
Epoch 3/30
22188/22188 - 483s - 22ms/step - accuracy: 0.7838 - loss: 0.5326 - val_accuracy: 0.8097 - val_loss: 0.4508
Epoch 4/30
22188/22188 - 481s - 22ms/step - accurac

# Baseline DNN with 100% labeled data

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy

# Build the DNN
def build_dnn(input_dim, num_classes):
    """Create the (uncompiled) baseline fully connected classifier.

    Architecture: Dense(512) -> BN -> Dropout(0.4) -> Dense(256) -> BN ->
    Dropout(0.3) -> Dense(128) -> BN -> Dense(num_classes, softmax).
    All hidden Dense layers use ReLU.

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of output classes (softmax width).

    Returns:
        A functional-API ``Model`` mapping features to class probabilities.
    """
    # (units, dropout rate) per hidden block; None means no Dropout after the block.
    hidden_spec = ((512, 0.4), (256, 0.3), (128, None))

    features = Input(shape=(input_dim,))
    h = features
    for width, drop_rate in hidden_spec:
        h = Dense(width, activation='relu')(h)
        h = BatchNormalization()(h)
        if drop_rate is not None:
            h = Dropout(drop_rate)(h)

    probs = Dense(num_classes, activation='softmax')(h)
    return Model(features, probs)

# Compile model
# The class count comes from the full label column `y`; the network is sized from X_train.
num_classes = len(np.unique(y))
dnn_model = build_dnn(X_train.shape[1], num_classes)
# RMSprop with integer-label cross-entropy (labels are class ids, not one-hot).
dnn_model.compile(
   optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9),
    loss=SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)

# Train
# Up to 10 epochs on the whole training split, weighted by class_weight_dict;
# training stops after 2 epochs without improvement and the best weights are restored.
history = dnn_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=512,
    class_weight=class_weight_dict,
    callbacks=[tf.keras.callbacks.EarlyStopping(patience=2, restore_best_weights=True)]
)


ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# ========================== Evaluation Block  of baseline DNN training with 100% label & rmsprop optimizer ==========================

from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# Make predictions with the baseline model trained in the previous cell (`dnn_model`)
batch_size = 512 # Define batch_size
y_pred_prob = dnn_model.predict(X_val, batch_size=batch_size)
y_pred = np.argmax(y_pred_prob, axis=1)  # most probable class id per row

# Calculate and print accuracy
acc = accuracy_score(y_val, y_pred)
print(f"\n✅ Supervised DNN Accuracy: {acc * 100:.2f}%")

# Handle label decoding properly:
# only labels present in y_val are reported, named through the LabelEncoder `le`
present_labels = np.unique(y_val)
present_class_names = [str(cls_name) for cls_name in le.inverse_transform(present_labels)]

# Print classification report
print("\n📊 Classification Report:")
print(classification_report(y_val, y_pred, labels=present_labels, target_names=present_class_names))

[1m1387/1387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 10ms/step

✅ Supervised DNN Accuracy: 83.10%

📊 Classification Report:
              precision    recall  f1-score   support

           0       0.96      1.00      0.98     22187
           1       0.57      0.64      0.60     22187
           2       0.91      1.00      0.95     22187
           3       0.94      0.99      0.96     22187
           4       0.99      1.00      0.99     22187
           5       0.93      0.89      0.91     22187
           6       1.00      1.00      1.00     22188
           7       0.99      1.00      0.99     22188
           8       1.00      1.00      1.00     22187
           9       1.00      1.00      1.00     22187
          10       0.48      0.20      0.29     22188
          11       0.42      0.73      0.53     22187
          12       0.57      0.79      0.66     22187
          13       0.99      1.00      0.99     22187
          14       0.77      0.70      0.73     2

# Baseline DNN with 20% labeled data

In [None]:
import tensorflow as tf
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm

# Assume X_train, X_val, y_train, y_val already exist
# Keep a stratified 20% of the training split as the labeled set (test_size=0.8
# discards the other 80%); random_state=42 makes the subset repeatable.
X_lab, _, y_lab, _ = train_test_split(
    X_train, y_train, test_size=0.8, stratify=y_train, random_state=42
)

# Plain NumPy arrays for training
X_lab = np.array(X_lab)
y_lab = np.array(y_lab)
# The validation split doubles as the test set in this experiment.
X_test = X_val
y_test = y_val
num_classes = len(np.unique(y_train))


def build_dnn(input_dim, num_classes):
    """Build the uncompiled baseline DNN (512 -> 256 -> 128 -> softmax).

    Each hidden Dense layer (ReLU) is followed by BatchNormalization; the
    first two blocks additionally apply Dropout (0.4 and 0.3).

    Args:
        input_dim: Number of input features per sample.
        num_classes: Width of the softmax output layer.

    Returns:
        A ``tf.keras.Model`` producing class probabilities.
    """
    layers = tf.keras.layers
    net_in = tf.keras.Input(shape=(input_dim,))

    # Block 1
    h = layers.Dense(512, activation='relu')(net_in)
    h = layers.BatchNormalization()(h)
    h = layers.Dropout(0.4)(h)

    # Block 2
    h = layers.Dense(256, activation='relu')(h)
    h = layers.BatchNormalization()(h)
    h = layers.Dropout(0.3)(h)

    # Block 3 (no dropout before the classifier head)
    h = layers.Dense(128, activation='relu')(h)
    h = layers.BatchNormalization()(h)

    net_out = layers.Dense(num_classes, activation='softmax')(h)
    return tf.keras.Model(net_in, net_out)

# Optimizers to compare; each instance is used to train exactly one fresh model below.
optimizers = {
    "Adam": tf.keras.optimizers.Adam(),
    "RMSprop": tf.keras.optimizers.RMSprop(learning_rate=0.001),
    "SGD": tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
}

# Shared training settings for every run
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
batch_size = 512
epochs = 10

# optimizer name -> accuracy on (X_test, y_test)
baseline_results = {}

for name, optimizer in optimizers.items():
    print(f"\nTraining Baseline DNN with {name}")
    # New, untrained network per optimizer so the runs do not share weights.
    model = build_dnn(X_lab.shape[1], num_classes)
    model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

    # Supervised training on the labeled subset only (no class weights, no early stopping).
    history = model.fit(
        X_lab, y_lab,
        validation_data=(X_test, y_test),
        epochs=epochs,
        batch_size=batch_size,
        verbose=2
    )

    y_pred = np.argmax(model.predict(X_test, batch_size=batch_size), axis=1)
    acc = accuracy_score(y_test, y_pred)
    baseline_results[name] = acc

# NOTE: after the loop, `model`, `history`, `y_pred` and `acc` refer to the last
# optimizer trained (the final key of `optimizers`).

# Show summary
print("\n Baseline Supervised DNN Accuracy Comparison:")
for name, acc in baseline_results.items():
    print(f"{name}: Accuracy = {acc * 100:.2f}%")



Training Baseline DNN with Adam
Epoch 1/10
1110/1110 - 56s - 50ms/step - accuracy: 0.6264 - loss: 1.0072 - val_accuracy: 0.7081 - val_loss: 0.7285
Epoch 2/10
1110/1110 - 53s - 48ms/step - accuracy: 0.7091 - loss: 0.7408 - val_accuracy: 0.7645 - val_loss: 0.5977
Epoch 3/10
1110/1110 - 53s - 47ms/step - accuracy: 0.7410 - loss: 0.6562 - val_accuracy: 0.7791 - val_loss: 0.5459
Epoch 4/10
1110/1110 - 53s - 47ms/step - accuracy: 0.7582 - loss: 0.6098 - val_accuracy: 0.7818 - val_loss: 0.5581
Epoch 5/10
1110/1110 - 53s - 47ms/step - accuracy: 0.7714 - loss: 0.5739 - val_accuracy: 0.7951 - val_loss: 0.4991
Epoch 6/10
1110/1110 - 53s - 47ms/step - accuracy: 0.7779 - loss: 0.5541 - val_accuracy: 0.8018 - val_loss: 0.4770
Epoch 7/10
1110/1110 - 52s - 47ms/step - accuracy: 0.7810 - loss: 0.5424 - val_accuracy: 0.7863 - val_loss: 0.4973
Epoch 8/10
1110/1110 - 53s - 47ms/step - accuracy: 0.7842 - loss: 0.5300 - val_accuracy: 0.8075 - val_loss: 0.4586
Epoch 9/10
1110/1110 - 53s - 47ms/step - accura

In [None]:
# ========================== Evaluation Block  of baseline DNN training with 20% label ==========================

from sklearn.metrics import classification_report, accuracy_score
import numpy as np

# Make predictions
# NOTE(review): `model` is whatever the optimizer-comparison loop assigned last,
# i.e. the network trained with the final entry of `optimizers` — this report
# therefore covers that single run only. Assumes the training cell ran just before.
batch_size = 512
y_pred_prob =  model.predict(X_test, batch_size=batch_size)
y_pred = np.argmax(y_pred_prob, axis=1)

# Calculate and print accuracy
# (y_val is the same object the training cell bound to y_test, so this matches X_test)
acc = accuracy_score(y_val, y_pred)
print(f"\nSupervised DNN Accuracy: {acc * 100:.2f}%")

# Handle label decoding properly:
# report only labels present in y_val, named through the LabelEncoder `le`
present_labels = np.unique(y_val)
present_class_names = [str(cls_name) for cls_name in le.inverse_transform(present_labels)]

# Print classification report
print("\nClassification Report:")
print(classification_report(y_val, y_pred, labels=present_labels, target_names=present_class_names))

[1m1387/1387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 9ms/step

Supervised DNN Accuracy: 76.15%

Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.99      0.93     22187
           1       0.53      0.56      0.55     22187
           2       0.79      0.97      0.87     22187
           3       0.81      0.97      0.88     22187
           4       0.99      0.99      0.99     22187
           5       0.88      0.88      0.88     22187
           6       1.00      1.00      1.00     22188
           7       0.99      0.99      0.99     22188
           8       1.00      1.00      1.00     22187
           9       1.00      1.00      1.00     22187
          10       0.39      0.41      0.40     22188
          11       0.54      0.35      0.42     22187
          12       0.68      0.46      0.55     22187
          13       0.99      0.99      0.99     22187
          14       0.64      0.50      0.56     22188


# Mean Teacher Training

In [None]:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import clone_model
from tqdm import tqdm


# Assume X_train, X_val, y_train, y_val already exist
# Split 20% labeled, 80% unlabeled from training set
# (stratified by class; the labels of the 80% part are deliberately discarded via `_`)
X_lab, X_unlab, y_lab, _ = train_test_split(
    X_train, y_train, test_size=0.8, stratify=y_train, random_state=42
)

# Plain NumPy arrays so batches can be selected with integer index arrays
X_lab = np.array(X_lab)
y_lab = np.array(y_lab)
X_unlab = np.array(X_unlab)
# The validation split doubles as the test set in this experiment.
X_test = X_val
y_test = y_val
num_classes = len(np.unique(y_train))


def build_dnn(input_dim, num_classes):
    """Return the uncompiled DNN used for both student and teacher.

    Layer stack, applied in order: Dense(512, relu), BatchNorm, Dropout(0.4),
    Dense(256, relu), BatchNorm, Dropout(0.3), Dense(128, relu), BatchNorm,
    Dense(num_classes, softmax).

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of classes in the softmax output.

    Returns:
        A ``tf.keras.Model`` mapping a feature vector to class probabilities.
    """
    kl = tf.keras.layers
    layer_stack = [
        kl.Dense(512, activation='relu'),
        kl.BatchNormalization(),
        kl.Dropout(0.4),
        kl.Dense(256, activation='relu'),
        kl.BatchNormalization(),
        kl.Dropout(0.3),
        kl.Dense(128, activation='relu'),
        kl.BatchNormalization(),
        kl.Dense(num_classes, activation='softmax'),
    ]

    model_input = tf.keras.Input(shape=(input_dim,))
    tensor = model_input
    for layer in layer_stack:
        tensor = layer(tensor)
    return tf.keras.Model(model_input, tensor)


# Optimizers to compare; each instance trains exactly one student network.
optimizers = {
    "Adam": tf.keras.optimizers.Adam(),
    "RMSprop": tf.keras.optimizers.RMSprop(learning_rate=0.001),
    "SGD": tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
}

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
batch_size = 512
epochs = 10
ema_decay = 0.99             # teacher <- 0.99 * teacher + 0.01 * student after every step
confidence_threshold = 0.9   # teacher max-probability required for an unlabeled row to count


def masked_consistency_loss(student_probs, teacher_probs, threshold):
    """Mean squared student/teacher difference over confidently predicted rows.

    A row contributes only when the teacher's highest class probability exceeds
    ``threshold``. The result equals the mean of the squared differences over
    all elements of the selected rows, and is 0 when no row is selected
    (a plain ``reduce_mean`` over an empty selection would be NaN).

    Args:
        student_probs: (batch, classes) student softmax output.
        teacher_probs: (batch, classes) teacher softmax output, same dtype.
        threshold: Confidence cut-off applied to the teacher's max probability.

    Returns:
        Scalar tensor with the masked mean squared error.
    """
    confident = tf.cast(tf.reduce_max(teacher_probs, axis=1) > threshold, student_probs.dtype)
    sq_err = tf.square(student_probs - teacher_probs) * confident[:, tf.newaxis]
    # Number of elements that take part in the mean: selected rows x classes.
    n_terms = tf.reduce_sum(confident) * tf.cast(tf.shape(student_probs)[1], student_probs.dtype)
    return tf.math.divide_no_nan(tf.reduce_sum(sq_err), n_terms)


# === Run training for each optimizer ===
results = {}

for name, optimizer in optimizers.items():
    print(f"\n🧪 Training with {name} optimizer")
    # Teacher starts as an exact copy of the freshly initialised student.
    student = build_dnn(X_lab.shape[1], num_classes)
    teacher = clone_model(student)
    teacher.set_weights(student.get_weights())

    for epoch in range(1, epochs + 1):
        print(f"\nEpoch {epoch}/{epochs} [{name}]")
        # One epoch = one shuffled pass over the labeled subset.
        idxs = np.arange(len(X_lab))
        np.random.shuffle(idxs)

        for i in tqdm(range(0, len(idxs), batch_size)):
            batch_idx = idxs[i:i+batch_size]
            x_l_batch = tf.convert_to_tensor(X_lab[batch_idx])
            y_l_batch = tf.convert_to_tensor(y_lab[batch_idx])
            # Unlabeled rows are drawn with replacement, as many as labeled rows in the batch.
            unlab_idx = np.random.choice(len(X_unlab), size=len(batch_idx))
            x_u_batch = tf.convert_to_tensor(X_unlab[unlab_idx])

            with tf.GradientTape() as tape:
                y_l_pred = student(x_l_batch, training=True)
                y_u_pred = student(x_u_batch, training=True)
                # Teacher targets are constants: inference mode, no gradient.
                y_u_teacher = tf.stop_gradient(teacher(x_u_batch, training=False))

                consistency_loss = masked_consistency_loss(
                    y_u_pred, y_u_teacher, confidence_threshold
                )
                sup_loss = loss_fn(y_l_batch, y_l_pred)
                total_loss = sup_loss + consistency_loss

            grads = tape.gradient(total_loss, student.trainable_weights)
            optimizer.apply_gradients(zip(grads, student.trainable_weights))

            # EMA update of every teacher weight towards the student.
            # NOTE(review): get_weights/set_weights copies all weights through
            # NumPy on every step; likely a large share of the step time.
            student_weights = student.get_weights()
            teacher_weights = teacher.get_weights()
            teacher.set_weights([
                ema_decay * t + (1 - ema_decay) * s
                for s, t in zip(student_weights, teacher_weights)
            ])

    # Final Evaluation
    student_pred = np.argmax(student.predict(X_test, batch_size=batch_size), axis=1)
    teacher_pred = np.argmax(teacher.predict(X_test, batch_size=batch_size), axis=1)
    student_acc = accuracy_score(y_test, student_pred)
    teacher_acc = accuracy_score(y_test, teacher_pred)

    results[name] = {
        "Student Accuracy": student_acc,
        "Teacher Accuracy": teacher_acc
    }

# After the loop, `name`, `student_pred` and `teacher_pred` hold the values of
# the last optimizer trained.

# Show summary
print("\nOptimizer Comparison Results:")
for name, accs in results.items():
    print(f"{name}: Student Acc = {accs['Student Accuracy']*100:.2f}%, Teacher Acc = {accs['Teacher Accuracy']*100:.2f}%")



🧪 Training with Adam optimizer

Epoch 1/10 [Adam]


100%|██████████| 1110/1110 [04:01<00:00,  4.60it/s]



Epoch 2/10 [Adam]


100%|██████████| 1110/1110 [03:59<00:00,  4.63it/s]



Epoch 3/10 [Adam]


100%|██████████| 1110/1110 [03:59<00:00,  4.63it/s]



Epoch 4/10 [Adam]


100%|██████████| 1110/1110 [03:59<00:00,  4.63it/s]



Epoch 5/10 [Adam]


100%|██████████| 1110/1110 [04:00<00:00,  4.61it/s]



Epoch 6/10 [Adam]


100%|██████████| 1110/1110 [04:01<00:00,  4.59it/s]



Epoch 7/10 [Adam]


100%|██████████| 1110/1110 [03:59<00:00,  4.64it/s]



Epoch 8/10 [Adam]


100%|██████████| 1110/1110 [03:58<00:00,  4.66it/s]



Epoch 9/10 [Adam]


100%|██████████| 1110/1110 [03:57<00:00,  4.67it/s]



Epoch 10/10 [Adam]


100%|██████████| 1110/1110 [03:57<00:00,  4.66it/s]


[1m1387/1387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 10ms/step
[1m1387/1387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 10ms/step

🧪 Training with RMSprop optimizer

Epoch 1/10 [RMSprop]


100%|██████████| 1110/1110 [03:31<00:00,  5.24it/s]



Epoch 2/10 [RMSprop]


100%|██████████| 1110/1110 [03:32<00:00,  5.24it/s]



Epoch 3/10 [RMSprop]


100%|██████████| 1110/1110 [03:32<00:00,  5.24it/s]



Epoch 4/10 [RMSprop]


100%|██████████| 1110/1110 [03:32<00:00,  5.22it/s]



Epoch 5/10 [RMSprop]


100%|██████████| 1110/1110 [03:32<00:00,  5.23it/s]



Epoch 6/10 [RMSprop]


100%|██████████| 1110/1110 [03:31<00:00,  5.24it/s]



Epoch 7/10 [RMSprop]


100%|██████████| 1110/1110 [03:32<00:00,  5.23it/s]



Epoch 8/10 [RMSprop]


100%|██████████| 1110/1110 [03:31<00:00,  5.24it/s]



Epoch 9/10 [RMSprop]


100%|██████████| 1110/1110 [03:30<00:00,  5.27it/s]



Epoch 10/10 [RMSprop]


100%|██████████| 1110/1110 [03:29<00:00,  5.29it/s]


[1m1387/1387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 9ms/step
[1m1387/1387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 9ms/step

🧪 Training with SGD optimizer

Epoch 1/10 [SGD]


100%|██████████| 1110/1110 [03:16<00:00,  5.64it/s]



Epoch 2/10 [SGD]


100%|██████████| 1110/1110 [03:17<00:00,  5.62it/s]



Epoch 3/10 [SGD]


100%|██████████| 1110/1110 [03:17<00:00,  5.62it/s]



Epoch 4/10 [SGD]


100%|██████████| 1110/1110 [03:17<00:00,  5.63it/s]



Epoch 5/10 [SGD]


100%|██████████| 1110/1110 [03:16<00:00,  5.65it/s]



Epoch 6/10 [SGD]


100%|██████████| 1110/1110 [03:17<00:00,  5.61it/s]



Epoch 7/10 [SGD]


100%|██████████| 1110/1110 [03:17<00:00,  5.61it/s]



Epoch 8/10 [SGD]


100%|██████████| 1110/1110 [03:17<00:00,  5.63it/s]



Epoch 9/10 [SGD]


100%|██████████| 1110/1110 [03:16<00:00,  5.64it/s]



Epoch 10/10 [SGD]


100%|██████████| 1110/1110 [03:16<00:00,  5.64it/s]


[1m1387/1387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 9ms/step
[1m1387/1387[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 9ms/step

📊 Optimizer Comparison Results:
Adam: Student Acc = 80.43%, Teacher Acc = 81.02%
RMSprop: Student Acc = 81.11%, Teacher Acc = 81.16%
SGD: Student Acc = 78.27%, Teacher Acc = 78.24%


In [None]:
from sklearn.metrics import classification_report


# NOTE(review): this cell is not inside the optimizer loop. It reports whatever
# `name`, `student_pred` and `teacher_pred` currently hold, i.e. the values left
# behind by the most recent training run in this kernel (a single optimizer).
print(f"\nClassification Report for {name} Optimizer")

# Student: per-class precision/recall/F1 on the test labels
print(f"\n Student Model Report:")
print(classification_report(
    y_test,
    student_pred,
    labels=np.unique(y_test)
))

# Teacher: same report for the EMA teacher's predictions
print(f"\nTeacher Model Report:")
print(classification_report(
    y_test,
    teacher_pred,
    labels=np.unique(y_test)
))



Classification Report for SGD Optimizer

 Student Model Report:
              precision    recall  f1-score   support

           0       0.86      0.99      0.92     22187
           1       0.50      0.58      0.54     22187
           2       0.77      0.96      0.86     22187
           3       0.81      0.96      0.88     22187
           4       1.00      0.99      0.99     22187
           5       0.92      0.83      0.87     22187
           6       1.00      1.00      1.00     22188
           7       0.98      0.99      0.99     22188
           8       1.00      0.99      1.00     22187
           9       1.00      1.00      1.00     22187
          10       0.52      0.11      0.19     22188
          11       0.41      0.75      0.53     22187
          12       0.58      0.71      0.64     22187
          13       0.99      0.99      0.99     22187
          14       0.65      0.48      0.55     22188
          15       0.79      0.86      0.82     22187
          16    

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(y_true, y_pred, class_names, title, labels=None):
    """Draw a confusion-matrix heatmap for one set of predictions.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, aligned with ``y_true``.
        class_names: Tick labels, one per matrix row/column, in label order.
        title: Figure title.
        labels: Optional explicit label order forwarded to
            ``confusion_matrix``. The default ``None`` keeps the previous
            behaviour (labels inferred from ``y_true``/``y_pred``).

    Raises:
        ValueError: If the matrix size differs from ``len(class_names)``;
            the axes would otherwise be labelled with the wrong class names.
    """
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    if cm.shape[0] != len(class_names):
        raise ValueError(
            f"Confusion matrix is {cm.shape[0]}x{cm.shape[1]} but "
            f"{len(class_names)} class names were given; pass `labels` to fix the label set."
        )

    # Explicit figure/axes instead of the implicit pyplot state.
    fig, ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(cm, annot=False, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names, ax=ax)
    ax.set_title(title, fontsize=16)
    ax.set_xlabel('Predicted Labels')
    ax.set_ylabel('True Labels')
    ax.tick_params(axis='x', labelrotation=90)
    ax.tick_params(axis='y', labelrotation=0)
    fig.tight_layout()
    plt.show()


In [None]:
# Readable tick labels: the LabelEncoder's classes when `le` exists in this
# kernel, generic "Class_i" names otherwise. Presence is checked through
# globals() because referencing an undefined `le` directly raises NameError
# before any fallback could run.
class_names = le.classes_.tolist() if 'le' in globals() else [f"Class_{i}" for i in range(num_classes)]

# Requires `name`, `student_pred` and `teacher_pred` from the Mean Teacher
# training cell; they describe the last optimizer that was trained.

# Student Confusion Matrix
plot_confusion_matrix(y_test, student_pred, class_names, title=f'{name} - Student Confusion Matrix')

# Teacher Confusion Matrix
plot_confusion_matrix(y_test, teacher_pred, class_names, title=f'{name} - Teacher Confusion Matrix')


NameError: name 'student_pred' is not defined