# Metrics Generator
Classification Report: A summary with precision, recall, and F1 scores for each emotion.
Confusion Matrix: A heatmap visual that displays true vs. predicted classifications, showing which emotions are commonly confused.
ROC Curve: Displays the model’s ability to distinguish between emotions, with AUC providing a summary score for each.
Precision-Recall Curve: Captures how precision and recall vary with different thresholds for each emotion, helpful for understanding performance on imbalanced data.
Training/Validation Curves: Line charts showing the learning progression over epochs, indicating whether the model is overfitting or converging well.

In [None]:
import time
import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix

def _plot_confusion_heatmap(matrix, class_names, fmt, title):
    """Draw one annotated confusion-matrix heatmap and display it."""
    plt.figure(figsize=(10, 7))
    sns.heatmap(matrix, annot=True, fmt=fmt, cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.title(title)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.show()


def _plot_training_curves(history):
    """Plot per-epoch accuracy and loss for whichever series `history` contains."""
    panels = (
        ('Accuracy', 'lower right', (('accuracy', 'Training Accuracy'),
                                     ('val_accuracy', 'Validation Accuracy'))),
        ('Loss', 'upper right', (('loss', 'Training Loss'),
                                 ('val_loss', 'Validation Loss'))),
    )
    plt.figure(figsize=(14, 5))
    for position, (metric, legend_loc, series) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        for key, label in series:
            # Runs trained without validation data have no val_* series; skip
            # what is missing instead of failing with KeyError.
            if key in history:
                plt.plot(history[key], label=label)
        plt.title(f'Training and Validation {metric}')
        plt.xlabel('Epoch')
        plt.ylabel(metric)
        plt.legend(loc=legend_loc)
    plt.tight_layout()
    plt.show()


def evaluate_classification_model_generator(model, history, test_generator, class_names):
    """
    Evaluates a classification model using a data generator to avoid loading all data in memory.

    Prints inference latency, per-class precision/recall/F1 and overall metrics,
    then shows the raw and row-normalized confusion matrices and the
    training/validation curves.

    Parameters:
    - model: Trained Keras/TensorFlow model; `model.predict(test_generator)` must
      return one row of class scores per sample.
    - history: Dictionary containing training history (lists under 'accuracy',
      'val_accuracy', 'loss', 'val_loss'), or an object exposing such a dictionary
      as `.history`. Missing series are skipped in the plots.
    - test_generator: A Keras/TensorFlow generator for test data exposing `.classes`
      (true integer labels). It must NOT shuffle, otherwise `.classes` does not
      line up with the prediction order.
    - class_names: List of class names corresponding to emotions, ordered by
      class index.

    Returns:
    - None (displays metrics and plots).

    Raises:
    - ValueError: if no predictions are produced or their count differs from the
      number of true labels.
    """
    import warnings

    # Accept a Keras History object as well as the plain dict stored in JSON.
    history = getattr(history, 'history', history)

    if getattr(test_generator, 'shuffle', False):
        warnings.warn(
            "test_generator has shuffle=True: predictions will not align with "
            "test_generator.classes and all metrics below will be meaningless. "
            "Re-create the generator with shuffle=False.",
            stacklevel=2,
        )
    # Start from the first batch even if the generator was iterated before.
    if hasattr(test_generator, 'reset'):
        test_generator.reset()

    # --- Measure Inference Latency using generator ---
    # perf_counter is monotonic, so the measurement is immune to clock adjustments.
    start_time = time.perf_counter()
    predictions = model.predict(test_generator, verbose=1)
    total_inference_time = time.perf_counter() - start_time

    num_predictions = len(predictions)
    if num_predictions == 0:
        raise ValueError("The model produced no predictions; the test generator is empty.")
    avg_latency = total_inference_time / num_predictions
    print(f"Total inference time for test set: {total_inference_time:.4f} seconds")
    print(f"Average inference latency per sample: {avg_latency:.6f} seconds")

    # Compute predicted classes and true classes
    predicted_classes = np.argmax(predictions, axis=1)
    true_classes = np.asarray(test_generator.classes)  # Automatically available from generator
    if len(true_classes) != num_predictions:
        raise ValueError(
            f"Got {num_predictions} predictions for {len(true_classes)} true labels; "
            "the generator must cover the test set exactly once."
        )

    # Pin the label set so the report and matrices always have one row/column per
    # entry of class_names, even when a class is absent from the test data.
    labels = np.arange(len(class_names))

    # --- Classification Report ---
    report = classification_report(
        true_classes, predicted_classes, labels=labels, target_names=class_names,
        digits=4, output_dict=True, zero_division=0,
    )
    # Computed directly: with an explicit `labels` argument some scikit-learn
    # versions emit 'micro avg' instead of the 'accuracy' key.
    accuracy = float(np.mean(predicted_classes == true_classes))

    print("\nClassification Report:")
    print("Per-Class Metrics:")
    for class_name in class_names:
        stats = report[class_name]
        print(f"  {class_name}: Precision={stats['precision']:.4f}, "
              f"Recall={stats['recall']:.4f}, F1={stats['f1-score']:.4f}, "
              f"Support={int(stats['support'])}")
    print("\nOverall Metrics:")
    print(f"  Accuracy: {accuracy:.4f}")
    for avg_key, avg_label in (('macro avg', 'Macro Avg'), ('weighted avg', 'Weighted Avg')):
        stats = report[avg_key]
        print(f"  {avg_label} Precision: {stats['precision']:.4f}")
        print(f"  {avg_label} Recall: {stats['recall']:.4f}")
        print(f"  {avg_label} F1: {stats['f1-score']:.4f}")

    # --- Confusion Matrices ---
    cm = confusion_matrix(true_classes, predicted_classes, labels=labels)
    _plot_confusion_heatmap(cm, class_names, 'd', 'Confusion Matrix - Raw Counts')

    # Row-normalize; rows with no true samples stay at 0 instead of becoming NaN.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_normalized = np.divide(cm, row_totals,
                              out=np.zeros(cm.shape, dtype=float),
                              where=row_totals != 0)
    _plot_confusion_heatmap(cm_normalized, class_names, '.2f', 'Confusion Matrix - Normalized')

    # --- Training and Validation Curves ---
    _plot_training_curves(history)


# NOTE(review): `model` is never assigned in this cell -- the load call below is
# commented out. It must already exist (e.g. from an earlier cell), otherwise the
# evaluation call at the bottom raises NameError. Uncomment to load from disk.
# model = tf.keras.models.load_model("/content/vit80%/ViT_pretrained_finetuned_80%.keras", compile=False)

# --- Load Training History from JSON ---
# The evaluation function reads the 'accuracy', 'val_accuracy', 'loss' and
# 'val_loss' entries of this dictionary to draw the training curves.
with open("/content/vit80%/finetune_history.json", "r") as f:
    history = json.load(f)

# --- Setup the Test Data Generator ---
# Update these parameters as needed
# NOTE(review): these three values are not used anywhere in the visible code, and
# `test_generator` itself is not created here; presumably both belong to a
# generator-construction step defined elsewhere -- TODO confirm.
image_height, image_width = 224, 224
batch_size = 32

# Define class names (order must match the training order)
# Position i in this list labels class index i in the report and confusion matrices.
class_names = ['anger', 'disgust', 'fear', 'happiness', 'neutral', 'sadness', 'surprise']

# --- Evaluate the Model ---
# Prints latency and classification metrics, then shows the confusion matrices
# and training curves. Requires `model` and `test_generator` to be defined already.
evaluate_classification_model_generator(
    model=model,
    history=history,
    test_generator=test_generator,
    class_names=class_names
)
