<a href="https://colab.research.google.com/github/theresaskruzna/riiid_knowledge_tracing/blob/main/05_Results_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc, precision_recall_curve
import numpy as np
import pandas as pd

def _finalize_figure(fig, save_path):
    """Apply a tight layout to ``fig``, save it to ``save_path`` and close it.

    The figure object is returned so the caller can keep a handle to it after
    it has been released from pyplot.
    """
    fig.tight_layout()
    fig.savefig(save_path)
    plt.close(fig)
    return fig


def _plot_history_metric(ax, recorded, metric, title):
    """Draw the training and validation curves of ``metric`` on ``ax``.

    Only the series actually present in ``recorded`` are drawn, so a run
    without validation data (no ``val_*`` keys) does not raise ``KeyError``.
    """
    for key, prefix in ((metric, 'Training'), (f'val_{metric}', 'Validation')):
        if key in recorded:
            ax.plot(recorded[key], label=f'{prefix} {title}')
    ax.set_title(f'Model {title}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(title)
    if ax.lines:
        # Calling legend() on an empty axes only produces a warning.
        ax.legend()


def evaluate_model(model, X_val, y_val, history=None):
    """
    Comprehensive evaluation of the model with various metrics and visualizations.

    Every plot is written to a PNG file in the current working directory and a
    markdown summary is written to 'model_evaluation_report.md'.

    Parameters:
    model: Trained Keras model. ``model.evaluate`` must return exactly three
        values, which are unpacked as (loss, accuracy, auc).
    X_val: Validation features
    y_val: Validation labels (binary; cast to int for the sklearn metrics)
    history: Training history object (optional). Only the metrics present in
        ``history.history`` are plotted.

    Returns:
    eval_results: Dictionary containing the evaluation metrics, the
        classification report, a 'summary' dict and one matplotlib Figure per
        generated plot (keys ending in '_fig').
    """
    eval_results = {}

    # 1. Get basic metrics
    print("Calculating basic metrics...")
    # Flatten once so every consumer below receives a 1-D vector; a typical
    # (n, 1) prediction array would otherwise be treated by seaborn as
    # wide-form data.
    y_pred_proba = np.asarray(model.predict(X_val)).ravel()
    y_pred = (y_pred_proba > 0.5).astype(int)
    # Integer labels keep the classification-report keys at '0'/'1'; float
    # labels would produce '0.0'/'1.0' and break the report['1'] lookup.
    y_true = np.asarray(y_val).astype(int).ravel()

    # Loss, accuracy and AUC from model.evaluate()
    loss, accuracy, auc_score = model.evaluate(X_val, y_val, verbose=0)
    eval_results['loss'] = loss
    eval_results['accuracy'] = accuracy
    eval_results['auc'] = auc_score

    # 2. Confusion Matrix
    print("Creating confusion matrix...")
    cm = confusion_matrix(y_true, y_pred)
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title('Confusion Matrix')
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')
    eval_results['confusion_matrix_fig'] = _finalize_figure(fig, 'confusion_matrix.png')

    # 3. Classification Report
    print("Generating classification report...")
    report = classification_report(y_true, y_pred, output_dict=True)
    eval_results['classification_report'] = report

    # 4. ROC Curve
    print("Plotting ROC curve...")
    fpr, tpr, _ = roc_curve(y_true, y_pred_proba)
    roc_auc = auc(fpr, tpr)

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.3f})')
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver Operating Characteristic')
    ax.legend(loc="lower right")
    eval_results['roc_curve_fig'] = _finalize_figure(fig, 'roc_curve.png')

    # 5. Precision-Recall Curve
    print("Plotting precision-recall curve...")
    precision, recall, _ = precision_recall_curve(y_true, y_pred_proba)
    pr_auc = auc(recall, precision)

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(recall, precision, color='blue', lw=2, label=f'PR curve (area = {pr_auc:.3f})')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_title('Precision-Recall Curve')
    ax.legend(loc="lower left")
    eval_results['pr_curve_fig'] = _finalize_figure(fig, 'precision_recall_curve.png')

    # 6. Training History Plots (if history is provided)
    if history:
        recorded = history.history

        print("Plotting training history...")
        fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))
        _plot_history_metric(loss_ax, recorded, 'loss', 'Loss')
        _plot_history_metric(acc_ax, recorded, 'accuracy', 'Accuracy')
        eval_results['training_history_fig'] = _finalize_figure(fig, 'training_history.png')

        # AUC plot - only when the metric was tracked during training
        if 'auc' in recorded:
            fig, ax = plt.subplots(figsize=(8, 6))
            _plot_history_metric(ax, recorded, 'auc', 'AUC')
            eval_results['auc_history_fig'] = _finalize_figure(fig, 'auc_history.png')

    # 7. Prediction distribution
    print("Plotting prediction distribution...")
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.histplot(y_pred_proba, bins=50, kde=True, ax=ax)
    ax.axvline(x=0.5, color='r', linestyle='--')  # decision threshold
    ax.set_title('Distribution of Prediction Probabilities')
    ax.set_xlabel('Predicted Probability')
    ax.set_ylabel('Count')
    eval_results['pred_distribution_fig'] = _finalize_figure(fig, 'prediction_distribution.png')

    # 8. Model performance by user (if user IDs are available)
    # This would require the user_ids from the validation set

    # 9. Generate a summary of the evaluation
    positive = report['1']  # metrics of the positive class
    summary = {
        'accuracy': accuracy,
        'auc': auc_score,
        'precision': positive['precision'],
        'recall': positive['recall'],
        'f1_score': positive['f1-score'],
    }
    eval_results['summary'] = summary

    # Create a markdown report for easy presentation. The report is assembled
    # from unindented lines: markdown indented by four spaces is rendered as a
    # literal code block rather than as headings and a table.
    plot_names = ['Confusion Matrix', 'ROC Curve', 'Precision-Recall Curve']
    if history:
        plot_names.append('Training History')
    plot_names.append('Prediction Distribution')

    report_lines = [
        "# Model Evaluation Report",
        "",
        "## Performance Metrics",
        "",
        "| Metric | Value |",
        "|--------|-------|",
        f"| Accuracy | {accuracy:.4f} |",
        f"| AUC | {auc_score:.4f} |",
        f"| Precision | {positive['precision']:.4f} |",
        f"| Recall | {positive['recall']:.4f} |",
        f"| F1 Score | {positive['f1-score']:.4f} |",
        "",
        "## Key Findings",
        "",
        f"- The model achieved {accuracy*100:.2f}% accuracy on the validation set",
        f"- Area Under the ROC Curve (AUC) is {auc_score:.4f}",
        f"- The model's precision is {positive['precision']:.4f}, meaning that "
        f"{positive['precision']*100:.2f}% of predicted positive answers were actually correct",
        f"- The model's recall is {positive['recall']:.4f}, meaning it correctly identified "
        f"{positive['recall']*100:.2f}% of all correct answers",
        "",
        "## Visualizations",
        "",
        "See the generated plots:",
    ]
    report_lines.extend(f"- {name}" for name in plot_names)
    report_md = "\n".join(report_lines) + "\n"

    with open('model_evaluation_report.md', 'w', encoding='utf-8') as f:
        f.write(report_md)

    print("Evaluation complete! Report saved as 'model_evaluation_report.md'")
    print("All figures have been saved as PNG files")

    return eval_results


# Example usage:
# Add this to your main function after training
def add_evaluation_to_main():
    """
    Example snippet: evaluate a freshly trained model and print its summary.

    The names trained_model, X_val, y_val and history are not defined here;
    they are looked up in the enclosing (global) scope when this runs.
    """
    print("Evaluating model...")
    results = evaluate_model(trained_model, X_val, y_val, history)

    # `results` also holds the full report and the figure entries, which can
    # be used for further analysis or presentation material.
    summary = results['summary']

    print("\nEvaluation Summary:")
    for name in summary:
        print(f"{name}: {summary[name]:.4f}")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc, precision_recall_curve
import numpy as np
import pandas as pd

def calculate_basic_metrics(model, X_val, y_val):
    """
    Calculate basic evaluation metrics for the model.

    Parameters:
    model: Trained Keras model. ``model.evaluate`` must return exactly three
        values, which are unpacked as (loss, accuracy, auc).
    X_val: Validation features
    y_val: Validation labels (binary; cast to int for the classification report)

    Returns:
    dict: Dictionary containing basic evaluation metrics ('loss', 'accuracy',
        'auc', 'precision', 'recall', 'f1_score') together with the
        thresholded predictions ('y_pred') and the raw model output
        ('y_pred_proba', exactly as returned by ``model.predict``)
    """
    print("Calculating basic metrics...")
    y_pred_proba = model.predict(X_val)
    # Threshold at 0.5 and flatten to a 1-D label vector
    y_pred = (y_pred_proba > 0.5).astype(int).flatten()

    # Get metrics from model.evaluate()
    loss, accuracy, auc_score = model.evaluate(X_val, y_val, verbose=0)

    # Get classification report. Labels are cast to int first: the report is
    # keyed by the string form of each label, so float labels would produce
    # '1.0' instead of '1' and the lookup below would raise KeyError.
    y_true = np.asarray(y_val).astype(int).ravel()
    report = classification_report(y_true, y_pred, output_dict=True)
    positive = report['1']  # metrics of the positive class

    metrics = {
        'loss': loss,
        'accuracy': accuracy,
        'auc': auc_score,
        'precision': positive['precision'],
        'recall': positive['recall'],
        'f1_score': positive['f1-score'],
        'y_pred': y_pred,
        'y_pred_proba': y_pred_proba
    }

    print(f"Accuracy: {accuracy:.4f}")
    print(f"AUC: {auc_score:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall: {metrics['recall']:.4f}")
    print(f"F1 Score: {metrics['f1_score']:.4f}")

    return metrics


def plot_confusion_matrix(y_val, y_pred, save_path=None):
    """
    Draw the confusion matrix of the predictions as an annotated heatmap.

    Parameters:
    y_val: Validation labels (ground truth)
    y_pred: Predicted labels
    save_path: File the figure is written to; nothing is saved when omitted

    Returns:
    The matplotlib.pyplot module. The newly created figure is not closed, so
    it remains pyplot's current figure.
    """
    print("Creating confusion matrix...")
    counts = confusion_matrix(y_val, y_pred)

    plt.figure(figsize=(8, 6))
    # fmt='d' prints the cell annotations as plain integers
    sns.heatmap(counts, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()

    if not save_path:
        return plt

    plt.savefig(save_path)
    print(f"Confusion matrix saved to {save_path}")
    return plt


def plot_roc_curve(y_val, y_pred_proba, save_path=None):
    """
    Draw the ROC curve of the predictions next to the chance diagonal.

    Parameters:
    y_val: Validation labels (ground truth)
    y_pred_proba: Predicted probabilities
    save_path: File the figure is written to; nothing is saved when omitted

    Returns:
    tuple: (matplotlib.pyplot module, area under the ROC curve). The newly
    created figure is not closed, so it remains pyplot's current figure.
    """
    print("Plotting ROC curve...")
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y_val, y_pred_proba)
    roc_auc = auc(false_pos_rate, true_pos_rate)
    curve_label = f'ROC curve (area = {roc_auc:.3f})'

    plt.figure(figsize=(8, 6))
    plt.plot(false_pos_rate, true_pos_rate, color='darkorange', lw=2, label=curve_label)
    # Dashed diagonal from (0, 0) to (1, 1) as a visual reference
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic')
    plt.legend(loc="lower right")
    plt.tight_layout()

    if not save_path:
        return plt, roc_auc

    plt.savefig(save_path)
    print(f"ROC curve saved to {save_path}")
    return plt, roc_auc


def plot_precision_recall_curve(y_val, y_pred_proba, save_path=None):
    """
    Draw the precision-recall curve of the predictions.

    Parameters:
    y_val: Validation labels (ground truth)
    y_pred_proba: Predicted probabilities
    save_path: File the figure is written to; nothing is saved when omitted

    Returns:
    tuple: (matplotlib.pyplot module, area under the precision-recall curve).
    The newly created figure is not closed, so it remains pyplot's current
    figure.
    """
    print("Plotting precision-recall curve...")
    precision_vals, recall_vals, _thresholds = precision_recall_curve(y_val, y_pred_proba)
    # Area is integrated with recall on the x axis and precision on the y axis
    pr_auc = auc(recall_vals, precision_vals)
    curve_label = f'PR curve (area = {pr_auc:.3f})'

    plt.figure(figsize=(8, 6))
    plt.plot(recall_vals, precision_vals, color='blue', lw=2, label=curve_label)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve')
    plt.legend(loc="lower left")
    plt.tight_layout()

    if not save_path:
        return plt, pr_auc

    plt.savefig(save_path)
    print(f"Precision-Recall curve saved to {save_path}")
    return plt, pr_auc


def plot_training_history(history, save_path=None):
    """
    Plot the loss and accuracy curves recorded during training.

    Only the series present in ``history.history`` are drawn, so a run without
    validation data (no 'val_loss' / 'val_accuracy') or without an 'accuracy'
    metric no longer raises KeyError; the affected curve is simply omitted.

    Parameters:
    history: Keras history object from model.fit()
    save_path: Path to save the figure (optional)

    Returns:
    The matplotlib.pyplot module (the new figure is left open as pyplot's
    current figure), or None when no history is provided.
    """
    if not history:
        print("No training history provided.")
        return None

    print("Plotting training history...")
    recorded = history.history
    _, axes = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: loss, right panel: accuracy
    panels = (('loss', 'Loss'), ('accuracy', 'Accuracy'))
    for ax, (metric, title) in zip(axes, panels):
        for key, prefix in ((metric, 'Training'), (f'val_{metric}', 'Validation')):
            if key in recorded:
                ax.plot(recorded[key], label=f'{prefix} {title}')
        ax.set_title(f'Model {title}')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(title)
        if ax.lines:
            # Calling legend() on an empty axes only produces a warning.
            ax.legend()

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path)
        print(f"Training history saved to {save_path}")

    return plt


def plot_auc_history(history, save_path=None):
    """
    Plot the AUC recorded per epoch during training.

    The validation curve is drawn only when 'val_auc' was recorded, so a run
    without validation data no longer raises KeyError.

    Parameters:
    history: Keras history object from model.fit()
    save_path: Path to save the figure (optional)

    Returns:
    The matplotlib.pyplot module (the new figure is left open as pyplot's
    current figure), or None when no history or no 'auc' series is available.
    """
    if not history or 'auc' not in history.history:
        print("No AUC history available in the provided history object.")
        return None

    print("Plotting AUC history...")
    recorded = history.history

    plt.figure(figsize=(8, 6))
    plt.plot(recorded['auc'], label='Training AUC')
    if 'val_auc' in recorded:
        plt.plot(recorded['val_auc'], label='Validation AUC')
    plt.title('Model AUC')
    plt.xlabel('Epoch')
    plt.ylabel('AUC')
    plt.legend()
    plt.tight_layout()

    if save_path:
        plt.savefig(save_path)
        print(f"AUC history saved to {save_path}")

    return plt


def plot_prediction_distribution(y_pred_proba, save_path=None):
    """
    Plot distribution of prediction probabilities.

    Parameters:
    y_pred_proba: Predicted probabilities; any array-like shape is accepted
        and flattened to one dimension before plotting
    save_path: Path to save the figure (optional)

    Returns:
    The matplotlib.pyplot module; the new figure is left open as pyplot's
    current figure.
    """
    print("Plotting prediction distribution...")
    # A 2-D array such as an (n, 1) model output would be interpreted by
    # seaborn as wide-form data (one variable per column, with a legend).
    # Flatten it so all values are drawn as a single distribution.
    probabilities = np.ravel(y_pred_proba)

    plt.figure(figsize=(10, 6))
    sns.histplot(probabilities, bins=50, kde=True)
    plt.axvline(x=0.5, color='r', linestyle='--')  # marks the 0.5 threshold
    plt.title('Distribution of Prediction Probabilities')
    plt.xlabel('Predicted Probability')
    plt.ylabel('Count')
    plt.tight_layout()

    if save_path:
        plt.savefig(save_path)
        print(f"Prediction distribution saved to {save_path}")

    return plt


def generate_markdown_report(metrics, save_path='model_evaluation_report.md'):
    """
    Generate a markdown report summarizing model performance.

    Parameters:
    metrics: Dictionary containing evaluation metrics; the keys 'accuracy',
        'auc', 'precision', 'recall' and 'f1_score' are read
    save_path: Path to save the markdown report

    Returns:
    str: Markdown report content (identical to what is written to save_path)
    """
    # Imported locally so the function does not rely on the file's import cell.
    import textwrap

    print("Generating markdown report...")

    accuracy = metrics['accuracy']
    auc_value = metrics['auc']
    precision = metrics['precision']
    recall = metrics['recall']
    f1_score = metrics['f1_score']

    # The template is indented to match the code, so it must be dedented
    # before writing: markdown lines indented by four or more spaces are
    # rendered as a literal code block instead of headings and a table.
    report_md = textwrap.dedent(f"""\
        # Model Evaluation Report

        ## Performance Metrics

        | Metric | Value |
        |--------|-------|
        | Accuracy | {accuracy:.4f} |
        | AUC | {auc_value:.4f} |
        | Precision | {precision:.4f} |
        | Recall | {recall:.4f} |
        | F1 Score | {f1_score:.4f} |

        ## Key Findings

        - The model achieved {accuracy*100:.2f}% accuracy on the validation set
        - Area Under the ROC Curve (AUC) is {auc_value:.4f}
        - The model's precision is {precision:.4f}, meaning that {precision*100:.2f}% of predicted positive answers were actually correct
        - The model's recall is {recall:.4f}, meaning it correctly identified {recall*100:.2f}% of all correct answers

        ## Visualizations

        See the generated plots:
        - Confusion Matrix
        - ROC Curve
        - Precision-Recall Curve
        - Training History
        - Prediction Distribution
        """)

    with open(save_path, 'w', encoding='utf-8') as f:
        f.write(report_md)

    print(f"Markdown report saved to {save_path}")

    return report_md


# Example usage in your main file:
def example_usage():
    """
    Walk through the evaluation helpers in the order a main script would call them.

    The names trained_model, X_val, y_val and history are not defined here;
    they are looked up in the enclosing (global) scope when this runs.
    """
    # Metrics first: the plots below reuse the predictions computed here
    metrics = calculate_basic_metrics(trained_model, X_val, y_val)
    predicted_labels = metrics['y_pred']
    predicted_probabilities = metrics['y_pred_proba']

    # Classification plots
    plot_confusion_matrix(y_val, predicted_labels, save_path='confusion_matrix.png')
    plot_roc_curve(y_val, predicted_probabilities, save_path='roc_curve.png')
    plot_precision_recall_curve(
        y_val,
        predicted_probabilities,
        save_path='precision_recall_curve.png',
    )

    # Training curves (requires the history returned by model.fit)
    plot_training_history(history, save_path='training_history.png')
    plot_auc_history(history, save_path='auc_history.png')

    # Distribution of the predicted probabilities
    plot_prediction_distribution(predicted_probabilities, save_path='prediction_distribution.png')

    # Written to the default report path
    generate_markdown_report(metrics)