# In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
)

def aggregate_labels_from_folder(folder_path):
    """
    Read all CSV files from the folder and aggregate actual_label and predicted_label

    Only direct children of ``folder_path`` whose name ends with ``.csv`` are
    read. A file that cannot be read, or that lacks either of the two
    columns, is reported on stdout and skipped entirely, so the two returned
    lists always have the same length and stay aligned element by element.

    Args:
        folder_path: Directory containing the prediction CSV files.

    Returns:
        A tuple ``(actual_labels, predicted_labels)`` of plain Python lists.
    """
    actual_labels = []
    predicted_labels = []

    # os.listdir() returns entries in arbitrary order; sort so the processing
    # order (and therefore the aggregated lists) is reproducible across runs.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.csv'):
            continue

        file_path = os.path.join(folder_path, filename)
        try:
            df = pd.read_csv(file_path)
            # Extract BOTH columns before touching the accumulators: if one
            # column is missing we must not have already appended the other,
            # otherwise actual/predicted would silently fall out of step.
            file_actual = df['actual_label'].tolist()
            file_predicted = df['predicted_label'].tolist()
        except Exception as e:
            # Best-effort aggregation: report the problem and keep going.
            print(f"Error reading {file_path}: {e}")
            continue

        actual_labels.extend(file_actual)
        predicted_labels.extend(file_predicted)
        print(f"Processed {filename}: {len(df)} samples")

    return actual_labels, predicted_labels

def save_classification_report(predictions, labels, output_dir):
    """
    Build the sklearn classification report, write it to disk and echo it

    The report is written to ``classification_report.txt`` inside
    ``output_dir`` (created if missing) and also printed to stdout.

    Args:
        predictions: Predicted labels.
        labels: Ground-truth labels.
        output_dir: Directory that receives the report file.
    """
    # Display names are matched to the classes in sklearn's label order.
    class_names = ["Non-Argumentative", "Argumentative"]
    report_text = classification_report(
        y_true=labels,
        y_pred=predictions,
        target_names=class_names,
    )

    os.makedirs(output_dir, exist_ok=True)
    report_path = f"{output_dir}/classification_report.txt"
    with open(report_path, "w") as report_file:
        report_file.write(report_text)

    print("Classification report saved!")
    print(report_text)

def plot_confusion_matrix(y_true, y_pred, output_path):
    """
    Generate and save confusion matrix plot

    The heatmap is laid out with the positive ("Arg") class first, i.e.
    ``[[TP, FN], [FP, TN]]``, matching the tick labels and cell annotations.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        output_path: File path of the image to write. Its parent directory
            is created when the path has one.

    Raises:
        ValueError: If the data does not yield a 2x2 confusion matrix
            (anything other than exactly two distinct labels).
    """
    cm = confusion_matrix(y_true, y_pred)
    if cm.shape != (2, 2):
        raise ValueError(
            "Expected a binary (2x2) confusion matrix, "
            f"got shape {cm.shape}"
        )

    # sklearn orders rows/columns by sorted label, so the raw matrix is
    # [[TN, FP], [FN, TP]] when the positive class sorts last. Flip both axes
    # to get [[TP, FN], [FP, TN]], which is what the annotations and tick
    # labels below describe.
    # NOTE(review): assumes the "Argumentative" class is the larger label
    # (e.g. 1 vs 0), consistent with the target_names order used for the
    # classification report in this file - confirm against the data.
    cm = cm[::-1, ::-1].copy()

    cell_names = np.array([
        ['(TP)', '(FN)'],
        ['(FP)', '(TN)']
    ])

    # Object array so each cell can hold a multi-line "name\ncount" string.
    annot = np.empty(cell_names.shape, dtype=object)
    for (i, j), cell_name in np.ndenumerate(cell_names):
        annot[i, j] = f"{cell_name}\n{cm[i, j]}"

    fig = plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=annot, fmt='', cmap="Blues",
                xticklabels=["Predicted Arg", "Predicted Non-Arg"],
                yticklabels=["Actual Arg", "Actual Non-Arg"])
    plt.xlabel('Prediction')
    plt.ylabel('Ground Truth')
    plt.title('Confusion Matrix for Argumentative Classification')
    plt.tight_layout()

    # Create output directory if it doesn't exist. A bare filename has an
    # empty dirname, and os.makedirs('') raises, so skip it in that case.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    plt.savefig(output_path)
    plt.close(fig)
    print(f"Confusion matrix saved to {output_path}")

def main():
    """
    Main function to process all predictions and generate reports

    Reads every CSV in the ``predictions`` folder, then writes a
    classification report and a confusion matrix plot into ``results``.
    Returns early (after printing a message) when the input folder is
    missing or yields no samples.
    """
    folder_path = 'predictions'
    output_dir = 'results'

    # Check that the predictions folder exists. isdir() rather than exists():
    # a regular file with that name would otherwise pass the check and then
    # crash when its contents are listed.
    if not os.path.isdir(folder_path):
        print(f"Error: Folder '{folder_path}' does not exist!")
        return

    # Aggregate labels from all CSV files
    print("Aggregating predictions from all CSV files...")
    actual_labels, predicted_labels = aggregate_labels_from_folder(folder_path)

    if not actual_labels:
        print("No data found in CSV files!")
        return

    print(f"Total samples processed: {len(actual_labels)}")

    # Generate and save classification report
    save_classification_report(predicted_labels, actual_labels, output_dir)

    # Generate and save confusion matrix
    plot_confusion_matrix(actual_labels, predicted_labels, f"{output_dir}/confusion_matrix.png")

    print(f"\nResults saved in '{output_dir}' folder:")
    print("- classification_report.txt")
    print("- confusion_matrix.png")

if __name__ == "__main__":
    main()
