In [3]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
)


def save_classification_report(predictions, labels, output_dir):
    """Write a per-class precision/recall/F1 report to a text file.

    The report is saved as ``classification_report.txt`` inside
    ``output_dir``.

    Args:
        predictions: Predicted class names, one per sample.
        labels: Ground-truth class names, aligned with ``predictions``.
        output_dir: Directory that receives the report file; it must
            already exist.
    """
    # Alphabetical order is the order scikit-learn infers for these string
    # labels, so the report layout is unchanged when all classes occur.
    class_names = ["conclusion", "non-argumentative", "premise"]

    # Pass ``labels`` explicitly so every display name is bound to its class.
    # When the classes are inferred from the data instead, a class that never
    # occurs leaves fewer inferred classes than ``target_names`` entries and
    # scikit-learn raises a ValueError.
    report = classification_report(
        labels,
        predictions,
        labels=class_names,
        target_names=class_names,
    )

    report_path = os.path.join(output_dir, "classification_report.txt")
    with open(report_path, "w", encoding="utf-8") as f:
        f.write(report)


def plot_confusion_matrix(y_true, y_pred, output_path):
    """Save the three-class confusion matrix as an annotated heatmap image.

    Rows are ground-truth classes and columns are predicted classes, both in
    the order non-argumentative, premise, conclusion. Each cell shows a fixed
    tag on the first line and its count on the second.

    Args:
        y_true: Ground-truth class names.
        y_pred: Predicted class names, aligned with ``y_true``.
        output_path: File path the figure is written to.
    """
    counts = confusion_matrix(
        y_true,
        y_pred,
        labels=["non-argumentative", "premise", "conclusion"],
    )

    # Fixed tag per cell: (TP) on the diagonal, (FN) above it, (FP) below it.
    cell_tags = (
        ('(TP)', '(FN)', '(FN)'),
        ('(FP)', '(TP)', '(FN)'),
        ('(FP)', '(FP)', '(TP)'),
    )

    # Object array of "tag\ncount" strings, same shape as the count matrix.
    cell_text = np.array(
        [
            [f"{tag}\n{count}" for tag, count in zip(tag_row, count_row)]
            for tag_row, count_row in zip(cell_tags, counts)
        ],
        dtype=object,
    )

    column_names = ["Predicted Non-Arg", "Predicted Premise", "Predicted Conclusion"]
    row_names = ["Actual Non-Arg", "Actual Premise", "Actual Conclusion"]

    fig = plt.figure(figsize=(8, 6))
    # fmt='' because the annotations are already fully formatted strings.
    ax = sns.heatmap(
        counts,
        annot=cell_text,
        fmt='',
        cmap="Blues",
        xticklabels=column_names,
        yticklabels=row_names,
    )
    ax.set_xlabel('Prediction')
    ax.set_ylabel('Ground Truth')
    ax.set_title('Confusion Matrix for Three-Way Classification')
    fig.tight_layout()
    fig.savefig(output_path)
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close(fig)


def main():
    """Pool every prediction CSV and write the evaluation artifacts.

    Reads each ``*.csv`` file in the ``predictions`` folder, collects the
    ``actual_label`` and ``predicted_label`` columns across all files, then
    writes a classification report and a confusion-matrix image into the
    ``output`` folder.

    Raises:
        ValueError: If the CSV files contribute no rows at all.
    """
    folder_path = 'predictions'
    all_actual_labels = []
    all_predicted_labels = []

    # os.listdir returns entries in arbitrary order; sort so the pooled rows
    # are assembled in a reproducible order.
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith('.csv'):
            continue
        df = pd.read_csv(os.path.join(folder_path, file_name))
        all_actual_labels.extend(df['actual_label'].tolist())
        all_predicted_labels.extend(df['predicted_label'].tolist())

    # Fail early with a clear message instead of letting the metric functions
    # raise an unrelated error on empty input.
    if not all_actual_labels:
        raise ValueError(
            f"No prediction rows found in CSV files under '{folder_path}'"
        )

    # Create output directory
    output_dir = 'output'
    os.makedirs(output_dir, exist_ok=True)

    # Save classification report and confusion matrix
    save_classification_report(all_predicted_labels, all_actual_labels, output_dir)
    plot_confusion_matrix(all_actual_labels, all_predicted_labels, f"{output_dir}/confusion_matrix.png")

    print(f"Classification report saved to {output_dir}/classification_report.txt")
    print(f"Confusion matrix saved to {output_dir}/confusion_matrix.png")


# Run the evaluation only when this file is executed directly, not on import.
if __name__ == '__main__':
    main()


Classification report saved to output/classification_report.txt
Confusion matrix saved to output/confusion_matrix.png
