In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
import glob

def load_all_predictions(folder_path):
    """Load every CSV in *folder_path* and aggregate the label columns.

    Files are read in sorted path order so the aggregated lists are
    reproducible across runs and platforms (``glob.glob`` returns matches in
    arbitrary, filesystem-dependent order).

    Args:
        folder_path: Directory searched (non-recursively) for ``*.csv`` files.

    Returns:
        A ``(actual_labels, predicted_labels)`` tuple of lists. The actual
        labels come from the third column of each file and the predicted
        labels from the fourth. Both lists are empty when no CSV is found.

    Raises:
        IndexError: If a CSV file has fewer than four columns.
    """
    csv_files = sorted(glob.glob(os.path.join(folder_path, '*.csv')))

    all_actual_labels = []
    all_predicted_labels = []

    for file_path in csv_files:
        df = pd.read_csv(file_path)
        column_count = df.shape[1]
        if column_count < 4:
            # Same exception type positional indexing would raise, but with
            # the offending file named so the bad input can be located.
            raise IndexError(
                f"{file_path}: expected at least 4 columns, found {column_count}"
            )
        # Columns are addressed by position: index 2 holds the actual label,
        # index 3 the predicted label.
        all_actual_labels.extend(df.iloc[:, 2].tolist())
        all_predicted_labels.extend(df.iloc[:, 3].tolist())

    return all_actual_labels, all_predicted_labels

def save_classification_report(predictions, labels, output_dir):
    """Build a classification report, write it to disk, and return it.

    The class order is passed to scikit-learn explicitly instead of relying
    on its implicit sorted-label order. This keeps each row name attached to
    the right class and lets the report be produced even when one of the two
    classes does not occur in the data (otherwise scikit-learn raises because
    the number of inferred classes no longer matches ``target_names``).

    Args:
        predictions: Predicted labels, using the display names
            ``"Argumentative"`` / ``"Non-Argumentative"``.
        labels: Ground-truth labels, same encoding and length as
            ``predictions``.
        output_dir: Directory that receives ``classification_report.txt``;
            created if it does not exist.

    Returns:
        The report text as produced by ``classification_report``.
    """
    class_names = ["Argumentative", "Non-Argumentative"]
    # Note the argument order: scikit-learn expects (y_true, y_pred).
    report = classification_report(
        labels, predictions,
        labels=class_names,
        target_names=class_names,
    )

    os.makedirs(output_dir, exist_ok=True)
    report_path = os.path.join(output_dir, "classification_report.txt")
    with open(report_path, "w", encoding="utf-8") as f:
        f.write(report)

    return report

def plot_confusion_matrix(y_true, y_pred, output_path,
                          labels=("Argumentative", "Non-Argumentative")):
    """Render the 2x2 confusion matrix as an annotated heatmap and save it.

    The class order is pinned via ``labels`` so that rows/columns, the
    TP/FN/FP/TN annotations, and the axis tick labels all agree. Without an
    explicit order scikit-learn sorts the classes found in the data, which
    for the string labels used here puts "Argumentative" first; the tick
    labels must follow that same order.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, same encoding and length as ``y_true``.
        output_path: File path the figure is written to.
        labels: ``(positive, negative)`` class values as they appear in the
            data. The first entry is treated as the positive
            ("Argumentative") class.

    Raises:
        ValueError: If ``labels`` does not contain exactly two entries.
    """
    class_order = list(labels)
    if len(class_order) != 2:
        raise ValueError("labels must contain exactly two class values")

    # Explicit labels also guarantee a 2x2 matrix when a class is absent.
    cm = confusion_matrix(y_true, y_pred, labels=class_order)

    # Row = actual class, column = predicted class, positive class first.
    cell_tags = np.array([
        ['(TP)', '(FN)'],
        ['(FP)', '(TN)']
    ])

    # Object dtype so each cell can hold a multi-line "tag\ncount" string.
    annot = np.empty_like(cell_tags, dtype=object)
    for row, col in np.ndindex(cell_tags.shape):
        annot[row, col] = f"{cell_tags[row, col]}\n{cm[row, col]}"

    plt.figure(figsize=(8, 6))
    # fmt='' because the annotations are already formatted strings.
    sns.heatmap(cm, annot=annot, fmt='', cmap="Blues",
                xticklabels=["Predicted Arg", "Predicted Non-Arg"],
                yticklabels=["Actual Arg", "Actual Non-Arg"])
    plt.xlabel('Prediction')
    plt.ylabel('Ground Truth')
    plt.title('Confusion Matrix for Argumentative Classification')
    plt.tight_layout()
    plt.savefig(output_path)
    plt.close()

# Main execution
def main():
    """Evaluate the aggregated predictions and write the result artifacts.

    Loads all prediction CSVs from ``predictions``, converts the raw labels
    to their display names, then writes and prints the classification report
    and saves the confusion-matrix plot under ``results``.

    Raises:
        ValueError: If no prediction rows were loaded.
        KeyError: If a label is neither "argumentative" nor
            "non-argumentative" (ignoring case and surrounding whitespace).
    """
    folder_path = "predictions"
    output_dir = "results"

    # Load all predictions from CSV files
    actual_labels, predicted_labels = load_all_predictions(folder_path)
    if not actual_labels:
        # Fail early with a clear message instead of an obscure error from
        # the metric functions when they receive empty input.
        raise ValueError(f"No prediction rows found in '{folder_path}'")

    # Map raw labels to the display names used in the report and the plot
    label_map = {'non-argumentative': 'Non-Argumentative', 'argumentative': 'Argumentative'}

    def to_display_name(label):
        """Return the display name for a raw label, tolerating case/whitespace."""
        try:
            return label_map[str(label).strip().lower()]
        except KeyError:
            raise KeyError(
                f"Unexpected label {label!r}; expected one of {sorted(label_map)}"
            ) from None

    actual_mapped = [to_display_name(label) for label in actual_labels]
    predicted_mapped = [to_display_name(label) for label in predicted_labels]

    # Generate classification report
    report = save_classification_report(predicted_mapped, actual_mapped, output_dir)
    print("Classification Report:")
    print(report)

    # Generate confusion matrix
    matrix_path = f"{output_dir}/confusion_matrix.png"
    plot_confusion_matrix(actual_mapped, predicted_mapped, matrix_path)
    print(f"Confusion matrix saved to {matrix_path}")

# Run the evaluation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()


Classification Report:
                   precision    recall  f1-score   support

    Argumentative       0.56      0.74      0.64       655
Non-Argumentative       0.74      0.55      0.63       847

         accuracy                           0.64      1502
        macro avg       0.65      0.65      0.64      1502
     weighted avg       0.66      0.64      0.64      1502

Confusion matrix saved to results/confusion_matrix.png
