# Argumentative vs. Non-Argumentative (arg vs. non-arg)

In [4]:
import os
from pathlib import Path
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import json

# Paths
# Directory scanned (non-recursively) for the prediction CSV files.
PREDICTIONS_DIR = Path("./predictions/arg_vs_non-arg_updated_events_echr/RoBERTa")
# Directory that receives classification_report.txt and confusion_matrix.png.
OUTPUT_DIR = Path("./predictions/arg_vs_non-arg_updated_events_echr/RoBERTa_results")
# Created eagerly, as soon as this cell runs, so later writes cannot fail on a missing folder.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Label mapping for display
# Position i is the display name of class id i; the order must stay in sync with
# the label_map defined in main() (0 = non-argumentative, 1 = argumentative).
# NOTE(review): assumes the integers in the `predictions` column use the same
# encoding -- confirm against the code that produced the CSVs.
LABELS = ["Non-Argumentative", "Argumentative"]

def load_all_predictions(pred_dir):
    """Load every CSV prediction file in ``pred_dir`` into a single DataFrame.

    Args:
        pred_dir: Directory (``pathlib.Path`` or ``str``) searched
            non-recursively for ``*.csv`` files.

    Returns:
        pandas.DataFrame: The rows of all files stacked in file-name order,
        re-indexed from 0.

    Raises:
        ValueError: If ``pred_dir`` contains no CSV files.
    """
    # glob() yields files in filesystem-dependent order; sort so that the row
    # order of the combined frame is reproducible across machines and runs.
    csv_files = sorted(Path(pred_dir).glob("*.csv"))
    if not csv_files:
        # pd.concat([]) would raise an opaque "No objects to concatenate";
        # name the offending directory instead.
        raise ValueError(f"No CSV prediction files found in {pred_dir}")
    return pd.concat([pd.read_csv(path) for path in csv_files], ignore_index=True)

def save_classification_report(y_true, y_pred, output_dir):
    """Write scikit-learn's text classification report to ``classification_report.txt``.

    Args:
        y_true: Ground-truth class ids; id ``i`` is reported under ``LABELS[i]``.
        y_pred: Predicted class ids, aligned element-wise with ``y_true``.
        output_dir: Directory (``pathlib.Path``) that receives the file; it is
            created if it does not exist.
    """
    # Pin the label set explicitly. Without `labels=`, scikit-learn infers the
    # classes from the data, and a class that is absent from both y_true and
    # y_pred makes the inferred count differ from len(LABELS), which raises.
    report = classification_report(
        y_true,
        y_pred,
        labels=list(range(len(LABELS))),
        target_names=LABELS,
        zero_division=0,
    )
    output_dir.mkdir(parents=True, exist_ok=True)
    # Explicit encoding so the written file does not depend on the platform locale.
    with open(output_dir / "classification_report.txt", "w", encoding="utf-8") as f:
        f.write(report)

def save_confusion_matrix(y_true, y_pred, output_dir):
    """Render the confusion matrix as a heatmap and save it as ``confusion_matrix.png``.

    Args:
        y_true: Ground-truth class ids; id ``i`` is displayed as ``LABELS[i]``.
        y_pred: Predicted class ids, aligned element-wise with ``y_true``.
        output_dir: Directory (``pathlib.Path``) that receives the image; it is
            created if it does not exist.
    """
    # Pin the label set so the matrix is always len(LABELS) x len(LABELS) and
    # lines up with the tick labels even if a class never occurs in the data.
    cm = confusion_matrix(y_true, y_pred, labels=list(range(len(LABELS))))

    fig, ax = plt.subplots(figsize=(6, 5))
    try:
        sns.heatmap(
            cm,
            annot=True,
            fmt="d",
            cmap="Blues",
            xticklabels=LABELS,
            yticklabels=LABELS,
            ax=ax,
        )
        # confusion_matrix puts true classes on rows and predictions on columns.
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title("Confusion Matrix - Argumentative vs Non-Argumentative")
        fig.tight_layout()
        output_dir.mkdir(parents=True, exist_ok=True)
        fig.savefig(output_dir / "confusion_matrix.png")
    finally:
        # Release the figure even when plotting or saving fails, so repeated
        # runs in one kernel do not accumulate open figures.
        plt.close(fig)

def main():
    """Evaluate the argumentative vs. non-argumentative predictions and save the results.

    Loads every prediction CSV from ``PREDICTIONS_DIR``, converts the string
    ``label`` column to class ids, and writes the classification report and
    the confusion matrix plot to ``OUTPUT_DIR``.

    Raises:
        ValueError: If a required column is missing, a ground-truth label is
            not in the label map, or a prediction is missing or is not one of
            the known class ids.
    """
    # Load all prediction files into one DataFrame
    df = load_all_predictions(PREDICTIONS_DIR)

    # Verify expected columns
    for col in ["label", "predictions"]:
        if col not in df.columns:
            raise ValueError(f"Expected column '{col}' not found in predictions files.")

    # Convert labels to numerical for evaluation:
    label_map = {"non-argumentative": 0, "argumentative": 1}

    # Normalise via astype(str) first: the .str accessor raises AttributeError
    # on a non-string column, which would bypass the explicit check below.
    # Stripping also tolerates stray whitespace around otherwise valid labels.
    y_true = df["label"].astype(str).str.strip().str.lower().map(label_map)

    # Check for any missing label mappings
    if y_true.isnull().any():
        missing = df.loc[y_true.isnull(), "label"].unique()
        raise ValueError(f"Unexpected labels found in true labels: {missing}")

    # Validate predictions before casting so the error names the real problem.
    if df["predictions"].isnull().any():
        raise ValueError("Missing values found in the 'predictions' column.")
    y_pred = df["predictions"].astype(int)
    unknown = sorted({int(v) for v in y_pred.unique()} - set(label_map.values()))
    if unknown:
        raise ValueError(f"Predicted class ids outside the label map: {unknown}")

    # Save results
    save_dir = OUTPUT_DIR
    save_classification_report(y_true, y_pred, save_dir)
    save_confusion_matrix(y_true, y_pred, save_dir)

    print(f"Classification report and confusion matrix saved to {save_dir}")

# Run the evaluation when this cell/script is executed directly rather than imported.
if __name__ == "__main__":
    main()


Classification report and confusion matrix saved to predictions/arg_vs_non-arg_updated_events_echr/RoBERTa_results


# Premise vs. Conclusion (prem vs. conc)

In [8]:
import os
from pathlib import Path
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import json

# Paths
# Directory scanned (non-recursively) for the prediction CSV files.
PREDICTIONS_DIR = Path("./predictions/prem_vs_conc_updated_events_echr/RoBERTa")
# Directory that receives classification_report.txt and confusion_matrix.png.
OUTPUT_DIR = Path("./predictions/prem_vs_conc_updated_events_echr/RoBERTa_results")
# Created eagerly, as soon as this cell runs, so later writes cannot fail on a missing folder.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Label names and mapping
# LABELS[i] is the display name of class id i; label_map converts the lowercased
# ground-truth strings to those ids, so the two must list classes in the same order.
# NOTE(review): assumes the integers in the `predictions` column use the same
# encoding -- confirm against the code that produced the CSVs.
LABELS = ["Conclusion", "Premise"]
label_map = {"conclusion": 0, "premise": 1}

def load_all_predictions(pred_dir):
    """Load every CSV prediction file in ``pred_dir`` into one DataFrame.

    Args:
        pred_dir: Directory (``pathlib.Path`` or ``str``) searched
            non-recursively for ``*.csv`` files.

    Returns:
        pandas.DataFrame: The rows of all files stacked in file-name order,
        re-indexed from 0.

    Raises:
        ValueError: If ``pred_dir`` contains no CSV files.
    """
    # glob() yields files in filesystem-dependent order; sort so that the row
    # order of the combined frame is reproducible across machines and runs.
    csv_files = sorted(Path(pred_dir).glob("*.csv"))
    if not csv_files:
        # pd.concat([]) would raise an opaque "No objects to concatenate";
        # name the offending directory instead.
        raise ValueError(f"No CSV prediction files found in {pred_dir}")
    return pd.concat([pd.read_csv(path) for path in csv_files], ignore_index=True)

def save_classification_report(y_true, y_pred, output_dir):
    """Write scikit-learn's text classification report to ``classification_report.txt``.

    Args:
        y_true: Ground-truth class ids; id ``i`` is reported under ``LABELS[i]``.
        y_pred: Predicted class ids, aligned element-wise with ``y_true``.
        output_dir: Directory (``pathlib.Path``) that receives the file; it is
            created if it does not exist.
    """
    # Pin the label set explicitly. Without `labels=`, scikit-learn infers the
    # classes from the data, and a class that is absent from both y_true and
    # y_pred makes the inferred count differ from len(LABELS), which raises.
    report = classification_report(
        y_true,
        y_pred,
        labels=list(range(len(LABELS))),
        target_names=LABELS,
        zero_division=0,
    )
    # Do not rely on the caller having created the directory beforehand.
    output_dir.mkdir(parents=True, exist_ok=True)
    # Explicit encoding so the written file does not depend on the platform locale.
    with open(output_dir / "classification_report.txt", "w", encoding="utf-8") as f:
        f.write(report)

def save_confusion_matrix(y_true, y_pred, output_dir):
    """Render the confusion matrix as a heatmap and save it as ``confusion_matrix.png``.

    Args:
        y_true: Ground-truth class ids; id ``i`` is displayed as ``LABELS[i]``.
        y_pred: Predicted class ids, aligned element-wise with ``y_true``.
        output_dir: Directory (``pathlib.Path``) that receives the image; it is
            created if it does not exist.
    """
    # Pin the label set so the matrix is always len(LABELS) x len(LABELS) and
    # lines up with the tick labels even if a class never occurs in the data.
    cm = confusion_matrix(y_true, y_pred, labels=list(range(len(LABELS))))

    fig, ax = plt.subplots(figsize=(6, 5))
    try:
        sns.heatmap(cm, annot=True, fmt='d', cmap="Blues",
                    xticklabels=LABELS,
                    yticklabels=LABELS,
                    ax=ax)
        # confusion_matrix puts true classes on rows and predictions on columns.
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title("Confusion Matrix - Premise vs Conclusion")
        fig.tight_layout()
        # Do not rely on the caller having created the directory beforehand.
        output_dir.mkdir(parents=True, exist_ok=True)
        fig.savefig(output_dir / "confusion_matrix.png")
    finally:
        # Release the figure even when plotting or saving fails, so repeated
        # runs in one kernel do not accumulate open figures.
        plt.close(fig)

def main():
    """Evaluate the premise vs. conclusion predictions and save the results.

    Loads every prediction CSV from ``PREDICTIONS_DIR``, converts the string
    ``label`` column to class ids via ``label_map``, and writes the
    classification report and the confusion matrix plot to ``OUTPUT_DIR``.

    Raises:
        ValueError: If a required column is missing, a ground-truth label is
            not in ``label_map``, or a prediction is missing or is not one of
            the known class ids.
    """
    df = load_all_predictions(PREDICTIONS_DIR)

    # Fail early with a readable message instead of a bare KeyError further down.
    for col in ("label", "predictions"):
        if col not in df.columns:
            raise ValueError(f"Expected column '{col}' not found in predictions files.")

    # Map true labels (strings) to integers. astype(str) comes first because the
    # .str accessor raises AttributeError on a non-string column, which would
    # bypass the explicit check below; strip() tolerates stray whitespace.
    y_true = df["label"].astype(str).str.strip().str.lower().map(label_map)

    # Check for unexpected labels
    if y_true.isnull().any():
        unexpected = df.loc[y_true.isnull(), "label"].unique()
        raise ValueError(f"Unexpected labels found in ground truth: {unexpected}")

    # Validate predictions before casting so the error names the real problem.
    if df["predictions"].isnull().any():
        raise ValueError("Missing values found in the 'predictions' column.")
    y_pred = df["predictions"].astype(int)
    unknown = sorted({int(v) for v in y_pred.unique()} - set(label_map.values()))
    if unknown:
        raise ValueError(f"Predicted class ids outside the label map: {unknown}")

    # Save classification report and confusion matrix
    save_classification_report(y_true, y_pred, OUTPUT_DIR)
    save_confusion_matrix(y_true, y_pred, OUTPUT_DIR)

    print(f"Classification report and confusion matrix saved to {OUTPUT_DIR}")

# Run the evaluation when this cell/script is executed directly rather than imported.
if __name__ == "__main__":
    main()


Classification report and confusion matrix saved to predictions/prem_vs_conc_updated_events_echr/RoBERTa_results


# Premise vs. Conclusion vs. Non-Argumentative (p_c_na)

In [12]:
import os
from pathlib import Path
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Paths
# Directory scanned (non-recursively) for the prediction CSV files.
PREDICTIONS_DIR = Path("./predictions/p_c_na_updated_events_echr/RoBERTa")
# Directory that receives classification_report.txt and confusion_matrix.png.
OUTPUT_DIR = Path("./predictions/p_c_na_updated_events_echr/RoBERTa_results")
# Created eagerly, as soon as this cell runs, so later writes cannot fail on a missing folder.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Label names and mapping
# LABELS[i] is the display name of class id i; label_map converts the lowercased
# ground-truth strings to those ids, so the two must list classes in the same order.
# NOTE(review): assumes the integers in the `predictions` column use the same
# encoding -- confirm against the code that produced the CSVs.
LABELS = ["Non-Argumentative", "Premise", "Conclusion"]
label_map = {"non-argumentative": 0, "premise": 1, "conclusion": 2}

def load_all_predictions(pred_dir):
    """Load every CSV prediction file in ``pred_dir`` and combine them into one DataFrame.

    Args:
        pred_dir: Directory (``pathlib.Path`` or ``str``) searched
            non-recursively for ``*.csv`` files.

    Returns:
        pandas.DataFrame: The rows of all files stacked in file-name order,
        re-indexed from 0.

    Raises:
        ValueError: If ``pred_dir`` contains no CSV files.
    """
    # glob() yields files in filesystem-dependent order; sort so that the row
    # order of the combined frame is reproducible across machines and runs.
    csv_files = sorted(Path(pred_dir).glob("*.csv"))
    if not csv_files:
        # pd.concat([]) would raise an opaque "No objects to concatenate";
        # name the offending directory instead.
        raise ValueError(f"No CSV prediction files found in {pred_dir}")
    return pd.concat([pd.read_csv(path) for path in csv_files], ignore_index=True)

def save_classification_report(y_true, y_pred, output_dir):
    """Write scikit-learn's text classification report to ``classification_report.txt``.

    Args:
        y_true: Ground-truth class ids; id ``i`` is reported under ``LABELS[i]``.
        y_pred: Predicted class ids, aligned element-wise with ``y_true``.
        output_dir: Directory (``pathlib.Path``) that receives the file; it is
            created if it does not exist.
    """
    # Pin the label set explicitly. Without `labels=`, scikit-learn infers the
    # classes from the data, and a class that is absent from both y_true and
    # y_pred makes the inferred count differ from len(LABELS), which raises.
    report = classification_report(
        y_true,
        y_pred,
        labels=list(range(len(LABELS))),
        target_names=LABELS,
        zero_division=0,
    )
    # Do not rely on the caller having created the directory beforehand.
    output_dir.mkdir(parents=True, exist_ok=True)
    # Explicit encoding so the written file does not depend on the platform locale.
    with open(output_dir / "classification_report.txt", "w", encoding="utf-8") as f:
        f.write(report)

def save_confusion_matrix(y_true, y_pred, output_dir):
    """Render the confusion matrix as a heatmap and save it as ``confusion_matrix.png``.

    Args:
        y_true: Ground-truth class ids; id ``i`` is displayed as ``LABELS[i]``.
        y_pred: Predicted class ids, aligned element-wise with ``y_true``.
        output_dir: Directory (``pathlib.Path``) that receives the image; it is
            created if it does not exist.
    """
    # Pin the label set so the matrix is always len(LABELS) x len(LABELS) and
    # lines up with the tick labels even if a class never occurs in the data.
    cm = confusion_matrix(y_true, y_pred, labels=list(range(len(LABELS))))

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                    xticklabels=LABELS,
                    yticklabels=LABELS,
                    ax=ax)
        # confusion_matrix puts true classes on rows and predictions on columns.
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title("Confusion Matrix - Premise vs Conclusion vs Non-Argumentative")
        fig.tight_layout()
        # Do not rely on the caller having created the directory beforehand.
        output_dir.mkdir(parents=True, exist_ok=True)
        fig.savefig(output_dir / "confusion_matrix.png")
    finally:
        # Release the figure even when plotting or saving fails, so repeated
        # runs in one kernel do not accumulate open figures.
        plt.close(fig)

def main():
    """Evaluate the premise / conclusion / non-argumentative predictions and save the results.

    Loads every prediction CSV from ``PREDICTIONS_DIR``, converts the string
    ``label`` column to class ids via ``label_map``, and writes the
    classification report and the confusion matrix plot to ``OUTPUT_DIR``.

    Raises:
        ValueError: If a required column is missing, a ground-truth label is
            not in ``label_map``, or a prediction is missing or is not one of
            the known class ids.
    """
    df = load_all_predictions(PREDICTIONS_DIR)

    # Fail early with a readable message instead of a bare KeyError further down.
    for col in ("label", "predictions"):
        if col not in df.columns:
            raise ValueError(f"Expected column '{col}' not found in predictions files.")

    # Map string labels to integers. astype(str) comes first because the .str
    # accessor raises AttributeError on a non-string column, which would bypass
    # the explicit check below; strip() tolerates stray whitespace.
    y_true = df["label"].astype(str).str.strip().str.lower().map(label_map)

    # Validate label mappings
    if y_true.isnull().any():
        unexpected = df.loc[y_true.isnull(), "label"].unique()
        raise ValueError(f"Unexpected true labels: {unexpected}")

    # Validate predictions before casting so the error names the real problem.
    if df["predictions"].isnull().any():
        raise ValueError("Missing values found in the 'predictions' column.")
    y_pred = df["predictions"].astype(int)
    unknown = sorted({int(v) for v in y_pred.unique()} - set(label_map.values()))
    if unknown:
        raise ValueError(f"Predicted class ids outside the label map: {unknown}")

    # Save reports and plots
    save_classification_report(y_true, y_pred, OUTPUT_DIR)
    save_confusion_matrix(y_true, y_pred, OUTPUT_DIR)

    print(f"Saved classification report and confusion matrix to {OUTPUT_DIR}")

# Run the evaluation when this cell/script is executed directly rather than imported.
if __name__ == "__main__":
    main()


Saved classification report and confusion matrix to predictions/p_c_na_updated_events_echr/RoBERTa_results
