# Analysis from KODIS Fine-Tuning on WIKI pre-trained corpus

# Import Corpora

In [5]:
from convokit import Corpus
# Locations of the three saved KODIS corpora that are compared below.
# NOTE(review): the "resuls" spelling in two directory names is kept exactly
# as written; presumably it matches the on-disk names -- confirm before renaming.
_KODIS_GROUND_PATH = "/Users/mishkin/Desktop/Research/Convo_Kit/ConvoKit_Disputes/data/saved_corpora/corpus_kodis_ground_resuls"
_KODIS_NO_LAST_PATH = "/Users/mishkin/Desktop/Research/Convo_Kit/ConvoKit_Disputes/data/saved_corpora/corpus_kodis_no_last_resuls"
_KODIS_NO_SUBMIT_LAST_PATH = "/Users/mishkin/Desktop/Research/Convo_Kit/ConvoKit_Disputes/data/saved_corpora/corpus_kodis_no_last_submit_results"

# Load each corpus from disk, in the same order as before.
corpus_kodis_ground = Corpus(filename=_KODIS_GROUND_PATH)
corpus_no_last = Corpus(filename=_KODIS_NO_LAST_PATH)
corpus_no_submit_last = Corpus(filename=_KODIS_NO_SUBMIT_LAST_PATH)


# Performance Comparison


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.calibration import CalibrationDisplay

# One entry per KODIS variant:
#   (display label, corpus, forecaster, test-metrics mapping, per-conversation DataFrame)
# The corpus names match the variables created in the "Import Corpora" cell
# (corpus_no_submit_last / corpus_no_last / corpus_kodis_ground), and every
# row carries the forecaster that belongs to the same variant as its label.
# NOTE(review): the forecaster, metrics and DataFrame variables are defined
# outside this cell -- confirm their names against the cells that create them.
corpora_info = [
    ("KODIS_NO_SUBMIT_LAST", corpus_no_submit_last, forecaster_kodis_no_last_submit, nolast_submit_metrics, no_submit_last_convo_df),
    ("KODIS_NO_LAST",        corpus_no_last,        forecaster_kodis_no_last,        nolast_metrics,        nolast_conv_df),
    ("KODIS_GROUND",         corpus_kodis_ground,   forecaster_kodis_ground,         ground_metrics,        ground_conv_df),
]

# Grid layout: one column per variant, one row per diagnostic
# (length, calibration, probability histogram, confusion matrix, metrics text).
n_rows = 5
n_cols = len(corpora_info)
# squeeze=False keeps `axes` two-dimensional even when only one variant is
# listed, so axes[row, col] indexing always works.
fig, axes = plt.subplots(
    n_rows,
    n_cols,
    figsize=(5 * n_cols, 3 * n_rows),
    squeeze=False,
)
fig.suptitle("Comparison Across KODIS Variants", fontsize=18)

# Fill one column of the figure per corpus variant. The forecaster element of
# each tuple is not needed for plotting, hence the underscore-prefixed name.
for col, (label, corpus, _forecaster, metrics, conv_df) in enumerate(corpora_info):
    # Row 0: Average conversation length (number of utterances), restricted
    # to conversations whose "split" metadata equals "test".
    lengths = [
        len(convo.get_utterance_ids())
        for convo in corpus.iter_conversations()
        if convo.meta.get("split") == "test"
    ]
    # np.mean([]) emits a RuntimeWarning before returning nan; produce the
    # same nan quietly when no test conversations are found.
    avg_len = np.mean(lengths) if lengths else float("nan")
    ax0 = axes[0, col]
    ax0.bar([0], [avg_len])
    ax0.set_xticks([])
    ax0.set_title(f"{label}\nAvg Length: {avg_len:.1f}")

    # Ground truth, predicted probability and thresholded prediction.
    # Previously y_true was read from the "forecast" column, i.e. the model's
    # own prediction, which made the confusion matrix trivially perfect and
    # the calibration curve meaningless.
    # NOTE(review): assumes conv_df has the "label" / "score" / "forecast"
    # columns produced by ConvoKit's Forecaster.summarize -- confirm.
    y_true = conv_df["label"]
    y_prob = conv_df["score"]
    y_pred = conv_df["forecast"]

    # Row 1: Calibration curve
    ax1 = axes[1, col]
    CalibrationDisplay.from_predictions(
        y_true=y_true,
        y_prob=y_prob,
        n_bins=10,
        name=label,
        ax=ax1,
    )
    ax1.set_title(f"Calibration Curve for {label}")
    ax1.grid(True)

    # Row 2: Probability histogram (ten equal-width bins over [0, 1])
    ax2 = axes[2, col]
    bins_prob = np.linspace(0, 1, 11)
    ax2.hist(y_prob, bins=bins_prob, edgecolor='k')
    ax2.set_title("Probability Histogram")
    ax2.set_xlabel("Predicted Probability")
    ax2.set_ylabel("Count")
    ax2.grid(True)

    # Row 3: Confusion matrix of true labels vs. thresholded forecasts
    ax3 = axes[3, col]
    ConfusionMatrixDisplay.from_predictions(
        y_true=y_true,
        y_pred=y_pred,
        display_labels=["No Derail", "Derail"],
        cmap="Blues",
        ax=ax3,
    )
    ax3.set_title("Confusion Matrix")

    # Row 4: Metrics summary text (each value rendered with two decimals,
    # so the metric values must be numeric)
    ax4 = axes[4, col]
    ax4.axis('off')
    metrics_text = "\n".join(f"{k}: {v:.2f}" for k, v in metrics.items())
    ax4.text(0.5, 0.5, metrics_text, ha='center', va='center', fontsize=10)
    ax4.set_title("Test Metrics Summary")

# Leave room at the top of the figure for the suptitle.
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()