In [1]:
import numpy as np
import json

# Load the saved evaluation arrays. For .npz archives np.load returns an
# NpzFile that keeps the underlying file open, so use it as a context manager
# to make sure the handle is released once the arrays have been read out.
with np.load("rawnet2_eval_embeddings.npz") as data:
    embeddings = data["embeddings"]   # shape [N, 256]
    labels = data["labels"]           # shape [N]
    scores = data["scores"]           # shape [N]

# The three arrays are consumed row-by-row in lockstep further down; zip()
# would silently truncate on a mismatch, so fail fast here instead.
assert len(embeddings) == len(labels) == len(scores), (
    "embeddings, labels and scores must have the same number of rows"
)

print(embeddings.shape, labels.shape, scores.shape)


(71237, 256) (71237,) (71237,)


In [2]:
def make_explanation(label, score, *, bonafide_threshold=0.3, spoof_threshold=0.7):
    """
    Build a canned natural-language explanation for one utterance.

    Parameters
    ----------
    label : int
        0 selects the bonafide wording; any other value selects the spoof
        wording (the value is not validated).
    score : float
        Spoof probability, expected in [0, 1] (not validated).
    bonafide_threshold : float, keyword-only, default 0.3
        For label 0, scores strictly below this value get the confident
        bonafide text; all other scores get the uncertain bonafide text.
    spoof_threshold : float, keyword-only, default 0.7
        For non-zero labels, scores strictly above this value get the
        confident spoof text; all other scores get the moderate-confidence text.

    Returns
    -------
    str
        One of four fixed one-sentence explanation templates.

    NOTE(review): the wording says "the model classifies/predicts", but the
    branch is chosen from `label` -- confirm with the caller whether `label`
    is the model's decision or the ground-truth annotation.
    """
    if label == 0:
        # bonafide
        if score < bonafide_threshold:
            return (
                "The model is confident this is bonafide: the temporal envelope and "
                "spectral structure appear smooth and consistent with natural human speech, "
                "with no obvious vocoder or synthesis artifacts."
            )
        return (
            "The model classifies this as bonafide but with some uncertainty: most cues "
            "look like natural speech, although a few spectral regions resemble synthesized audio."
        )

    # spoof
    if score > spoof_threshold:
        return (
            "The model is confident this is spoofed audio: it shows irregular spectral patterns, "
            "unnatural high-frequency noise, and inconsistent formant transitions typical of TTS or VC systems."
        )
    return (
        "The model predicts spoofed audio with moderate confidence: some features deviate from "
        "typical human speech, such as subtle temporal distortions and slightly unstable harmonics."
    )


In [3]:
output_path = "rawnet2_eval_explanations.jsonl"

# Stream the dataset out as JSON Lines: one self-contained JSON object per
# utterance, each terminated by a newline.
with open(output_path, "w", encoding="utf-8") as jsonl_file:
    for vector, raw_label, raw_score in zip(embeddings, labels, scores):
        # Convert NumPy scalars to built-in Python types so json can serialise them.
        label_value = int(raw_label)
        score_value = float(raw_score)

        row = {}
        row["embedding"] = vector.tolist()      # 256-d vector
        row["label"] = label_value              # 0 = bonafide, 1 = spoof
        row["score"] = score_value              # spoof probability
        row["explanation"] = make_explanation(label_value, score_value)  # natural language text

        jsonl_file.write(f"{json.dumps(row)}\n")

print(f"Saved explanation dataset to {output_path}")


Saved explanation dataset to rawnet2_eval_explanations.jsonl
