In [1]:
import os
from glob import glob
import json
import pandas as pd

In [2]:

def read_file(f):
    """Load one JSON stats file and make sure it carries a "Method" label.

    Args:
        f: Path of the JSON file to read.

    Returns:
        The decoded JSON object (a dict). If it already holds a truthy
        "Method" entry it is returned untouched; otherwise "Method" is set to
        "Gold" when "use_clean" is truthy, "NBest" when "use_n_best" is
        truthy, and "1Best" in every other case.

    Raises:
        KeyError: if the file has no truthy "Method" and no "use_clean" key.
        AssertionError: if "use_n_best" is falsy but not equal to False
            (for example JSON null).
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(f, "r") as fh:
        j = json.load(fh)

    if j.get("Method", False):
        return j

    if j["use_clean"]:
        j["Method"] = "Gold"
    elif j.get("use_n_best", False):
        j["Method"] = "NBest"
    else:
        # Still rejects falsy values that are not False/0 (e.g. null, ""),
        # exactly as before.
        assert (j.get("use_n_best", False) == False)
        j["Method"] = "1Best"
    return j

    
def get_df(dataset):
    """Collect every JSON stats file of one dataset into a DataFrame.

    Args:
        dataset: Name of the sub-directory under ``stats/`` to scan.

    Returns:
        A DataFrame with one row per file matching ``stats/<dataset>/*.json``,
        each row being the dict returned by ``read_file``. Results that are
        None are skipped.
    """
    records = (read_file(path) for path in glob(f"stats/{dataset}/*.json"))
    return pd.DataFrame([record for record in records if record is not None])


In [3]:
# Load the SNIPS-TTS run statistics, ordered by method name.
df = get_df("snips_tts").sort_values("Method")

In [4]:
# Mean and standard deviation of both F1 scores per method, rounded to 3 decimals.
out = (
    df.groupby("Method")
    .agg({"asr_f1": ["mean", "std"], "clean_f1": ["mean", "std"]})
    .round(3)
)

# Work on a copy so `out` keeps its two-level column index.
snips = out.copy()
snips.columns = [
    "asr_f1 mean",
    "asr_f1 std",
    "clean_f1 mean",
    "clean_f1 std",
]
# Turn "Method" from the index back into a regular column.
snips = snips.reset_index()
snips

Unnamed: 0,Method,asr_f1 mean,asr_f1 std,clean_f1 mean,clean_f1 std
0,1Best,0.923,0.004,0.974,0.006
1,Gold,0.817,0.011,0.981,0.009
2,NBest,0.949,0.004,0.971,0.01


In [5]:
# Load the DSTC-2 run statistics, ordered by method name.
df = get_df("dstc2").sort_values("Method")

In [6]:
# Mean and standard deviation of both F1 scores per method, rounded to 3 decimals.
out = (
    df.groupby("Method")
    .agg({"asr_f1": ["mean", "std"], "clean_f1": ["mean", "std"]})
    .round(3)
)

# Work on a copy so `out` keeps its two-level column index.
dstc2 = out.copy()
dstc2.columns = [
    "asr_f1 mean",
    "asr_f1 std",
    "clean_f1 mean",
    "clean_f1 std",
]
# Turn "Method" from the index back into a regular column.
dstc2 = dstc2.reset_index()
dstc2

Unnamed: 0,Method,asr_f1 mean,asr_f1 std,clean_f1 mean,clean_f1 std
0,1Best,0.579,0.023,0.684,0.034
1,Gold,0.595,0.022,0.829,0.073
2,NBest,0.602,0.024,0.638,0.051


In [7]:
# Flatten the column labels of the most recent aggregate in place, then print
# the per-method mean F1 scores ordered by ASR F1.
out.columns = [
    "asr_f1 mean",
    "asr_f1 std",
    "clean_f1 mean",
    "clean_f1 std",
]
print(
    out.reset_index()
    .sort_values("asr_f1 mean")
    .loc[:, ["Method", "asr_f1 mean", "clean_f1 mean"]]
)

  Method  asr_f1 mean  clean_f1 mean
0  1Best        0.579          0.684
1   Gold        0.595          0.829
2  NBest        0.602          0.638


In [8]:
from Levenshtein import distance, ratio
import textdistance

def AverageRatio(predictions):
    """Average ``Levenshtein.ratio`` between the first hypothesis and the rest.

    Args:
        predictions: Sequence of hypothesis lists. The first entry of each
            list is the one every other entry is compared against.

    Returns:
        The mean over all lists of the mean ``ratio(first, other)`` within
        each list. A list holding a single hypothesis contributes 0.0, and an
        empty ``predictions`` yields 0.
    """
    n_lists = len(predictions)
    per_list = []
    for hypotheses in predictions:
        top = hypotheses[0]
        n_alternatives = len(hypotheses) - 1
        score = 0.0
        for alternative in hypotheses[1:]:
            # Each comparison is weighted so the inner sum is a mean.
            score += ratio(top, alternative) / n_alternatives
        # Pre-divide by the list count so the final sum is already the mean.
        per_list.append(score / n_lists)
    return sum(per_list)

def AverageJaccard(predictions):
    """Average ``textdistance.jaccard`` between the first hypothesis and the rest.

    Hypotheses are split on whitespace and compared as token lists.

    Args:
        predictions: Sequence of hypothesis lists (strings). The first entry
            of each list is the one every other entry is compared against.

    Returns:
        The mean over all lists of the mean ``jaccard(first, other)`` within
        each list. A list holding a single hypothesis contributes 0.0, and an
        empty ``predictions`` yields 0.
    """
    n_lists = len(predictions)
    per_list = []
    for hypotheses in predictions:
        top_tokens = hypotheses[0].split()
        n_alternatives = len(hypotheses) - 1
        score = 0.0
        for alternative in hypotheses[1:]:
            # Each comparison is weighted so the inner sum is a mean.
            score += textdistance.jaccard(top_tokens, alternative.split()) / n_alternatives
        # Pre-divide by the list count so the final sum is already the mean.
        per_list.append(score / n_lists)
    return sum(per_list)

In [9]:
from torchmetrics import WordErrorRate
from experiments.dstc2.data import Dstc2Dataset

# Corpus-level statistics for DSTC-2, pooled over the train and test splits.
m = WordErrorRate()

tr_dstc = Dstc2Dataset("train", True, "<s>")
# The test split is built with the label encoder of the training split.
ts_dstc = Dstc2Dataset("test", True, "<s>", label_encoder=tr_dstc.label_encoder)

all_clean, all_preds, all_nbest, all_nbest_broken = [], [], [], []

# Each item unpacks into four fields; the last one is ignored here.
# Presumably: reference text, 1-best hypothesis, joined n-best string
# -- TODO confirm against Dstc2Dataset.
for dstc_split in (tr_dstc, ts_dstc):
    for i in range(len(dstc_split)):
        clean, preds, nbest, _ = dstc_split[i]
        all_clean.append(clean)
        all_preds.append(preds)
        all_nbest.append(nbest)
        # NOTE(review): this splits on "<s>" while the SNIPS-TTS cell splits on
        # " <s> "; if the joined string pads the separator with spaces, the
        # pieces keep that padding, which would affect the character-level
        # ratio -- confirm against the dataset.
        all_nbest_broken.append(nbest.split("<s>"))

dstc_wer = m(all_preds, all_clean).item()
dstc_average_ratio = AverageRatio(all_nbest_broken)
dstc_average_jaccard = AverageJaccard(all_nbest_broken)
dstc_wer

Num Labels 21
Num Labels 21


0.29046177864074707

In [10]:
from torchmetrics import WordErrorRate
from experiments.snips_tts.data import SnipsTtsDataset

# Corpus-level statistics for SNIPS-TTS, pooled over the train and test splits.
m = WordErrorRate()

tr_snips = SnipsTtsDataset("train", True, "<s>")
ts_snips = SnipsTtsDataset("test", True, "<s>")

all_clean, all_preds, all_nbest, all_nbest_broken = [], [], [], []

# Each item unpacks into four fields; the last one is ignored here.
# Presumably: reference text, 1-best hypothesis, joined n-best string
# -- TODO confirm against SnipsTtsDataset.
for snips_split in (tr_snips, ts_snips):
    for i in range(len(snips_split)):
        clean, preds, nbest, _ = snips_split[i]
        all_clean.append(clean)
        all_preds.append(preds)
        all_nbest.append(nbest)
        # Break the joined n-best string back into individual hypotheses.
        all_nbest_broken.append(nbest.split(" <s> "))

snips_wer = m(all_preds, all_clean).item()
snips_average_ratio = AverageRatio(all_nbest_broken)
snips_average_jaccard = AverageJaccard(all_nbest_broken)
snips_wer

0.4434865713119507

In [11]:
# One record per dataset holding the corpus-level statistics computed above.
d = [
    dict(zip(("Dataset", "WER", "Gestalt-PM", "Jaccard-Index"), record))
    for record in (
        ("DSTC-2", dstc_wer, dstc_average_ratio, dstc_average_jaccard),
        ("SNIPS-TTS", snips_wer, snips_average_ratio, snips_average_jaccard),
    )
]

def compute_delta(table, key, method1, method2):
    """Relative change, in percent, of ``key`` for ``method1`` versus ``method2``.

    Args:
        table: DataFrame with a "Method" column and a numeric ``key`` column.
        key: Name of the column to compare.
        method1: Method whose value is being compared.
        method2: Baseline method the change is measured against.

    Returns:
        ``(value1 - value2) * 100 / value2`` rounded to 2 decimals, using the
        first row whose "Method" equals each name.

    Raises:
        ValueError: if either method does not occur in ``table["Method"]``.
    """
    methods = table["Method"].to_list()
    pos1 = methods.index(method1)
    pos2 = methods.index(method2)

    # list.index yields row *positions*, so read the values positionally.
    # Label-based `table[key][pos]` is only correct for a default RangeIndex.
    values = table[key]
    baseline = values.iloc[pos2]
    return round((values.iloc[pos1] - baseline) * 100 / baseline, 2)

# Build the dataset summary table: corpus statistics plus the relative ASR-F1
# change of NBest over 1Best, sorted by Jaccard index.
d = pd.DataFrame(d).round(3)[["Dataset", "WER", "Gestalt-PM", "Jaccard-Index"]]

# Raw string: "\D" is not a valid escape sequence, so the non-raw spelling
# triggers a SyntaxWarning on Python 3.12+. The label is bound once so the row
# label and the column selection below cannot drift apart.
delta_label = r"$\Delta^{1Best}_{NBest}$"

# Keys 0 and 1 line up with the row positions of DSTC-2 and SNIPS-TTS in `d`,
# which become the column labels once `d` is transposed below.
d_1best_nbest = [{"index": delta_label, 0: compute_delta(dstc2, "asr_f1 mean", "NBest", "1Best"), 1: compute_delta(snips, "asr_f1 mean", "NBest", "1Best")}]
d_1best_nbest = pd.DataFrame(d_1best_nbest)

# Transpose so each statistic is a row, append the delta row, then transpose
# back and promote the first row (the statistic names) to column labels.
d = d.T.reset_index()
d = pd.concat([d, d_1best_nbest]).round(2)
d = d.T
d.columns = d.iloc[0]
d = d.iloc[1:].sort_values("Jaccard-Index", ascending=False)
d = d[["Dataset", "WER", "Jaccard-Index", "Gestalt-PM", delta_label]]
d

index,Dataset,WER,Jaccard-Index,Gestalt-PM,$\Delta^{1Best}_{NBest}$
1,SNIPS-TTS,0.443,0.639,0.882,2.82
0,DSTC-2,0.29,0.6,0.799,3.97


In [12]:
# Emit the summary table as CSV text without the index column.
csv_text = d.to_csv(index=False)
print(csv_text)

Dataset,WER,Jaccard-Index,Gestalt-PM,$\Delta^{1Best}_{NBest}$
SNIPS-TTS,0.443,0.639,0.882,2.82
DSTC-2,0.29,0.6,0.799,3.97



In [13]:
# Report train size, test size and their total for each dataset.
for label, train_split, test_split in (
    ("dstc2", tr_dstc, ts_dstc),
    ("snips", tr_snips, ts_snips),
):
    n_train, n_test = len(train_split), len(test_split)
    print(label, n_train, n_test, n_train + n_test)

dstc2 10886 9159 20045
snips 11015 580 11595
