# Small_Scale_Analysis.ipynb
This notebook:
* Obtains values for the various automatic metrics (WER, CER, etc.) versus the Ground Truth for both Google ASR and WhisperX.
* Writes the results to a df.
* Aggregates the results across the entire df, the scripted podcasts, and the non-scripted podcasts.

In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)

import re
import string

# torch metrics
import torch
from torcheval.metrics.functional import word_error_rate
from torcheval.metrics import BLEUScore
from torcheval.metrics.text import WordInformationLost
from torcheval.metrics import WordInformationPreserved

# huggingface metrics
from datasets import load_metric
import evaluate

# set up objects for object-oriented metrics
bleu_score = BLEUScore(n_gram=4)
wil_score = WordInformationLost()
wip_score = WordInformationPreserved()
cer_score = evaluate.load("cer")
bert_score = evaluate.load("bertscore")
rouge_score = evaluate.load('rouge')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df = pd.read_csv("./csv/df-english-fisher-annotations-test-all3.csv")

In [3]:
df["length_GroundTruth"] = -1.0
df["length_GoogleASR"] = -1.0
df["length_WhisperX"] = -1.0

df["wer_GoogleASR"] = -1.0
df["wer_WhisperX"] = -1.0

df["cer_GoogleASR"] = -1.0
df["cer_WhisperX"] = -1.0

df["wil_GoogleASR"] = -1.0
df["wil_WhisperX"] = -1.0

df["wip_GoogleASR"] = -1.0
df["wip_WhisperX"] = -1.0

df["bleu_GoogleASR"] = -1.0
df["bleu_WhisperX"] = -1.0

df["bert_p_GoogleASR"] = -1.0
df["bert_p_WhisperX"] = -1.0
df["bert_r_GoogleASR"] = -1.0
df["bert_r_WhisperX"] = -1.0
df["bert_f_GoogleASR"] = -1.0
df["bert_f_WhisperX"] = -1.0

df["rouge1_GoogleASR"] = -1.0
df["rouge1_WhisperX"] = -1.0
df["rouge2_GoogleASR"] = -1.0
df["rouge2_WhisperX"] = -1.0
df["rougeL_GoogleASR"] = -1.0
df["rougeL_WhisperX"] = -1.0

def count_punctuation(input_string):
    
    # get punctuation string
    p = string.punctuation
    p = p.replace("'","")
    p = p.replace("[","")
    p = p.replace("]","")
    
    count = 0
    for char in input_string:
        if char in p:
            count += 1
    return count

for index, row in df.iterrows():
    
    # transcript length
    for asr in ["WhisperX", "GoogleASR", "GroundTruth"]:
        if row[f"rpl_{asr}"] != "":
            num_words = len(row[f"rpl_{asr}"].split(" "))
        else:
            num_words = 0
        df.loc[index, f"length_{asr}"] = num_words
    
    # specific token counts
    for target in ["uh", "um", "well"]:
        for asr in ["WhisperX", "GoogleASR", "GroundTruth"]:
            df[f"{target}_count_{asr}"] = -1
            df[f"{target}_count_{asr}"] = df[f"rpl_{asr}"].str.count(r"\b"+re.escape(target)+r"\b", flags=re.IGNORECASE)
            
    # english-fisher-annotator parse counts
    for target in ["INTJ", "PRN", "EDITED"]:
        for asr in ["WhisperX", "GoogleASR", "GroundTruth-max", "GroundTruth-min", "GroundTruth-neutral"]:
            df[f"{target}_count_{asr}"] = -1
            df[f"{target}_count_{asr}"] = df[f"{asr}_parse"].str.count(r"\("+re.escape(target)+r" ", flags=re.IGNORECASE)
    
    # WER
    df.loc[index, "wer_GoogleASR"] = word_error_rate(input=row["rpl_GoogleASR"], target=row["rpl_GroundTruth"]).item()
    df.loc[index, "wer_WhisperX"] = word_error_rate(input=row["rpl_WhisperX"], target=row["rpl_GroundTruth"]).item()
    
    # CER
    df.loc[index, "cer_GoogleASR"] = cer_score.compute(predictions=[row["rpl_GoogleASR"]], references=[row["rpl_GroundTruth"]])
    df.loc[index, "cer_WhisperX"] = cer_score.compute(predictions=[row["rpl_WhisperX"]], references=[row["rpl_GroundTruth"]])
    
     # WIL
    wil_score.update([row["rpl_GoogleASR"]], [row["rpl_GroundTruth"]])
    df.loc[index, "wil_GoogleASR"] = wil_score.compute().item()
    
    wil_score.update([row["rpl_WhisperX"]], [row["rpl_GroundTruth"]])
    df.loc[index, "wil_WhisperX"] = wil_score.compute().item()
    
    # WIP
    wip_score.update([row["rpl_GoogleASR"]], [row["rpl_GroundTruth"]])
    df.loc[index, "wip_GoogleASR"] = wip_score.compute().item()
    
    wip_score.update([row["rpl_WhisperX"]], [row["rpl_GroundTruth"]])
    df.loc[index, "wip_WhisperX"] = wip_score.compute().item()
    
    # BLEU
    bleu_score.update([row["rpl_GoogleASR"]], [row["rpl_GroundTruth"]])
    df.loc[index, "bleu_GoogleASR"] = bleu_score.compute().item()
    
    bleu_score.update([row["rpl_WhisperX"]], [row["rpl_GroundTruth"]])
    df.loc[index, "bleu_WhisperX"] = bleu_score.compute().item()
    
    # BERTscore
    google = bert_score.compute(predictions=[row["rpl_GoogleASR"]], references=[row["rpl_GroundTruth"]], lang="en")
    df.loc[index, "bert_p_GoogleASR"] = google["precision"]
    df.loc[index, "bert_r_GoogleASR"] = google["recall"]
    df.loc[index, "bert_f_GoogleASR"] = google["f1"]
    
    whisper = bert_score.compute(predictions=[row["rpl_WhisperX"]], references=[row["rpl_GroundTruth"]], lang="en")
    df.loc[index, "bert_p_WhisperX"] = whisper["precision"]
    df.loc[index, "bert_r_WhisperX"] = whisper["recall"]
    df.loc[index, "bert_f_WhisperX"] = whisper["f1"]
    
    # rouge
    google = rouge_score.compute(predictions=[row["rpl_GoogleASR"]], references=[row["rpl_GroundTruth"]])
    df.loc[index, "rouge1_GoogleASR"] = google["rouge1"]
    df.loc[index, "rouge2_GoogleASR"] = google["rouge2"]
    df.loc[index, "rougeL_GoogleASR"] = google["rougeL"]
    
    whisper = rouge_score.compute(predictions=[row["rpl_WhisperX"]], references=[row["rpl_GroundTruth"]])
    df.loc[index, "rouge1_WhisperX"] = whisper["rouge1"]
    df.loc[index, "rouge2_WhisperX"] = whisper["rouge2"]
    df.loc[index, "rougeL_WhisperX"] = whisper["rougeL"]
    
    # punctuation counts
    df["punctuation_count_GoogleASR"] = df["GoogleASR"].apply(count_punctuation)
    df["punctuation_count_WhisperX"] = df["WhisperX"].apply(count_punctuation)
    df["punctuation_count_GroundTruth-max"] = df["GroundTruth-max"].apply(count_punctuation)
    df["punctuation_count_GroundTruth-min"] = df["GroundTruth-min"].apply(count_punctuation)
    df["punctuation_count_GroundTruth-neutral"] = df["GroundTruth-neutral"].apply(count_punctuation)

display(df)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,nums,ids,labels,percent_diff,show_uri,show_name,show_description,publisher,language,rss_link,episode_uri,episode_name,episode_description,duration,show_filename_prefix,episode_filename_prefix,GroundTruth,GroundTruth-max,GroundTruth-neutral,GroundTruth-min,GoogleASR,WhisperX,rpl_GroundTruth,rpl_GoogleASR,rpl_WhisperX,GroundTruth-max_parse,GroundTruth-max_orig_dys,GroundTruth-max_dys,GroundTruth-min_parse,GroundTruth-min_orig_dys,GroundTruth-min_dys,GroundTruth-neutral_parse,GroundTruth-neutral_orig_dys,GroundTruth-neutral_dys,GoogleASR_parse,GoogleASR_orig_dys,GoogleASR_dys,WhisperX_parse,WhisperX_orig_dys,WhisperX_dys,length_GroundTruth,length_GoogleASR,length_WhisperX,wer_GoogleASR,wer_WhisperX,cer_GoogleASR,cer_WhisperX,wil_GoogleASR,wil_WhisperX,wip_GoogleASR,wip_WhisperX,bleu_GoogleASR,bleu_WhisperX,bert_p_GoogleASR,bert_p_WhisperX,bert_r_GoogleASR,bert_r_WhisperX,bert_f_GoogleASR,bert_f_WhisperX,rouge1_GoogleASR,rouge1_WhisperX,rouge2_GoogleASR,rouge2_WhisperX,rougeL_GoogleASR,rougeL_WhisperX,uh_count_WhisperX,uh_count_GoogleASR,uh_count_GroundTruth,um_count_WhisperX,um_count_GoogleASR,um_count_GroundTruth,well_count_WhisperX,well_count_GoogleASR,well_count_GroundTruth,INTJ_count_WhisperX,INTJ_count_GoogleASR,INTJ_count_GroundTruth-max,INTJ_count_GroundTruth-min,INTJ_count_GroundTruth-neutral,PRN_count_WhisperX,PRN_count_GoogleASR,PRN_count_GroundTruth-max,PRN_count_GroundTruth-min,PRN_count_GroundTruth-neutral,EDITED_count_WhisperX,EDITED_count_GoogleASR,EDITED_count_GroundTruth-max,EDITED_count_GroundTruth-min,EDITED_count_GroundTruth-neutral,punctuation_count_GoogleASR,punctuation_count_WhisperX,punctuation_count_GroundTruth-max,punctuation_count_GroundTruth-min,punctuation_count_GroundTruth-neutral
0,1,1,1,1ll5WGWjWANfGKHZfrpL5A,not-scripted,14.2,spotify:show:5HLsz7WFjW8hzJurMDdozi,Granger Smith Podcast,"American, Texan, father, husband, musician, Gr...",Granger Smith,['en'],https://anchor.fm/s/11170a28/podcast/rss,spotify:episode:1ll5WGWjWANfGKHZfrpL5A,How do you move on from THIS?,"Episode 21: On this week's episode, I answered...",25.8916,show_5HLsz7WFjW8hzJurMDdozi,1ll5WGWjWANfGKHZfrpL5A,if you're ever in deep deep grief and you wond...,"If you're ever in deep, deep grief, and you wo...","If you're ever in deep, deep grief and you won...","If you're ever in deep, deep grief and you won...",If you're ever in deep deep grief and you wond...,"If you're ever in deep, deep grief and you won...",if you're ever in deep deep grief and you wond...,if you're ever in deep deep grief and you wond...,if you're ever in deep deep grief and you wond...,(S (SBAR (UNK if) (S (S (NP (UNK you)) (VP (UN...,if _ you _ 're _ ever _ in _ deep E deep _ gri...,if _ you _ 're _ ever _ in _ deep E deep _ gri...,(S (SBAR (UNK if) (S (S (NP (UNK you)) (VP (UN...,if _ you _ 're _ ever _ in _ deep E deep _ gri...,if _ you _ 're _ ever _ in _ deep E deep _ gri...,(S (SBAR (UNK if) (S (S (NP (UNK you)) (VP (UN...,if _ you _ 're _ ever _ in _ deep E deep _ gri...,if _ you _ 're _ ever _ in _ deep E deep _ gri...,(S (SBAR (UNK if) (S (S (NP (UNK you)) (VP (UN...,if _ you _ 're _ ever _ in _ deep E deep _ gri...,if _ you _ 're _ ever _ in _ deep E deep _ gri...,(S (SBAR (UNK if) (S (S (NP (UNK you)) (VP (UN...,if _ you _ 're _ ever _ in _ deep E deep _ gri...,if _ you _ 're _ ever _ in _ deep E deep _ gri...,267.0,260.0,255.0,0.089888,0.108614,0.058296,0.068012,0.149395,0.158718,0.850605,0.841282,0.818645,0.809326,0.983228,0.98201,0.969828,0.967203,0.976482,0.97455,0.93381,0.927798,0.868941,0.862319,0.93381,0.927798,0,0,3,0,0,3,0,0,0,1,1,7,7,6,0,1,0,1,0,2,8,8,7,8,22,42,68,53,60
1,2,2,2,1wHRMsWVurmo56xd1AcSfe,scripted,1.24,spotify:show:1g056e2x0Y9AwW6CQF3qA5,Mythology,Myths endure for a reason. This episodic audio...,Parcast Network,['en'],https://feeds.megaphone.fm/mythology,spotify:episode:1wHRMsWVurmo56xd1AcSfe,Orphan Boy & Elk Dog Pt. 1,Long Arrow is an orphan and an outcast—unable ...,36.144767,show_1g056e2x0Y9AwW6CQF3qA5,1wHRMsWVurmo56xd1AcSfe,something to note all myths have many versions...,Something to note: all myths have many version...,Something to note: all myths have many version...,Something to note: all myths have many version...,Something to note all myths have many versions...,"Something to note, all myths have many version...",something to note all myths have many versions...,something to note all myths have many versions...,something to note all myths have many versions...,(NP (NP (UNK something)) (SBAR (S (VP (UNK to)...,something _ to _ note _ all _ myths _ have _ m...,something _ to _ note _ all _ myths _ have _ m...,(NP (NP (UNK something)) (SBAR (S (VP (UNK to)...,something _ to _ note _ all _ myths _ have _ m...,something _ to _ note _ all _ myths _ have _ m...,(NP (NP (UNK something)) (SBAR (S (VP (UNK to)...,something _ to _ note _ all _ myths _ have _ m...,something _ to _ note _ all _ myths _ have _ m...,(NP (NP (UNK something)) (SBAR (S (VP (UNK to)...,something _ to _ note _ all _ myths _ have _ m...,something _ to _ note _ all _ myths _ have _ m...,(NP (NP (UNK something)) (SBAR (S (VP (UNK to)...,something _ to _ note _ all _ myths _ have _ m...,something _ to _ note _ all _ myths _ have _ m...,269.0,271.0,267.0,0.037175,0.022305,0.01355,0.008808,0.127692,0.105235,0.872308,0.894765,0.849952,0.877995,0.98942,0.99483,0.98986,0.993735,0.98964,0.994283,0.971326,0.983784,0.938849,0.969259,0.967742,0.983784,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,3,4,0,2,2,18,40,51,45,45
2,3,3,3,3i4qZ6FqzXdj8UiMeuaX3q,not-scripted,13.8,spotify:show:2z1LtWVnflRUONFAo0FADb,Gamecock Central Podcast Network,The Gamecock Central Podcast Network brings yo...,Gamecock Central Podcasts,['en'],https://www.spreaker.com/show/957147/episodes/...,spotify:episode:3i4qZ6FqzXdj8UiMeuaX3q,The Hard Fowl: Team Victory and SEC Tournament...,Collyn Taylor and Pearson Fowler break down Ca...,63.8972,show_2z1LtWVnflRUONFAo0FADb,3i4qZ6FqzXdj8UiMeuaX3q,hello and welcome back into another episode of...,"Hello, and welcome back into another episode o...",Hello and welcome back into another episode of...,Hello and welcome back into another episode of...,Hello and welcome back in to another episode o...,Hello and welcome back into another episode of...,hello and welcome back into another episode of...,hello and welcome back in to another episode o...,hello and welcome back into another episode of...,(FRAG (INTJ (UNK hello)) (UNK and) (VP (UNK we...,hello _ and _ welcome _ back _ into _ another ...,hello E and _ welcome _ back _ into _ another ...,(FRAG (INTJ (UNK hello)) (UNK and) (VP (UNK we...,hello _ and _ welcome _ back _ into _ another ...,hello E and _ welcome _ back _ into _ another ...,(FRAG (INTJ (UNK hello)) (UNK and) (VP (UNK we...,hello _ and _ welcome _ back _ into _ another ...,hello E and _ welcome _ back _ into _ another ...,(FRAG (INTJ (UNK hello)) (UNK and) (INTJ (UNK ...,hello _ and _ welcome _ back _ in _ to _ anoth...,hello E and _ welcome E back _ in _ to _ anoth...,(FRAG (INTJ (UNK hello)) (UNK and) (VP (UNK we...,hello _ and _ welcome _ back _ into _ another ...,hello E and _ welcome _ back _ into _ another ...,377.0,369.0,372.0,0.140584,0.095491,0.083163,0.053571,0.142197,0.148172,0.857803,0.851828,0.853186,0.849956,0.961241,0.97234,0.955786,0.968941,0.958506,0.970637,0.909091,0.937984,0.822917,0.873057,0.890909,0.932817,0,0,0,0,0,0,1,1,1,5,4,3,3,3,6,8,7,6,6,6,6,8,7,7,23,31,64,57,66
3,4,4,4,4YOq90opfiBsc3MwflzFbJ,not-scripted,9.05,spotify:show:4O6pIMQHq2GkVhwQ0KNdnH,The Frog and The Dragon,A podcast devoted to everything Magic: the Gat...,The Frog & The Dragon,['en'],https://anchor.fm/s/1338c918/podcast/rss,spotify:episode:4YOq90opfiBsc3MwflzFbJ,Welcome to Cube,"Today we give you the basics of what is cube, ...",19.622017,show_4O6pIMQHq2GkVhwQ0KNdnH,4YOq90opfiBsc3MwflzFbJ,hello everyone and welcome to the frog and the...,"Hello, everyone, and welcome to ""The Frog and ...","Hello everyone, and welcome to The Frog and th...","Hello everyone, and welcome to The Frog and Th...",Hello everyone and welcome to frog and the dra...,Hello everyone and welcome to the Frog and the...,hello everyone and welcome to the frog and the...,hello everyone and welcome to frog and the dra...,hello everyone and welcome to the frog and the...,(FRAG (INTJ (UNK hello)) (NP (UNK everyone)) (...,hello _ everyone _ and _ welcome _ to _ the _ ...,hello E everyone _ and _ welcome _ to _ the _ ...,(S (INTJ (UNK hello)) (NP (UNK everyone)) (UNK...,hello _ everyone _ and _ welcome _ to _ the _ ...,hello E everyone _ and _ welcome _ to _ the _ ...,(FRAG (INTJ (UNK hello)) (NP (UNK everyone)) (...,hello _ everyone _ and _ welcome _ to _ the _ ...,hello E everyone _ and _ welcome _ to _ the _ ...,(FRAG (INTJ (UNK hello)) (NP (UNK everyone)) (...,hello _ everyone _ and _ welcome _ to _ frog _...,hello E everyone _ and _ welcome _ to _ frog _...,(FRAG (INTJ (UNK hello)) (NP (UNK everyone)) (...,hello _ everyone _ and _ welcome _ to _ the _ ...,hello E everyone _ and _ welcome _ to _ the _ ...,296.0,294.0,295.0,0.108108,0.057432,0.081715,0.048225,0.155412,0.149714,0.844588,0.850286,0.846446,0.853206,0.970504,0.985514,0.972069,0.984199,0.971286,0.984856,0.926174,0.958124,0.858586,0.921008,0.919463,0.958124,0,0,1,1,0,2,1,1,1,11,12,11,12,11,0,0,0,0,0,0,2,0,0,0,22,36,72,58,54
4,5,5,5,5a0dxl6w7wh5SGAbNpxhu4,scripted,8.87,spotify:show:21ASCcEXgUlbFSmoqjroZm,Chompers,Make tooth time easy with this morning and nig...,Gimlet,['en'],https://feeds.megaphone.fm/chompers,spotify:episode:5a0dxl6w7wh5SGAbNpxhu4,Heroines Week Xi Ling Shi (3-2-2020),Put your superhero cape on because its Heroine...,3.20915,show_21ASCcEXgUlbFSmoqjroZm,5a0dxl6w7wh5SGAbNpxhu4,good morning we hope you're ready to have a su...,Good morning! We hope you're ready to have a s...,Good morning! We hope you're ready to have a s...,"Good morning, we hope you're ready to have a s...",Good morning. We hope you're ready to have a s...,Good morning. We hope you're ready to have a s...,good morning we hope you're ready to have a su...,good morning we hope you're ready to have a su...,good morning we hope you're ready to have a su...,(INTJ (UNK good) (UNK morning))\n(S (NP (UNK w...,good _ morning _\nwe _ hope _ you _ 're _ read...,good _ morning _ we _ hope _ you _ 're _ ready...,(S (INTJ (UNK good) (UNK morning)) (NP (UNK we...,good _ morning _ we _ hope _ you _ 're _ ready...,good E morning E we _ hope _ you _ 're _ ready...,(INTJ (UNK good) (UNK morning))\n(S (NP (UNK w...,good _ morning _\nwe _ hope _ you _ 're _ read...,good _ morning _ we _ hope _ you _ 're _ ready...,(INTJ (UNK good) (UNK morning))\n(S (NP (UNK w...,good _ morning _\nwe _ hope _ you _ 're _ read...,good _ morning _ we _ hope _ you _ 're _ ready...,(INTJ (UNK good) (UNK morning))\n(S (NP (UNK w...,good _ morning _\nwe _ hope _ you _ 're _ read...,good _ morning _ we _ hope _ you _ 're _ ready...,269.0,269.0,268.0,0.107807,0.052045,0.062947,0.040773,0.15527,0.150118,0.84473,0.849882,0.849695,0.85598,0.957724,0.980813,0.960261,0.984289,0.958991,0.982548,0.908108,0.961887,0.84991,0.939891,0.908108,0.961887,0,0,0,0,0,0,0,0,0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,20,38,48,39,40
5,6,6,6,5TpkTe5mlb5xx1UT8OvZ6l,scripted,5.44,spotify:show:4gds4Ip86RDAlAgH4PQuQs,Crime Over Coffee,Listen in each week while true crime enthusias...,Crime Over Coffee,['en'],https://anchor.fm/s/b402dc8/podcast/rss,spotify:episode:5TpkTe5mlb5xx1UT8OvZ6l,Mini Episode: Florence Salon Murders,This episode tells you the stories of the Flor...,10.76555,show_4gds4Ip86RDAlAgH4PQuQs,5TpkTe5mlb5xx1UT8OvZ6l,good morning and welcome to another mini monda...,"Good morning, and welcome to another mini Mond...","Good morning, and welcome to another Mini Mond...","Good morning, and welcome to another Mini Mond...","Good morning, and welcome to another mini Mond...","Good morning, and welcome to another mini Mond...",good morning and welcome to another mini monda...,good morning and welcome to another mini monda...,good morning and welcome to another mini monda...,(FRAG (INTJ (UNK good) (UNK morning)) (UNK and...,good _ morning _ and _ welcome _ to _ another ...,good E morning E and _ welcome _ to _ another ...,(FRAG (INTJ (UNK good) (UNK morning)) (UNK and...,good _ morning _ and _ welcome _ to _ another ...,good E morning E and _ welcome _ to _ another ...,(FRAG (INTJ (UNK good) (UNK morning)) (UNK and...,good _ morning _ and _ welcome _ to _ another ...,good E morning E and _ welcome _ to _ another ...,(FRAG (INTJ (UNK good) (UNK morning)) (UNK and...,good _ morning _ and _ welcome _ to _ another ...,good E morning E and _ welcome _ to _ another ...,(FRAG (INTJ (UNK good) (UNK morning)) (UNK and...,good _ morning _ and _ welcome _ to _ another ...,good E morning E and _ welcome _ to _ another ...,306.0,310.0,307.0,0.084967,0.035948,0.033587,0.013942,0.150093,0.143107,0.849907,0.856893,0.855628,0.862814,0.980832,0.990207,0.983917,0.992188,0.982372,0.991196,0.937997,0.972887,0.880383,0.9504,0.934817,0.972887,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,1,1,1,0,0,1,1,1,21,40,54,47,48
6,7,7,7,6jyaOoQ6aF3TB8z93Honak,not-scripted,12.39,spotify:show:6JqOLHQRh2G24ItUTlmxu9,Legends Cast - A Podcast About Legends of Rune...,"A podcast about the meta, cards, and community...",The Lift,['en'],https://anchor.fm/s/c4221b8/podcast/rss,spotify:episode:6jyaOoQ6aF3TB8z93Honak,S2 Episode 5.5: State of the Show,Mark gives a quick state of the show in this i...,19.075,show_6JqOLHQRh2G24ItUTlmxu9,6jyaOoQ6aF3TB8z93Honak,you're listening to legends cast a podcast abo...,"You're listening to Legends Cast, a podcast ab...","You're listening to Legends Cast, a podcast ab...","You're listening to Legends Cast, a podcast ab...",You're listening to Legends cast a podcast abo...,"You're listening to Legendscast, a podcast abo...",you're listening to legends cast a podcast abo...,you're listening to legends cast a podcast abo...,you're listening to legendscast a podcast abou...,(S (NP (UNK you)) (VP (UNK 're) (VP (VP (UNK l...,you _ 're _ listening _ to _ legends _ cast _ ...,you _ 're _ listening _ to _ legends _ cast _ ...,(S (NP (UNK you)) (VP (UNK 're) (VP (VP (UNK l...,you _ 're _ listening _ to _ legends _ cast _ ...,you _ 're _ listening _ to _ legends _ cast _ ...,(S (NP (UNK you)) (VP (UNK 're) (VP (VP (UNK l...,you _ 're _ listening _ to _ legends _ cast _ ...,you _ 're _ listening _ to _ legends _ cast _ ...,(S (NP (UNK you)) (VP (UNK 're) (VP (VP (UNK l...,you _ 're _ listening _ to _ legends _ cast _ ...,you _ 're _ listening _ to _ legends _ cast _ ...,(S (NP (UNK you)) (VP (UNK 're) (VP (UNK liste...,you _ 're _ listening _ to _ legendscast _ a _...,you _ 're _ listening _ to _ legendscast _ a _...,318.0,308.0,311.0,0.106918,0.113208,0.061261,0.049249,0.145623,0.149461,0.854377,0.850539,0.860415,0.858742,0.972324,0.975364,0.965672,0.97552,0.968987,0.975442,0.938967,0.936236,0.872841,0.873635,0.929577,0.926905,2,0,5,2,0,0,1,1,1,7,3,8,8,4,4,1,3,3,3,2,3,0,2,3,15,29,56,51,49
7,8,8,8,6MaijKHa5MU0MsdS0yVZHE,not-scripted,10.36,spotify:show:0cTycvoAlE6LSppQHpgr48,Align By Design,Align by Design with Amy Allchurch is the podc...,Amy Allchurch,['en'],https://anchor.fm/s/de57c04/podcast/rss,spotify:episode:6MaijKHa5MU0MsdS0yVZHE,42. Myths Part 1,Debunking myths about Human Design and answeri...,36.06335,show_0cTycvoAlE6LSppQHpgr48,6MaijKHa5MU0MsdS0yVZHE,welcome to align by design i'm amy allchurch a...,"Welcome to Align by Design; I'm Amy Allchurch,...","Welcome to Align by Design, I'm Amy Allchurch,...","Welcome to Align by Design, I'm Amy Allchurch,...",Welcome to 9 by Design. I'm eating all church ...,"Welcome to Align by Design. I'm Amy Alchurch, ...",welcome to align by design i'm amy allchurch a...,welcome to 9 by design i'm eating all church a...,welcome to align by design i'm amy alchurch an...,(S (ADJP (UNK welcome) (S (VP (UNK to) (VP (UN...,welcome _ to _ align _ by _ design _ i _ 'm _ ...,welcome _ to _ align _ by _ design _ i _ 'm _ ...,(S (ADJP (UNK welcome) (S (VP (UNK to) (VP (UN...,welcome _ to _ align _ by _ design _ i _ 'm _ ...,welcome _ to _ align _ by _ design _ i _ 'm _ ...,(S (ADJP (UNK welcome) (S (VP (UNK to) (VP (UN...,welcome _ to _ align _ by _ design _ i _ 'm _ ...,welcome _ to _ align _ by _ design _ i _ 'm _ ...,(FRAG (ADJP (UNK welcome) (S (VP (UNK to)))) (...,welcome _ to _ by _ design _\ni _ 'm _ eating ...,welcome _ to _ by _ design _ i _ 'm _ eating _...,(ADJP (UNK welcome) (S (VP (UNK to) (VP (UNK a...,welcome _ to _ align _ by _ design _\ni _ 'm _...,welcome _ to _ align _ by _ design _ i _ 'm _ ...,258.0,259.0,256.0,0.069767,0.027132,0.041783,0.01532,0.148383,0.142935,0.851617,0.857065,0.860411,0.864627,0.97941,0.987229,0.978383,0.985813,0.978896,0.986521,0.949907,0.975701,0.908752,0.953096,0.946197,0.975701,0,0,0,0,0,1,0,0,0,4,4,5,5,5,1,1,1,1,1,5,5,4,4,4,22,24,47,39,45
8,9,9,9,6URm69QBcvJpqoGiTE1nyy,not-scripted,25.67,spotify:show:40tQwCsD43wEfTZuDuZ66c,Let's All Be Serious,Imagine sitting around a table with three of y...,Banger,['en'],https://anchor.fm/s/f7a6444/podcast/rss,spotify:episode:6URm69QBcvJpqoGiTE1nyy,"Colonics, Psychics, and Amber Alerts, OH MY! w...","Mindy Hamilton is a medical anthropologist, fo...",57.617217,show_40tQwCsD43wEfTZuDuZ66c,6URm69QBcvJpqoGiTE1nyy,so is that all they do for icy hot those lying...,"So, is that all they do for Icy Hot, those lyi...","So, is that all they do for Icy Hot? Those lyi...","So, is that all they do for Icy Hot, those lyi...",So that all they do for Icy Hot those Lyin mot...,"So is that all they do for Icy Hot, those lyin...",so is that all they do for icy hot those lying...,so that all they do for icy hot those lyin mot...,so is that all they do for icy hot those lying...,(SQ (UNK so) (UNK is) (NP (UNK that)) (NP (NP ...,so _ is _ that _ all _ they _ do _ for _ icy _...,so _ is _ that _ all _ they _ do _ for _ icy _...,(SQ (UNK so) (UNK is) (NP (UNK that)) (NP (NP ...,so _ is _ that _ all _ they _ do _ for _ icy _...,so _ is _ that _ all _ they _ do _ for _ icy _...,(SQ (UNK so) (UNK is) (NP (UNK that)) (NP (NP ...,so _ is _ that _ all _ they _ do _ for _ icy _...,so _ is _ that _ all _ they _ do _ for _ icy _...,(SBAR (UNK so) (UNK that) (S (NP (UNK all)) (N...,so _ that _ all _ they _ do _ for _ icy _ hot ...,so _ that _ all _ they _ do _ for _ icy _ hot ...,(SQ (UNK so) (UNK is) (NP (UNK that)) (NP (NP ...,so _ is _ that _ all _ they _ do _ for _ icy _...,so _ is _ that _ all _ they _ do _ for _ icy _...,296.0,278.0,299.0,0.263514,0.182432,0.205825,0.128803,0.160556,0.170105,0.839444,0.829895,0.852031,0.846748,0.933658,0.957691,0.913114,0.945918,0.923272,0.951768,0.838926,0.882448,0.727273,0.801292,0.808725,0.879227,0,0,1,1,0,2,0,0,0,19,14,21,21,23,2,2,1,1,2,7,8,12,11,11,33,70,99,85,89
9,10,10,10,7n6zBPeHcH1t3MUvBNIvDa,scripted,5.31,spotify:show:5PFgy6lc8UvYIjDsaaO3dN,Crimes of Passion,"Every Wednesday, we tell a love story that tur...",Parcast Network,['en'],https://feeds.megaphone.fm/crimesofpassion,spotify:episode:7n6zBPeHcH1t3MUvBNIvDa,Crimes of Passion Bites: Notorious Assassins,In addition to new episodes of Crimes of Passi...,22.49055,show_5PFgy6lc8UvYIjDsaaO3dN,7n6zBPeHcH1t3MUvBNIvDa,welcome to podcast crime bites we're offering ...,"Welcome to Podcast Crime Bites, where we're of...",Welcome to Podcast Crime Bites. We're offering...,"Welcome to Podcast Crime Bites, we're offering...",Welcome to park asked crime bites. We're offer...,Welcome to ParCast Crime Bites. We're offering...,welcome to podcast crime bites we're offering ...,welcome to park asked crime bites we're offeri...,welcome to parcast crime bites we're offering ...,(ADJP (UNK welcome) (PP (UNK to) (NP (NP (UNK ...,welcome _ to _ podcast _ crime _ bites _ where...,welcome _ to _ podcast _ crime _ bites _ where...,(S (ADJP (UNK welcome) (PP (UNK to) (NP (UNK p...,welcome _ to _ podcast _ crime _ bites _ we _ ...,welcome _ to _ podcast _ crime _ bites _ we _ ...,(ADJP (UNK welcome) (PP (UNK to) (NP (UNK podc...,welcome _ to _ podcast _ crime _ bites _\nwe _...,welcome _ to _ podcast _ crime _ bites _ we _ ...,(S (ADJP (UNK welcome) (S (VP (UNK to) (VP (UN...,welcome _ to _ park _ asked _ crime _ bites _\...,welcome _ to _ park _ asked _ crime _ bites _ ...,(ADJP (UNK welcome) (S (VP (UNK to) (VP (UNK p...,welcome _ to _ parcast _ crime _ bites _\nwe _...,welcome _ to _ parcast _ crime _ bites _ we _ ...,289.0,290.0,288.0,0.065744,0.024221,0.028492,0.011173,0.167727,0.161859,0.832273,0.838141,0.848302,0.85281,0.974949,0.992618,0.975659,0.993902,0.975304,0.99326,0.942568,0.978003,0.898305,0.95416,0.942568,0.978003,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,30,40,38,32


## Describe overall df

In [4]:
temp_df = df.describe()

for col in temp_df.columns:
    if "count" not in col and "duration" not in col and "percent_diff" not in col and "length" not in col and "nums" not in col:
        temp_df[col] = round(temp_df[col]*100.0,2)
    else:
        temp_df[col] = round(temp_df[col],2)
        
display(temp_df)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,nums,percent_diff,duration,length_GroundTruth,length_GoogleASR,length_WhisperX,wer_GoogleASR,wer_WhisperX,cer_GoogleASR,cer_WhisperX,wil_GoogleASR,wil_WhisperX,wip_GoogleASR,wip_WhisperX,bleu_GoogleASR,bleu_WhisperX,bert_p_GoogleASR,bert_p_WhisperX,bert_r_GoogleASR,bert_r_WhisperX,bert_f_GoogleASR,bert_f_WhisperX,rouge1_GoogleASR,rouge1_WhisperX,rouge2_GoogleASR,rouge2_WhisperX,rougeL_GoogleASR,rougeL_WhisperX,uh_count_WhisperX,uh_count_GoogleASR,uh_count_GroundTruth,um_count_WhisperX,um_count_GoogleASR,um_count_GroundTruth,well_count_WhisperX,well_count_GoogleASR,well_count_GroundTruth,INTJ_count_WhisperX,INTJ_count_GoogleASR,INTJ_count_GroundTruth-max,INTJ_count_GroundTruth-min,INTJ_count_GroundTruth-neutral,PRN_count_WhisperX,PRN_count_GoogleASR,PRN_count_GroundTruth-max,PRN_count_GroundTruth-min,PRN_count_GroundTruth-neutral,EDITED_count_WhisperX,EDITED_count_GoogleASR,EDITED_count_GroundTruth-max,EDITED_count_GroundTruth-min,EDITED_count_GroundTruth-neutral,punctuation_count_GoogleASR,punctuation_count_WhisperX,punctuation_count_GroundTruth-max,punctuation_count_GroundTruth-min,punctuation_count_GroundTruth-neutral
count,1000.0,1000.0,10.0,10.0,10.0,10.0,10.0,10.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0
mean,550.0,550.0,5.5,10.63,29.48,294.5,290.8,291.8,10.74,7.19,6.71,4.38,15.02,14.79,84.98,85.21,84.95,85.32,97.03,98.19,96.65,97.92,96.84,98.05,92.57,95.15,86.27,90.98,91.82,94.97,0.2,0.0,1.0,0.4,0.0,0.8,0.3,0.3,0.3,5.0,4.2,5.9,6.0,5.6,1.3,1.3,1.3,1.3,1.3,2.5,3.8,3.3,3.4,3.6,20.9,38.0,59.9,51.2,52.8
std,302.77,302.77,3.03,6.68,19.36,34.82,32.97,34.84,6.18,5.21,5.37,3.64,1.09,1.73,1.09,1.73,1.18,1.78,1.61,1.11,2.14,1.51,1.86,1.3,3.57,3.11,5.75,5.45,4.39,3.27,0.63,0.0,1.7,0.7,0.0,1.14,0.48,0.48,0.48,6.04,4.85,6.38,6.48,6.9,2.11,2.45,2.21,1.89,1.95,2.68,2.94,4.47,3.78,3.86,5.38,12.66,16.97,13.96,15.96
min,100.0,100.0,1.0,1.24,3.21,258.0,259.0,255.0,3.72,2.23,1.36,0.88,12.77,10.52,83.23,82.99,81.86,80.93,93.37,95.77,91.31,94.59,92.33,95.18,83.89,88.24,72.73,80.13,80.87,87.92,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,24.0,40.0,38.0,32.0
25%,325.0,325.0,3.25,6.3,19.21,269.0,269.5,267.25,7.36,2.93,3.56,1.43,14.63,14.44,84.46,84.34,84.86,85.07,96.36,97.67,96.16,97.06,96.15,97.48,91.34,93.67,85.21,87.32,91.09,92.91,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.25,1.25,1.25,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.25,0.25,18.5,30.25,48.75,40.5,45.0
50%,550.0,550.0,5.5,9.7,24.19,292.5,284.0,291.5,9.84,5.47,5.98,4.45,14.97,14.96,85.03,85.04,85.1,85.46,97.36,98.38,97.09,98.42,97.33,98.37,93.59,96.0,87.09,93.04,93.17,96.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,2.5,4.0,4.0,3.5,0.0,0.5,0.5,1.0,0.5,2.0,3.5,0.5,2.0,2.5,21.5,37.0,55.0,49.0,48.5
75%,775.0,775.0,7.75,13.45,36.12,303.5,304.5,305.0,10.8,10.53,7.7,5.25,15.54,15.66,85.37,85.56,85.5,86.18,98.05,98.95,97.77,99.06,97.83,99.0,94.17,97.5,89.38,95.24,94.06,97.5,0.0,0.0,1.0,0.75,0.0,1.75,0.75,0.75,0.75,6.5,4.0,7.75,7.75,5.75,1.75,1.0,1.0,1.0,1.75,4.5,5.75,7.0,6.25,6.25,22.0,40.0,67.0,56.0,58.5
max,1000.0,1000.0,10.0,25.67,63.9,377.0,369.0,372.0,26.35,18.24,20.58,12.88,16.77,17.01,87.23,89.48,86.04,87.8,98.94,99.48,98.99,99.39,98.96,99.43,97.13,98.38,93.88,96.93,96.77,98.38,2.0,0.0,5.0,2.0,0.0,3.0,1.0,1.0,1.0,19.0,14.0,21.0,21.0,23.0,6.0,8.0,7.0,6.0,6.0,7.0,8.0,12.0,11.0,11.0,33.0,70.0,99.0,85.0,89.0


## Describe non-scripted df

In [5]:
temp_df = df[df["labels"]=="not-scripted"].describe()

for col in temp_df.columns:
    if "count" not in col and "duration" not in col and "percent_diff" not in col and "length" not in col and "nums" not in col:
        temp_df[col] = round(temp_df[col]*100.0,2)
    else:
        temp_df[col] = round(temp_df[col],2)
        
display(temp_df)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,nums,percent_diff,duration,length_GroundTruth,length_GoogleASR,length_WhisperX,wer_GoogleASR,wer_WhisperX,cer_GoogleASR,cer_WhisperX,wil_GoogleASR,wil_WhisperX,wip_GoogleASR,wip_WhisperX,bleu_GoogleASR,bleu_WhisperX,bert_p_GoogleASR,bert_p_WhisperX,bert_r_GoogleASR,bert_r_WhisperX,bert_f_GoogleASR,bert_f_WhisperX,rouge1_GoogleASR,rouge1_WhisperX,rouge2_GoogleASR,rouge2_WhisperX,rougeL_GoogleASR,rougeL_WhisperX,uh_count_WhisperX,uh_count_GoogleASR,uh_count_GroundTruth,um_count_WhisperX,um_count_GoogleASR,um_count_GroundTruth,well_count_WhisperX,well_count_GoogleASR,well_count_GroundTruth,INTJ_count_WhisperX,INTJ_count_GoogleASR,INTJ_count_GroundTruth-max,INTJ_count_GroundTruth-min,INTJ_count_GroundTruth-neutral,PRN_count_WhisperX,PRN_count_GoogleASR,PRN_count_GroundTruth-max,PRN_count_GroundTruth-min,PRN_count_GroundTruth-neutral,EDITED_count_WhisperX,EDITED_count_GoogleASR,EDITED_count_GroundTruth-max,EDITED_count_GroundTruth-min,EDITED_count_GroundTruth-neutral,punctuation_count_GoogleASR,punctuation_count_WhisperX,punctuation_count_GroundTruth-max,punctuation_count_GroundTruth-min,punctuation_count_GroundTruth-neutral
count,600.0,600.0,6.0,6.0,6.0,6.0,6.0,6.0,600.0,600.0,600.0,600.0,600.0,600.0,600.0,600.0,600.0,600.0,600.0,600.0,600.0,600.0,600.0,600.0,600.0,600.0,600.0,600.0,600.0,600.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0
mean,533.33,533.33,5.33,14.24,37.03,302.0,294.67,298.0,12.98,9.74,8.87,6.05,15.03,15.32,84.97,84.68,84.85,84.71,96.67,97.67,95.91,97.13,96.29,97.4,91.61,93.64,84.32,88.07,90.48,93.34,0.33,0.0,1.67,0.67,0.0,1.33,0.5,0.5,0.5,7.83,6.33,9.17,9.33,8.67,2.17,2.17,2.0,2.0,2.0,3.67,5.33,5.33,5.17,5.5,22.83,38.67,67.67,57.17,60.5
std,314.11,314.11,3.14,5.94,19.47,42.69,41.12,43.05,6.96,5.32,5.95,3.77,0.67,0.97,0.67,0.97,1.56,1.96,1.79,1.09,2.38,1.46,2.07,1.25,4.02,3.16,6.31,5.22,5.06,3.29,0.82,0.0,1.97,0.82,0.0,1.21,0.55,0.55,0.55,6.4,5.32,6.4,6.47,7.55,2.4,2.93,2.68,2.19,2.28,2.73,2.5,4.84,3.97,3.94,5.78,16.54,17.76,15.24,15.86
min,100.0,100.0,1.0,9.05,19.08,258.0,259.0,255.0,6.98,2.71,4.18,1.53,14.22,14.29,83.94,82.99,81.86,80.93,93.37,95.77,91.31,94.59,92.33,95.18,83.89,88.24,72.73,80.13,80.87,87.92,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,15.0,24.0,47.0,39.0,45.0
25%,325.0,325.0,3.25,10.87,21.19,274.25,264.5,265.75,9.41,6.69,5.9,4.85,14.63,14.85,84.61,84.35,84.78,84.76,96.36,97.31,95.83,96.76,96.11,97.16,91.34,92.99,83.18,86.5,89.8,92.71,0.0,0.0,0.25,0.0,0.0,0.25,0.0,0.0,0.0,4.25,3.25,5.5,5.5,4.25,0.25,1.0,0.25,1.0,0.25,2.0,3.5,1.0,2.5,3.25,22.0,29.5,58.0,51.5,50.25
50%,550.0,550.0,5.5,13.1,30.98,296.0,286.0,297.0,10.75,10.21,7.15,5.14,14.89,14.96,85.11,85.04,85.26,85.16,97.14,97.87,96.78,97.22,97.01,97.5,93.0,93.71,86.38,87.33,92.45,93.03,0.0,0.0,1.0,0.5,0.0,1.5,0.5,0.5,0.5,6.0,4.0,7.5,7.5,5.5,1.5,1.0,1.0,1.0,1.5,3.5,5.5,6.0,5.5,5.5,22.0,33.5,66.0,55.0,57.0
75%,775.0,775.0,7.75,14.1,52.23,312.5,304.5,308.0,13.25,11.21,8.28,6.44,15.39,15.65,85.37,85.15,85.86,85.74,97.76,98.46,97.15,98.2,97.52,98.25,93.77,95.31,87.19,90.92,93.28,95.18,0.0,0.0,2.5,1.0,0.0,2.0,1.0,1.0,1.0,10.0,10.0,10.25,11.0,9.75,3.5,1.75,2.5,2.5,2.75,5.75,7.5,8.0,7.0,7.75,22.75,40.5,71.0,57.75,64.5
max,900.0,900.0,9.0,25.67,63.9,377.0,369.0,372.0,26.35,18.24,20.58,12.88,16.06,17.01,85.78,85.71,86.04,86.46,98.32,98.72,97.84,98.58,97.89,98.65,94.99,97.57,90.88,95.31,94.62,97.57,2.0,0.0,5.0,2.0,0.0,3.0,1.0,1.0,1.0,19.0,14.0,21.0,21.0,23.0,6.0,8.0,7.0,6.0,6.0,7.0,8.0,12.0,11.0,11.0,33.0,70.0,99.0,85.0,89.0


## Describe scripted df

In [6]:
temp_df = df[df["labels"]=="scripted"].describe()

for col in temp_df.columns:
    if "count" not in col and "duration" not in col and "percent_diff" not in col and "length" not in col and "nums" not in col:
        temp_df[col] = round(temp_df[col]*100.0,2)
    else:
        temp_df[col] = round(temp_df[col],2)
        
display(temp_df)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,nums,percent_diff,duration,length_GroundTruth,length_GoogleASR,length_WhisperX,wer_GoogleASR,wer_WhisperX,cer_GoogleASR,cer_WhisperX,wil_GoogleASR,wil_WhisperX,wip_GoogleASR,wip_WhisperX,bleu_GoogleASR,bleu_WhisperX,bert_p_GoogleASR,bert_p_WhisperX,bert_r_GoogleASR,bert_r_WhisperX,bert_f_GoogleASR,bert_f_WhisperX,rouge1_GoogleASR,rouge1_WhisperX,rouge2_GoogleASR,rouge2_WhisperX,rougeL_GoogleASR,rougeL_WhisperX,uh_count_WhisperX,uh_count_GoogleASR,uh_count_GroundTruth,um_count_WhisperX,um_count_GoogleASR,um_count_GroundTruth,well_count_WhisperX,well_count_GoogleASR,well_count_GroundTruth,INTJ_count_WhisperX,INTJ_count_GoogleASR,INTJ_count_GroundTruth-max,INTJ_count_GroundTruth-min,INTJ_count_GroundTruth-neutral,PRN_count_WhisperX,PRN_count_GoogleASR,PRN_count_GroundTruth-max,PRN_count_GroundTruth-min,PRN_count_GroundTruth-neutral,EDITED_count_WhisperX,EDITED_count_GoogleASR,EDITED_count_GroundTruth-max,EDITED_count_GroundTruth-min,EDITED_count_GroundTruth-neutral,punctuation_count_GoogleASR,punctuation_count_WhisperX,punctuation_count_GroundTruth-max,punctuation_count_GroundTruth-min,punctuation_count_GroundTruth-neutral
count,400.0,400.0,4.0,4.0,4.0,4.0,4.0,4.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,400.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0
mean,575.0,575.0,5.75,5.22,18.15,283.25,285.0,282.5,7.39,3.36,3.46,1.87,15.02,14.01,84.98,85.99,85.09,86.24,97.57,98.96,97.74,99.1,97.66,99.03,94.0,97.41,89.19,95.34,93.83,97.41,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.75,1.0,1.0,1.0,1.0,0.0,0.0,0.25,0.25,0.25,0.75,1.5,0.25,0.75,0.75,18.0,37.0,48.25,42.25,41.25
std,330.4,330.4,3.3,3.12,14.38,17.86,19.17,18.98,2.99,1.37,2.07,1.49,1.67,2.45,1.67,2.45,0.32,1.12,1.34,0.62,1.28,0.46,1.31,0.53,2.59,0.93,3.72,1.22,2.46,0.93,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.96,0.82,0.82,0.82,0.82,0.0,0.0,0.5,0.5,0.5,1.5,1.91,0.5,0.96,0.96,3.56,4.76,6.02,4.43,6.99
min,200.0,200.0,2.0,1.24,3.21,269.0,269.0,267.0,3.72,2.23,1.36,0.88,12.77,10.52,83.23,83.81,84.83,85.28,95.77,98.08,96.03,98.43,95.9,98.25,90.81,96.19,84.99,93.99,90.81,96.19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,30.0,40.0,38.0,32.0
25%,425.0,425.0,4.25,4.29,8.88,269.0,270.5,267.75,5.86,2.37,2.48,1.06,14.45,13.36,84.16,84.69,84.93,85.52,97.06,98.79,97.18,99.02,97.12,98.9,93.05,97.01,87.28,94.78,92.81,97.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.75,0.75,0.75,0.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.75,36.0,46.0,38.75,38.0
50%,550.0,550.0,5.5,5.38,16.63,279.0,280.5,278.0,7.54,3.01,3.1,1.26,15.27,14.66,84.73,85.34,84.98,85.94,97.79,99.14,97.98,99.3,97.88,99.22,94.03,97.54,88.93,95.23,93.87,97.54,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.5,0.5,19.0,39.0,49.5,42.0,42.5
75%,700.0,700.0,7.0,6.3,25.9,293.25,295.0,292.75,9.07,4.0,4.09,2.06,15.84,15.31,85.55,86.64,85.14,86.66,98.3,99.32,98.54,99.38,98.42,99.35,94.98,97.94,90.84,95.79,94.89,97.94,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.25,1.25,1.25,1.25,1.25,0.0,0.0,0.25,0.25,0.25,0.75,2.5,0.25,1.25,1.25,20.25,40.0,51.75,45.5,45.75
max,1000.0,1000.0,10.0,8.87,36.14,306.0,310.0,307.0,10.78,5.2,6.29,4.08,16.77,16.19,87.23,89.48,85.56,87.8,98.94,99.48,98.99,99.39,98.96,99.43,97.13,98.38,93.88,96.93,96.77,98.38,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,1.0,1.0,1.0,3.0,4.0,1.0,2.0,2.0,21.0,40.0,54.0,47.0,48.0
