In [14]:
import glob
from ast import literal_eval

import pandas as pd 
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

## WER, CER and losses

In [15]:
# Legend label -> list of per-fold ASR training logs (``output_<fold>.out``).
# Comment a pair in or out to choose which experiments are compared.
asr_output_paths = {
    label: glob.glob(f"../experiments/{experiment_dir}/output_*.out")
    for label, experiment_dir in [
        ("Copy original (N=31xx)", "ex0_copy_original"),
        # ("Additive noise (N=31xx)", "ex1_additive_noise"),
        ("Band reject (N=31xx)", "ex1_band_reject"),
        # ("Pitch shift (N=31xx)", "ex1_pitch_shift"),
        # ("Reverberation (N=31xx)", "ex1_reverberation"),
        # ("Tempo perturbation (N=31xx)", "ex1_tempo_perturbation"),
        # ("Time masking (N=31xx)", "ex1_time_masking"),
        ("Data resampling (pron) (N=31xx)", "ex2_resample_data_pron"),
        ("Data resampling (cefr) (N=31xx)", "ex2_resample_data_cefr"),
        ("No augment (N=15xx)", "ex0_no_augment"),
    ]
}

In [16]:
def get_df(paths, avg=True):
    """Parse ASR training logs into a WER/CER table and a loss table.

    Every log is scanned for printed metric dicts: lines starting with
    ``{'eval_`` are evaluation records and lines starting with ``{'loss'``
    are training records. Each record is tagged with the fold taken from the
    file name (last ``_``-separated token, without the ``.out`` suffix).

    Args:
        paths: Iterable of log-file paths, one per fold.
        avg: If True (default), average all metrics over folds for each
            epoch and drop the ``fold`` column.

    Returns:
        Tuple ``(wer_cer, losses)`` of DataFrames. ``wer_cer`` holds the
        evaluation metrics with ``eval_wer`` / ``eval_cer`` scaled to percent
        and missing values replaced by 0. ``losses`` joins the training
        records with ``eval_loss`` on epoch and fold, without
        ``learning_rate``.

    Raises:
        ValueError: If the logs contain no evaluation or no training records.
    """
    eval_log = []
    train_log = []

    for path in paths:
        # Remove the ".out" suffix explicitly: str.strip(".out") strips a
        # *set of characters* from both ends, not the suffix.
        fold = path.split("/")[-1].split("_")[-1]
        if fold.endswith(".out"):
            fold = fold[:-len(".out")]
        try:
            fold = int(fold)
        except ValueError:
            pass  # non-numeric fold labels are kept as strings

        with open(path, 'r') as file:
            # Iterate over every line. Stopping at the first blank line (as a
            # readline()/break loop does) silently drops all later records.
            for raw_line in file:
                line = raw_line.strip()
                if line.startswith("{'eval_"):
                    target = eval_log
                elif line.startswith("{'loss'"):
                    target = train_log
                else:
                    continue
                record = literal_eval(line)
                record["fold"] = fold
                target.append(record)

    if not eval_log or not train_log:
        raise ValueError(
            "No evaluation/training records found in the given logs; "
            "check that `paths` is not empty and points at trainer output files."
        )

    eval_df = pd.DataFrame(eval_log)
    train_df = pd.DataFrame(train_log)

    # turn wer and cer to percentages
    eval_df["eval_wer"] = eval_df["eval_wer"] * 100
    eval_df["eval_cer"] = eval_df["eval_cer"] * 100

    losses = (
        train_df
        .merge(eval_df[["epoch", "eval_loss", "fold"]], on=["epoch", "fold"])
        .drop(columns="learning_rate")
    )
    wer_cer = (
        eval_df
        .drop(columns=["eval_runtime", "eval_samples_per_second",
                       "eval_steps_per_second", "eval_loss"])
        .fillna(0)
    )

    if avg:
        # Average across folds. The first positional argument of
        # GroupBy.mean is `numeric_only`, so it is passed explicitly here.
        wer_cer = (
            wer_cer.groupby("epoch").mean(numeric_only=True)
            .drop(columns="fold", errors="ignore")
            .reset_index()
        )
        losses = (
            losses.groupby("epoch").mean(numeric_only=True)
            .drop(columns="fold", errors="ignore")
            .reset_index()
        )

    return wer_cer, losses

In [17]:
def plot_metrics(df, ys, group, x="epoch", titles=None, rows=None, cols=2, height=600):
    """Draw one line chart per metric on a subplot grid and show the figure.

    Args:
        df: DataFrame containing column ``x``, every column in ``ys`` and
            the ``group`` column.
        ys: Column names to plot; each gets its own subplot, filled row by row.
        group: Column used to colour the lines inside each subplot.
        x: Column for the x-axis; its title-cased name labels every x-axis.
        titles: Subplot titles, also used as y-axis labels. Defaults to ``ys``.
        rows: Number of grid rows. Defaults to the fewest rows that fit
            ``ys`` in ``cols`` columns.
        cols: Number of grid columns.
        height: Figure height in pixels.
    """
    if not titles:
        titles = ys
    if not rows:
        rows = (len(ys) + cols - 1) // cols  # ceil(len(ys) / cols)

    fig = make_subplots(rows=rows, cols=cols,
                        horizontal_spacing=0.15, vertical_spacing=0.1,
                        subplot_titles=titles)

    for i, y in enumerate(ys):
        # Grid position follows `cols`; a hard-coded 2 here would misplace
        # traces whenever the caller asks for a different column count.
        row, col = divmod(i, cols)
        row, col = row + 1, col + 1

        metric_fig = px.line(df, x=x, y=y, color=group)
        fig.add_traces(metric_fig.data, rows=row, cols=col)
        # Only the first subplot contributes legend entries, so each group
        # is listed once.
        fig.update_traces(showlegend=(i == 0), row=row, col=col, line=dict(width=1.2))
        fig.update_yaxes(title=f"{titles[i]}", row=row, col=col)

    fig.update_layout(height=height, width=1000, plot_bgcolor='whitesmoke')
    fig.update_xaxes(title=x.title())
    fig.show()

In [18]:
# Gather the fold-averaged tables of every experiment, tagging each with its
# experiment label so the plots can colour by "transform".
wer_cer_frames = []
loss_frames = []

for name, paths in asr_output_paths.items():
    wer_cer, losses = (frame.assign(transform=name) for frame in get_df(paths))
    wer_cer_frames.append(wer_cer)
    loss_frames.append(losses)

all_wer_cer = pd.concat(wer_cer_frames)
all_losses = pd.concat(loss_frames)

plot_metrics(
    all_wer_cer,
    ys=["eval_wer", "eval_cer"],
    group="transform",
    titles=["Eval WER (%)", "Eval CER (%)"],
    rows=1,
)

# plot_metrics(all_losses,
#              ys=["eval_loss", "loss"],
#              group="transform",
#              titles=["Eval loss", "Train loss"], rows=1)

## WER, CER and losses by categories

In [6]:
import yaml
import numpy as np
import evaluate
from typing import List, Tuple

import sys 
sys.path.append("../")
from helper import DataArguments
from helper import round_off


IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html



In [7]:
# Read the training configuration and rebuild the data arguments from its
# "data_args" section.
with open("../config.yml") as config_file:
    train_config = yaml.safe_load(config_file)

data_args = DataArguments(**train_config["data_args"])

In [8]:
# Metric objects used by get_wer / get_cer.
wer, cer = (evaluate.load(metric_id) for metric_id in ("wer", "cer"))

In [9]:
# Legend label -> list of per-fold transcript CSVs of that experiment.
# Comment a pair in or out to choose which experiments are compared.
csv_transcrip = {
    label: glob.glob(f"../experiments/{experiment_dir}/finnish_ASR_transcrip_fold*.csv")
    for label, experiment_dir in [
        # ("Copy original (N=31xx)", "ex0_copy_original"),
        # ("Additive noise (N=31xx)", "ex1_additive_noise"),
        # ("Band reject (N=31xx)", "ex1_band_reject"),
        # ("Pitch shift (N=31xx)", "ex1_pitch_shift"),
        # ("Reverberation (N=31xx)", "ex1_reverberation"),
        # ("Tempo perturbation (N=31xx)", "ex1_tempo_perturbation"),
        # ("Time masking (N=31xx)", "ex1_time_masking"),
        ("Data resampling (cefr)", "ex2_resample_data_cefr"),
        ("Data resampling (pron) (N=31xx)", "ex2_resample_data_pron"),
        ("No augment (N=15xx)", "ex0_no_augment"),
    ]
}

In [10]:
def get_aggregated_df(paths, ratings):
    """Load the per-fold transcript CSVs into one table with rounded ratings.

    Args:
        paths: CSV files to read; their rows are stacked in the given order.
        ratings: Names of the rating columns to pass through ``round_off``.

    Returns:
        A single DataFrame with a fresh 0..n-1 index, without the
        feature/bookkeeping columns listed in ``unused_columns``.
    """
    unused_columns = ["labels", "input_values", "duration_seconds",
                      "speech", "sampling_rate", "__index_level_0__"]

    frames = [pd.read_csv(csv_path).drop(columns=unused_columns) for csv_path in paths]
    combined = pd.concat(frames).reset_index(drop=True)

    # round every rating column
    for rating in ratings:
        combined[rating] = combined[rating].map(round_off)

    return combined

In [11]:
def get_wer(group):
    """Score one group of rows with the module-level ``wer`` metric, in percent.

    ``group`` must provide an ``ASR_transcript`` column (predictions) and a
    ``text`` column (references).
    """
    rows = group.reset_index()
    score = wer.compute(predictions=rows.ASR_transcript, references=rows.text)
    return 100 * score
def get_cer(group):
    """Score one group of rows with the module-level ``cer`` metric, in percent.

    ``group`` must provide an ``ASR_transcript`` column (predictions) and a
    ``text`` column (references).
    """
    rows = group.reset_index()
    score = cer.compute(predictions=rows.ASR_transcript, references=rows.text)
    return 100 * score

In [12]:
def plot_by_metrics(dfs: List[Tuple[str, pd.DataFrame]], 
                    ratings: List[str], 
                    metric_name:str):
    """Plot an ASR error metric per rating value, one subplot per rating.

    Each subplot shows bars with the number of samples per rating value
    (primary y-axis) and one line per experiment with the metric computed on
    the rows sharing that rating value (secondary y-axis).

    Args:
        dfs: ``(experiment name, DataFrame)`` pairs. Every DataFrame needs
            the rating columns plus the columns read by ``get_wer`` /
            ``get_cer``. Sample counts are taken from the FIRST DataFrame
            only.
        ratings: Rating columns to group by. Subplots are laid out two per
            row; the grid has at least three rows and grows when more than
            six ratings are given.
        metric_name: ``"WER"`` selects ``get_wer``; any other value selects
            ``get_cer``. Also used as the secondary y-axis title.

    Raises:
        ValueError: If ``dfs`` or ``ratings`` is empty.
    """
    if not dfs:
        raise ValueError("`dfs` must contain at least one (name, DataFrame) pair")
    if not ratings:
        raise ValueError("`ratings` must contain at least one column name")

    metric_func = get_wer if metric_name == "WER" else get_cer
    colours = px.colors.qualitative.Plotly

    # Keep the 3x2 layout for up to six ratings and add rows beyond that,
    # instead of failing on a seventh rating.
    n_cols = 2
    n_rows = max(3, (len(ratings) + n_cols - 1) // n_cols)

    fig = make_subplots(rows=n_rows, cols=n_cols,
                        horizontal_spacing=0.2, vertical_spacing=0.1,
                        specs=[[{'secondary_y': True} for _ in range(n_cols)]
                               for _ in range(n_rows)])
    all_counts = []
    all_metrics = []

    for i, rating in enumerate(ratings):
        row, col = divmod(i, n_cols)
        row, col = row + 1, col + 1

        # Samples per rating value, counted on the first experiment only.
        count = dfs[0][1].groupby(rating).size()
        metrics = [(name, df.groupby(rating).apply(metric_func)) for name, df in dfs]

        # Collected over all subplots so every subplot shares the same y-ranges.
        all_counts.extend(list(count.values))
        all_metrics.extend([value for _, metric in metrics for value in metric.values])

        bar = go.Bar(x=count.index, y=count.values, name="# samples",
                     showlegend=False, marker=dict(color="#749BC2"))
        lines = [go.Scatter(x=metric.index,
                            y=metric.values,
                            line=dict(color=colours[j % len(colours)]),
                            name=name,
                            # legend entries come from the first subplot only
                            showlegend=(i == 0)) for j, (name, metric) in enumerate(metrics)]

        fig.add_trace(bar, row=row, col=col)
        fig.add_traces(lines, secondary_ys=[True] * len(lines), rows=row, cols=col)
        fig.update_xaxes(title=f"<b>{rating}</b>", row=row, col=col)

    fig.update_layout(
        # NOTE(review): the `xaxis` layout key addresses only the first
        # subplot's x-axis; the other subplots keep automatic ticks. Use
        # fig.update_xaxes(dtick=1) if this was meant for all of them.
        xaxis=dict(dtick=1),
        plot_bgcolor="whitesmoke",
        # 1300 px for the default three rows, scaled for taller grids.
        height=round(1300 * n_rows / 3),
    )

    fig.update_yaxes(showgrid=False, title="Number of samples", range=[0, max(all_counts) * 1.1])
    fig.update_yaxes(title=metric_name, secondary_y=True, showgrid=False,
                     range=[min(all_metrics) * 0.9, max(all_metrics) * 1.1])

    fig.show()

In [13]:
# Rating columns to break the error rates down by.
ratings = [
    "cefr_mean",
    "pronunciation_mean",
    "fluency_mean",
    "accuracy_mean",
    "range_mean",
    "task_completion_mean",
]
# One aggregated transcript table per experiment, paired with its label.
dfs = [
    (label, get_aggregated_df(csv_paths, ratings))
    for label, csv_paths in csv_transcrip.items()
]
plot_by_metrics(dfs, ratings, metric_name="WER")

In [14]:
# Same breakdown as the WER figure, scored with the CER metric instead.
plot_by_metrics(dfs, ratings, metric_name="CER")

## ASA metrics

In [53]:
# Legend label -> list of per-fold ASA logs (``asa_output_<fold>.out``).
# Comment a pair in or out to choose which runs are compared.
asa_output_paths = {
    label: glob.glob(f"../experiments/{run_dir}/asa_output_?.out")
    for label, run_dir in [
        # ("No augment (round up, drop 1)", "ex0_no_augment/asa_ex0_drop_1_original_df"),
        # ("No augment (drop 1)", "ex0_no_augment/asa_ex0_drop_1"),
        # ("No augment", "ex0_no_augment/asa_ex0_no_augment_no_drop"),
        # ("Duplicate", "ex0_copy_original/asa_ex0_no_augment"),
        # ("Additive noise", "ex1_additive_noise/asa_ex0_no_augment"),
        # ("Band reject", "ex1_band_reject/asa_ex0_no_augment"),
        # ("Pitch shift", "ex1_pitch_shift/asa_ex0_no_augment"),
        ("Reverberation", "ex1_reverberation/asa_ex0_no_augment"),
        # ("Tempo perturbation", "ex1_tempo_perturbation/asa_ex0_no_augment"),
        # ("Time masking", "ex1_time_masking/asa_ex0_no_augment"),
        # ("Resample (cefr)", "ex2_resample_data_cefr/asa_ex0_no_augment"),
    ]
}

In [54]:
def asa_get_df(paths):
    """Collect the evaluation records printed in ASA training logs.

    Every line starting with ``{'eval_loss'`` is parsed as a Python dict
    literal. Records from all files are pooled; they are not tagged with
    the fold they came from.

    Args:
        paths: Iterable of log-file paths.

    Returns:
        DataFrame with one row per evaluation record, without the
        ``eval_runtime``, ``eval_samples_per_second`` and
        ``eval_steps_per_second`` columns.
    """
    eval_metrics = []
    for path in paths:
        with open(path) as f:
            # Iterate over every line. Stopping at the first blank line (as a
            # readline()/break loop does) silently drops all later records.
            for raw_line in f:
                line = raw_line.strip()
                if line.startswith("{'eval_loss'"):
                    eval_metrics.append(literal_eval(line))

    return pd.DataFrame(eval_metrics).drop(
        columns=["eval_runtime", "eval_samples_per_second", "eval_steps_per_second"]
    )

In [55]:
# Per experiment: average the ASA evaluation metrics over folds for each
# epoch and tag the rows with the experiment name.
asa_metrics = []

for name, paths in asa_output_paths.items():
    df = (
        asa_get_df(paths)
        .groupby("epoch")
        .mean()
        .reset_index()
        .assign(exp_name=name)
    )
    asa_metrics.append(df)

asa_df = pd.concat(asa_metrics).reset_index(drop=True)

ys = ["eval_precision", "eval_recall", "eval_f1", "eval_spearmanr"]
plot_metrics(
    asa_df,
    ys=ys,
    group="exp_name",
    height=1000,
    # "eval_f1" -> "F1", "eval_precision" -> "Precision", ...
    titles=[y.split('_')[-1].title() for y in ys],
)

In [49]:
def true_round(x):
    """Round ``x`` to the nearest integer, with ties going away from zero.

    Unlike the built-in ``round`` (ties to even), 2.5 becomes 3 and -2.5
    becomes -3. The value goes through ``str`` first, so the decimal sees
    the printed representation of ``x``.
    """
    from decimal import Decimal, ROUND_HALF_UP

    whole = Decimal(str(x)).quantize(Decimal("1"), rounding=ROUND_HALF_UP)
    return int(whole)

In [108]:
from IPython.display import Audio

In [113]:
# NOTE(review): absolute, machine-specific path; this cell only runs where
# that file exists. Consider a configurable data directory.
df = pd.read_csv('/scratch/work/lunt1/wav2vec2-finetune/csv/finnish_df.csv').assign(
    # round the mean CEFR rating half-up to an integer level
    cefr_mean=lambda frame: frame["cefr_mean"].apply(true_round)
)

In [129]:
# Recording path of the sixth row whose rounded CEFR mean equals 7.
exmple = df.loc[df["cefr_mean"] == 7, "recording_path"].iloc[5]

In [130]:
# Kept as the last expression so the notebook renders the audio player.
Audio(data=exmple, rate=16000)

In [None]:
# NOTE(review): bare literal in a never-executed cell; looks like leftover
# scratch input. Confirm and delete the cell.
50