## Preamble

In [70]:
# load packages + declare constants

%load_ext autoreload
%autoreload

import os

import pandas as pd
import numpy as np
import pingouin as pg
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
import pingouin as pg

from sklearn.neighbors import KernelDensity
from itertools import product
from scipy.optimize import curve_fit
from analysis_utils import *
from sklearn.metrics import r2_score

from scipy.special import logit

pio.renderers.default = "vscode"

# gunzip -c data/local/lm/3-gram.arpa.gz | head -n 3
LM_VOCAB_SIZE = 200_003

FIGS = '../figs'
os.makedirs(FIGS, exist_ok=True)
FIG_TYPE = 'pdf'

COL_SIZE_MM = 80
MID_MARGIN_SIZE_MM = 10

MM_TO_IN = 0.03937008
IN_TO_PX = 96

COL_SIZE_PX = int(COL_SIZE_MM * MM_TO_IN * IN_TO_PX)
MID_MARGIN_SIZE_PX = int(MID_MARGIN_SIZE_MM * MM_TO_IN * IN_TO_PX)

DOUBLE_COL_SIZE_PX = COL_SIZE_PX * 2 + MID_MARGIN_SIZE_PX
FONT_SIZE = 9
MARGIN_PX = COL_SIZE_PX // 10
MARGINS_PX = dict(l=MARGIN_PX, r=MARGIN_PX, t=MARGIN_PX, b=MARGIN_PX)


def format_fig_path(prefix : str, **kwargs) -> str:
    pth = f"{FIGS}/{prefix}"
    for key, vals in sorted(kwargs.items()):
        if isinstance(vals, (str, int, float, bool)):
            vals = (str(vals).lower(),)
        assert isinstance(vals, (set, list, tuple)) and len(vals) and all(isinstance(x, str) for x in vals)
        pth += f'-{key}'
        for val in sorted(vals):
            pth += f"_{val.replace('-', '_')}"
    pth += f'.{FIG_TYPE}'
    return pth


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# load tables

print("text_df contents")
text_df = read_text_as_df()
display(text_df.head())

print("perp_df contents")
perp_df = read_perps_as_df()
perp_df = perp_df.merge(text_df[['utt', 'len']], on='utt')
display(perp_df.head())

print("wer_df contents")
wer_df = read_best_wers_as_df()
display(wer_df.head())

print("uttwer_df contents")
uttwer_df = read_best_uttwers_as_df()
uttwer_df = uttwer_df.merge(text_df[['utt', 'len']], on='utt')
display(uttwer_df.head())

df = agg_mean_by_lens(uttwer_df, 'len', ['wer', 'acc'], ['mdl', 'latlm', 'reslm', 'part', 'snr'])
df = df.merge(wer_df, on=['mdl', 'latlm', 'reslm', 'part', 'snr'])
acc_diff = np.abs(df['acc_x'] - df['acc_y'])
idx = np.argmax(acc_diff)
max_, offender = acc_diff.iloc[idx], df.iloc[idx]['mdl']
print(f"max WER diff (%) btw uttwer and wer: {max_:.01%} ({offender})")

## Perplexity

In [None]:
print("entropy/perplexity by partition and LM")
df = agg_mean_by_lens(perp_df, 'len', 'ent', ['part', 'perplm'])
df['perp'] = np.exp(df['ent'])
df = df.pivot(values=['ent', 'perp'], index='part', columns='perplm')
display(df.round(2))


In [None]:
print('distribution of per-utt entropy by partition and LM')
perplms = ('tgsmall', 'fglarge', 'rnnlm_lstm_1a')
parts = ('dev-clean', 'dev-other', 'ROC', 'PRV')
fig = px.violin(
    perp_df.loc[perp_df['perplm'].isin(perplms) & perp_df['part'].isin(parts)], x='ent', y='part', color='perplm',
    box=True,
    labels=dict(ent='Entropy (nats)', part='Partition', lm="LM"),
    category_orders=dict(part=parts, perplm=perplms),
)
fig.update_layout(
    legend=dict(orientation="h", yanchor="bottom", y=1.0),
    yaxis=dict(tickangle=45, title_standoff=5),
    margin=MARGINS_PX,
    font=dict(size=FONT_SIZE),
    width=COL_SIZE_PX, height=2 * COL_SIZE_PX,
)
fig.show()
fig.write_image(format_fig_path("violin-ent", perplms=perplms, parts=parts))

In [None]:
print('Test of normality of entropy given LM')
display(pg.normality(perp_df, dv='ent', group='perplm', method='normaltest').round(3))

print("pairwise spearman correlations of entropy across LMs")
df = perp_df.pivot(values='ent', index='utt', columns='perplm')
display(pg.pairwise_corr(df, columns=df.columns, alternative='greater', method='spearman').round(3))

print("scatter plot matrix of per-utterance entropy of each LM")
fig = px.scatter_matrix(df, dimensions=df.columns, opacity=0.1)
fig.show()

In [None]:
print("per-utterance perplexity vs. rank by LM")

df = perp_df.copy()
df = df.loc[df['perplm'].isin({'tgsmall', 'fglarge', 'rnnlm_lstm_1a'})]
df['rank'] = df.groupby(['perplm', 'part'])['perp'].transform(lambda x: x.rank() / len(x))

fig = px.scatter(df, x='rank', y='perp', color='perplm', symbol='part', log_y=True)
fig.show()

## WER

In [None]:
part = 'dev-clean'
latlm = reslm = 'tgsmall'
mdl = 'tdnn_1d_sp'
desc = f"({part} partition, {mdl} model, {latlm} lattice LM, and {reslm} rescoring lm)"

df = uttwer_df.loc[
    np.isfinite(uttwer_df['snr']) &
    (uttwer_df['latlm'] == latlm) &
    (uttwer_df['reslm'] == reslm) &
    (uttwer_df['part'] == part) &
    (uttwer_df['mdl'] == mdl)
].copy()
df['snr'] = df['snr'].astype('int')

with pd.option_context('display.max_rows', 10):
    print(f"test of normality of per-utterance WERs given SNR {desc}")
    display(pg.normality(df, dv='wer', group='snr', method='normaltest').round(3).sort_index())


    print(f"spearman correlation of WERs across SNRs {desc}")
    df = df.pivot(values='wer', index='utt', columns='snr')
    display(pg.pairwise_corr(df, columns=df.columns, alternative='greater', method='spearman').round(3).sort_index())

print(f"scatter plot matrix of per-utterance WERs of select SNRs {desc}")
fig = px.scatter_matrix(df, dimensions=[5, 10, 20, 30], opacity=0.1)
fig.update_layout({"xaxis"+str(i+1): dict(range = [-0.1, 1]) for i in range(len(df.columns))})
fig.update_layout({"yaxis"+str(i+1): dict(range = [-0.1, 1]) for i in range(len(df.columns))})
fig.show()

In [97]:
# Zhang et al (2023) "Estimate the noise effect on automatic speech recognition
# accuracy for mandarin by an approach associating articulation index"
# FIXME(sdrobert): the fit is very bad if we use eq. 12

latlm = 'tgsmall'
reslm = 'tgsmall'
part = 'dev-clean'
desc = f"({part} partition, {latlm} lattice LM, and {reslm} rescoring lm)"
num_points = 100
fit_inverse = False

df = wer_df.replace(dict(latlm=dict(null=latlm), reslm=dict(null=reslm)))
df = df.loc[
    (df['latlm'] == latlm) &
    (df['reslm'] == reslm) &
    (df['part'] == part)
].copy()

idx = np.isinf(df['snr'])
df, Ainvs = df.loc[~idx], df.loc[idx, ['mdl', 'acc']]
snr_min = df['snr'].min() - 1
snr_max = df['snr'].max() + 1
x_interp = np.linspace(snr_min, snr_max, num_points)

mdls = df['mdl'].unique()
assert all(mdls == Ainvs['mdl'].unique())
ratio = num_points // (len(mdls) + 2)


fit = []
fig = go.Figure()
for mdl_idx, mdl in enumerate(mdls):
    colour = px.colors.qualitative.Plotly[mdl_idx]
    df_ = df.loc[df['mdl'] == mdl]
    Ainv = Ainvs.loc[Ainvs['mdl'] == mdl, 'acc'].iloc[0]
    A_init = 1 / Ainv
    N = len(df_)
    x = df_['snr'].array
    y = df_['acc'].array
    A, B, C = zhang_fit(x, y, fit_inverse)
    y_pred = zhang_func(x, A, B, C)
    r2 = r2_score(y, y_pred)
    fit.append(dict(mdl=mdl, A=A, B=B, C=C, r2=r2))
    y_interp = zhang_func(x_interp, A, B, C)
    print(inv_zhang_func(y_interp, A, B, C)[-5:], x_interp[-5:])
    fig.add_scatter(
        x=x, y=df_['acc'] * 100,
        name=mdl, mode='markers',
        marker=dict(color=colour),
    )
    fig.add_scatter(
        x=x_interp, y=y_interp * 100,
        name=f"{mdl} fit",
        mode='lines',
        opacity=0.5,
        showlegend=False,
        line=dict(color=colour),
    )
    fig.add_annotation(
        x=x_interp[ratio * (mdl_idx + 1)], y=y_interp[ratio * (mdl_idx + 1)] * 100,
        text=f"A={A:.02f},B={B:.02f},C={C:.02f}",
        showarrow=True,
        font=dict(color=colour, size=FONT_SIZE),
    )
print(f"Zhang et al fits by model {desc}")
display(pd.DataFrame.from_records(fit).round(3))

print(f"accuracy (inv. WER) by SNR across models w/ Zhang et al fits {desc}")
fig.update_layout(
    xaxis=dict(title='SNR (dB)', range=[snr_min, snr_max], tickformat='d'),
    yaxis=dict(title='accuracy (%)', range=[0, 100], tickformat='d'),
    legend=dict(title='model', yanchor="top", y=0.99, xanchor='left', x=0.01),
    margin=MARGINS_PX,
    font=dict(size=FONT_SIZE),
    width=DOUBLE_COL_SIZE_PX, height=DOUBLE_COL_SIZE_PX / 2,
)
fig.show()
fig.write_image(format_fig_path("zhang", latlm=latlm, reslm=reslm, part=part))


[29.3030303  29.72727273 30.15151515 30.57575758 31.        ] [29.3030303  29.72727273 30.15151515 30.57575758 31.        ]
[29.3030303  29.72727273 30.15151515 30.57575758 31.        ] [29.3030303  29.72727273 30.15151515 30.57575758 31.        ]
[29.3030303  29.72727273 30.15151515 30.57575758 31.        ] [29.3030303  29.72727273 30.15151515 30.57575758 31.        ]
[29.3030303  29.72727273 30.15151515 30.57575758 31.        ] [29.3030303  29.72727273 30.15151515 30.57575758 31.        ]
Zhang et al fits by model (dev-clean partition, tgsmall lattice LM, and tgsmall rescoring lm)


Unnamed: 0,mdl,A,B,C,r2
0,tdnn_1d_sp,1.063,-3.647,3.987,0.999
1,tri6b,1.148,-10.939,4.058,1.0
2,wav2vec2-base-960h,1.045,-6.649,2.694,1.0
3,wav2vec2-large-960h-lv60,1.023,-2.671,3.033,0.999


accuracy (inv. WER) by SNR across models w/ Zhang et al fits (dev-clean partition, tgsmall lattice LM, and tgsmall rescoring lm)


## Perplexity vs. WER

In [None]:
# wer by perp

perplm = 'tgsmall'

mdl = 'tdnn_1d_sp'
latlm = reslm = 'tgsmall'
num_points = 100
part = 'dev-clean'
print(
    f"mdl {mdl}, partition {part}, lattice LM {latlm}, rescore LM {reslm}, "
    f"perlexity LM {perplm}"
)

df = perp_df.loc[(perp_df['perplm'] == perplm) & (perp_df['part'] == part)]
df = df.merge(uttwer_df.loc[
    (uttwer_df['reslm'] == reslm) &
    (uttwer_df['latlm'] == latlm) &
    (uttwer_df['mdl'] == mdl)
], on=['utt', 'part'])
df = df.loc[df['snr'].isnull()]  # without noise
ymin, ymax = df['wer'].quantile(0.05), df['wer'].quantile(0.95)
xmin, xmax = df['perp'].quantile(0.05), df['perp'].quantile(0.95)
perp_interp = np.linspace(xmin, xmax, num_points)

print("per-utterance WER by perplexity")
fig = px.scatter(df, x='perp', y='wer')
fig.update_xaxes(type='log', range=[np.log10(xmin), np.log10(xmax)])
fig.update_yaxes(range=[ymin, ymax])
fig.show()

In [76]:
# boothroyd's k

mdl = 'wav2vec2-large-960h-lv60'
latlm = reslm = 'null'

# mdl = 'tdnn_1d_sp'
# latlm = reslm = 'tgsmall'
# reslm = 'tgsmall'

perplm = binlm = 'rnnlm_lstm_1a'

num_bins = 5
num_points = 100
binpart = 'dev-clean'
parts = ('ROC', 'PRV')
# parts = ('dev-clean', 'dev-other')
add_intercept = False
include_extrema = True

bounds = bin_series(perp_df.loc[(perp_df['perplm'] == binlm) & (perp_df['part'] == binpart), 'ent'], num_bins)[1]
df = perp_df.loc[(perp_df['perplm'] == perplm) & (perp_df['part'].isin(parts))].copy()
df['ent_bin'] = bin_series(df['ent'], bounds, by_rank=False, fmt="{:.01f}")[0]
bin_cats = df['ent_bin'].dtype.categories

if include_extrema:
    min_bin, max_bin = 0, num_bins - 1
else:
    assert num_bins > 2
    min_bin, max_bin = 1, num_bins - 2
    df = df.loc[df['ent_bin'].isin({bin_cats[bin] for bin in range(min_bin, max_bin + 1)})]

# print("mean entropy by bin and k estimates")
# df_ent = agg_mean_by_lens(df, 'len', 'ent', 'ent_bin')
# log_V = np.log(LM_VOCAB_SIZE)
# df_ent['est_k'] = df_ent['ent'] - log_V
# df_ent['est_k'] /= df_ent.loc[df_ent['ent_bin'] == bin_cats[max_bin], 'ent'].iloc[0] - log_V
# display(df_ent.round(3))

df = df.merge(
    uttwer_df.loc[
        (uttwer_df['reslm'] == reslm) &
        (uttwer_df['latlm'] == latlm) &
        (uttwer_df['mdl'] == mdl)
    ], on=['utt', 'part', 'len'])

df = agg_mean_by_lens(df, 'len', 'wer', ['snr', 'ent_bin', 'part'])
df, df_norm = df.loc[np.isfinite(df['snr'])], df.loc[~np.isfinite(df['snr'])]
df = df.pivot(values='wer', index=['part', 'snr'], columns='ent_bin')
df_norm = df_norm.pivot(values='wer', index=['part'], columns='ent_bin')

fig_acc, fig_loge = go.Figure(), go.Figure()
log_x_lims = np.log10(100 * df_norm[bin_cats[max_bin]].min()), np.log10(100)
log_y_lims = np.log10(100 * df_norm[bin_cats[min_bin]].min()), np.log10(100)
x_interp = np.linspace(1 - df_norm[bin_cats[max_bin]].max(), 1.0, num_points)
log_x_interp = np.linspace(np.log1p(-df_norm[bin_cats[max_bin]].max()), 0.0, num_points)

fig_acc.add_scatter(
    x=[0, 100],
    y=[0, 100],
    mode='lines',
    line_color='gray',
    showlegend=False,
)
fig_loge.add_scatter(
    x=[10 ** log_y_lims[0], 100],
    y=[10 ** log_y_lims[0], 100],
    mode='lines',
    line_color='gray',
    showlegend=False,
)

fits = []
X = df[bin_cats[max_bin]]
for symbol, bin in enumerate(range(min_bin, max_bin)):
    Y = df[bin_cats[bin]]
    for colour, part in enumerate(parts):
        y = Y.loc[part]
        x = X.loc[part]
        fig_acc.add_scatter(
            x=100 - x * 100, y=100 - y * 100,
            name=bin_cats[bin],
            mode='markers',
            legendgroup=part,
            legendgrouptitle=dict(text=part),
            marker=dict(color=px.colors.qualitative.Plotly[colour], symbol=symbol),
        )
        fig_loge.add_scatter(
            x=100 * x, y=100 * y,
            name=bin_cats[bin],
            mode='markers',
            legendgroup=part,
            legendgrouptitle=dict(text=part),
            marker=dict(color=px.colors.qualitative.Plotly[colour], symbol=symbol),
        )
        fit = pg.linear_regression(np.log(x).reset_index(), np.log(y), add_intercept)
        # k = fit.loc[fit['names'] == bin_cats[max_bin], 'coef'].iloc[0]
        # c = fit.loc[fit['names'] == 'Intercept', 'coef'].iloc[0] if add_intercept else 0
        # c = np.exp(c)
        fit['part'] = part
        fit['y'] = bin_cats[bin]
        fits.append(fit)
#     y = df[bin_cats[bin]].to_numpy()
#     # k, c = boothroyd_fit(
#     #     np.log(x),
#     #     np.log(y.to_numpy()),
#     #     fit_exponent=fit_exponent,
#     #     add_intercept=add_intercept,
#     #     resample_points=resample_points,
#     # )
#     # fit = pg.linear_regression(np.log(x), np.log(y), add_intercept)
#     # fits.append(fit)
#     # k = fit.loc[fit['names'] == 'x1', 'coef'].iloc[0]
#     # c = np.exp(fit.loc[fit['names'] == 'x1', 'coef'].iloc[0]) if add_intercept else 1.0
#     # yguess = boothroyd_func(x, k, c)
#     # N = len(y)
#     y_interp = boothroyd_func(x_interp, k, c)
#     interp_name = f"k={k:.02f}" + (f", c={c:.02f}" if add_intercept else "")

#     # fig_acc.add_scatter(
#     #     x=100 - 100 * x_interp, y=100 - 100 * y_interp,
#     #     name=interp_name,
#     #     legendgroup="fits",
#     #     mode='lines', opacity=0.5,
#     #     line=dict(color=colour),
#     # )

#     # log_y_interp = log_boothroyd_func(log_x_interp, k, c)
#     # fig_loge.add_scatter(
#     #     x=100 * np.exp(log_x_interp), y=100 * np.exp(log_y_interp),
#     #     mode='lines',
#     #     opacity=0.5,
#     #     name=interp_name,
#     #     legendgroup="fits",
#     #     line=dict(color=colour),
#     # )
print("Boothroyd & Nittrouer model fits")
display(pd.concat(fits).round(3))

print("in-context vs. out-of-context accuracy and B & N fits")
fig_acc.update_layout(
    xaxis=dict(title=f'{bin_cats[max_bin]} accuracy (%)', range=[0, 100], tickformat='d'),
    yaxis=dict(title='other bin accuracy (%)', range=[0, 100], tickformat='d'),
    margin=MARGINS_PX,
    width=DOUBLE_COL_SIZE_PX, height=DOUBLE_COL_SIZE_PX / 2,
    font=dict(size=FONT_SIZE),
)
fig_acc.show()


print("in-context vs. out-of-context error rates and B & N fits")
fig_loge.update_layout(
    xaxis=dict(title=f'{bin_cats[max_bin]} error (%)', range=[0, 100], tickformat='d'),
    yaxis=dict(title='other bin error (%)', range=[0, 100], tickformat='d'),
    margin=MARGINS_PX,
    width=DOUBLE_COL_SIZE_PX, height=DOUBLE_COL_SIZE_PX / 2,
    font=dict(size=FONT_SIZE),
)
fig_loge.update_xaxes(type='log', range=log_x_lims)
fig_loge.update_yaxes(type='log', range=log_y_lims)
fig_loge.show()


Boothroyd & Nittrouer model fits


Unnamed: 0,names,coef,se,T,pval,r2,adj_r2,CI[2.5%],CI[97.5%],part,y
0,snr,0.011,0.003,3.406,0.002,0.997,0.996,0.004,0.017,ROC,"(2.3,4.0]"
1,"(5.5,12.8]",1.902,0.065,29.401,0.0,0.997,0.996,1.771,2.033,ROC,"(2.3,4.0]"
0,snr,0.004,0.001,3.972,0.0,0.998,0.998,0.002,0.006,PRV,"(2.3,4.0]"
1,"(5.5,12.8]",2.089,0.045,46.81,0.0,0.998,0.998,1.999,2.179,PRV,"(2.3,4.0]"
0,snr,0.006,0.002,2.971,0.005,0.998,0.998,0.002,0.01,ROC,"(4.0,4.5]"
1,"(5.5,12.8]",1.604,0.043,37.429,0.0,0.998,0.998,1.517,1.691,ROC,"(4.0,4.5]"
0,snr,0.004,0.001,4.849,0.0,0.999,0.999,0.002,0.006,PRV,"(4.0,4.5]"
1,"(5.5,12.8]",1.923,0.036,53.209,0.0,0.999,0.999,1.85,1.996,PRV,"(4.0,4.5]"
0,snr,0.005,0.001,3.162,0.003,0.999,0.999,0.002,0.007,ROC,"(4.5,4.9]"
1,"(5.5,12.8]",1.485,0.03,49.955,0.0,0.999,0.999,1.425,1.546,ROC,"(4.5,4.9]"


in-context vs. out-of-context accuracy and B & N fits


in-context vs. out-of-context error rates and B & N fits


In [106]:
# boothroyd's k on SNR

mdl = 'wav2vec2-large-960h-lv60'
latlm = reslm = 'null'

# mdl = 'tdnn_1d_sp'
# latlm = reslm = 'tgsmall'
# reslm = 'tgsmall'

perplm = binlm = 'rnnlm_lstm_1a'

num_bins = 3
num_points = 100
binpart = 'dev-clean'
parts = ('dev-clean', 'ROC', 'PRV')
# parts = ('dev-clean',)
add_intercept = True
include_extrema = True

bounds = bin_series(perp_df.loc[(perp_df['perplm'] == binlm) & (perp_df['part'] == binpart), 'ent'], num_bins)[1]
df = perp_df.loc[(perp_df['perplm'] == perplm) & (perp_df['part'].isin(parts))].copy()
df['ent_bin'] = bin_series(df['ent'], bounds, by_rank=False, fmt="{:.01f}")[0]
bin_cats = df['ent_bin'].dtype.categories

if include_extrema:
    min_bin, max_bin = 0, num_bins - 1
else:
    assert num_bins > 2
    min_bin, max_bin = 1, num_bins - 2
    df = df.loc[df['ent_bin'].isin({bin_cats[bin] for bin in range(min_bin, max_bin + 1)})]

df = df.merge(
    uttwer_df.loc[
        (uttwer_df['reslm'] == reslm) &
        (uttwer_df['latlm'] == latlm) &
        (uttwer_df['mdl'] == mdl)
    ], on=['utt', 'part', 'len'])

df = agg_mean_by_lens(df, 'len', 'wer', ['snr', 'ent_bin', 'part'])
df, df_norm = df.loc[np.isfinite(df['snr'])], df.loc[~np.isfinite(df['snr'])]
df = df.pivot(values='wer', index=['part', 'snr'], columns='ent_bin')
df_norm = df_norm.pivot(values='wer', index=['part'], columns='ent_bin')

fig_acc, fig_loge = go.Figure(), go.Figure()
# log_x_lims = np.log10(100 * df_norm[bin_cats[max_bin]].min()), np.log10(100)
# log_y_lims = np.log10(100 * df_norm[bin_cats[min_bin]].min()), np.log10(100)
# x_interp = np.linspace(1 - df_norm[bin_cats[max_bin]].max(), 1.0, num_points)
# log_x_interp = np.linspace(np.log1p(-df_norm[bin_cats[max_bin]].max()), 0.0, num_points)

# fig_acc.add_scatter(
#     x=[0, 100],
#     y=[0, 100],
#     mode='lines',
#     line_color='gray',
#     showlegend=False,
# )
# fig_loge.add_scatter(
#     x=[10 ** log_y_lims[0], 100],
#     y=[10 ** log_y_lims[0], 100],
#     mode='lines',
#     line_color='gray',
#     showlegend=False,
# )

fits = []
# A, B, C = 1 / 1.023, -2.671, 3.033
X = -logit(df[bin_cats[max_bin]])
# X = inv_zhang_func(1 - df[bin_cats[bin]], A, B, C)
display(X.head())
for colour, part in enumerate(parts):
    x = X.loc[part]
    for symbol, bin in enumerate(range(min_bin, max_bin + 1)):
        # y = inv_zhang_func(1 - df.loc[part, bin_cats[bin]], A, B, C)
        y = -logit(df.loc[part, bin_cats[bin]])
        fig_acc.add_scatter(
            x=x, y=y,
            name=bin_cats[bin],
            mode='markers',
            legendgroup=part,
            legendgrouptitle=dict(text=part),
            marker=dict(color=px.colors.qualitative.Plotly[colour], symbol=symbol),
        )
        # fig_loge.add_scatter(
        #     x=100 * x, y=100 * y,
        #     name=bin_cats[bin],
        #     mode='markers',
        #     legendgroup=part,
        #     legendgrouptitle=dict(text=part),
        #     marker=dict(color=px.colors.qualitative.Plotly[colour], symbol=symbol),
        # )
        fit = pg.linear_regression(x, y, add_intercept)
        # k = fit.loc[fit['names'] == bin_cats[max_bin], 'coef'].iloc[0]
        # c = fit.loc[fit['names'] == 'Intercept', 'coef'].iloc[0] if add_intercept else 0
        # c = np.exp(c)
        fit['part'] = part
        fit['y'] = bin_cats[bin]
        fits.append(fit)
#     y = df[bin_cats[bin]].to_numpy()
#     # k, c = boothroyd_fit(
#     #     np.log(x),
#     #     np.log(y.to_numpy()),
#     #     fit_exponent=fit_exponent,
#     #     add_intercept=add_intercept,
#     #     resample_points=resample_points,
#     # )
#     # fit = pg.linear_regression(np.log(x), np.log(y), add_intercept)
#     # fits.append(fit)
#     # k = fit.loc[fit['names'] == 'x1', 'coef'].iloc[0]
#     # c = np.exp(fit.loc[fit['names'] == 'x1', 'coef'].iloc[0]) if add_intercept else 1.0
#     # yguess = boothroyd_func(x, k, c)
#     # N = len(y)
#     y_interp = boothroyd_func(x_interp, k, c)
#     interp_name = f"k={k:.02f}" + (f", c={c:.02f}" if add_intercept else "")

#     # fig_acc.add_scatter(
#     #     x=100 - 100 * x_interp, y=100 - 100 * y_interp,
#     #     name=interp_name,
#     #     legendgroup="fits",
#     #     mode='lines', opacity=0.5,
#     #     line=dict(color=colour),
#     # )

#     # log_y_interp = log_boothroyd_func(log_x_interp, k, c)
#     # fig_loge.add_scatter(
#     #     x=100 * np.exp(log_x_interp), y=100 * np.exp(log_y_interp),
#     #     mode='lines',
#     #     opacity=0.5,
#     #     name=interp_name,
#     #     legendgroup="fits",
#     #     line=dict(color=colour),
#     # )
print("Boothroyd & Nittrouer model fits")
display(pd.concat(fits).round(3))

print("in-context vs. out-of-context accuracy and B & N fits")
fig_acc.update_layout(
    xaxis=dict(title=f'{bin_cats[max_bin]} pseudo-SNR', tickformat='d'),
    yaxis=dict(title='other bin pseudo-SNR', tickformat='d'),
    margin=MARGINS_PX,
    width=DOUBLE_COL_SIZE_PX, height=DOUBLE_COL_SIZE_PX / 2,
    font=dict(size=FONT_SIZE),
)
fig_acc.show()


# print("in-context vs. out-of-context error rates and B & N fits")
# fig_loge.update_layout(
#     xaxis=dict(title=f'{bin_cats[max_bin]} error (%)', range=[0, 100], tickformat='d'),
#     yaxis=dict(title='other bin error (%)', range=[0, 100], tickformat='d'),
#     margin=MARGINS_PX,
#     width=DOUBLE_COL_SIZE_PX, height=DOUBLE_COL_SIZE_PX / 2,
#     font=dict(size=FONT_SIZE),
# )
# fig_loge.update_xaxes(type='log', range=log_x_lims)
# fig_loge.update_yaxes(type='log', range=log_y_lims)
# fig_loge.show()


part  snr  
PRV   -10.0   -6.277608
      -9.0    -5.788681
      -8.0    -5.040850
      -7.0    -4.561685
      -6.0    -4.046010
Name: (5.1,12.8], dtype: float64

Boothroyd & Nittrouer model fits


Unnamed: 0,names,coef,se,T,pval,r2,adj_r2,CI[2.5%],CI[97.5%],part,y
0,Intercept,0.92,0.012,79.406,0.0,0.999,0.999,0.897,0.944,dev-clean,"(2.3,4.3]"
1,"(5.1,12.8]",1.075,0.004,266.444,0.0,0.999,0.999,1.067,1.083,dev-clean,"(2.3,4.3]"
0,Intercept,0.431,0.016,27.619,0.0,0.999,0.999,0.4,0.463,dev-clean,"(4.3,5.1]"
1,"(5.1,12.8]",1.048,0.005,192.764,0.0,0.999,0.999,1.037,1.059,dev-clean,"(4.3,5.1]"
0,Intercept,-0.0,0.0,-3.023,0.004,1.0,1.0,-0.0,-0.0,dev-clean,"(5.1,12.8]"
1,"(5.1,12.8]",1.0,0.0,1.286459e+17,0.0,1.0,1.0,1.0,1.0,dev-clean,"(5.1,12.8]"
0,Intercept,0.747,0.015,48.664,0.0,0.998,0.998,0.716,0.778,ROC,"(2.3,4.3]"
1,"(5.1,12.8]",1.045,0.007,154.062,0.0,0.998,0.998,1.031,1.059,ROC,"(2.3,4.3]"
0,Intercept,0.492,0.013,37.832,0.0,0.999,0.999,0.466,0.518,ROC,"(4.3,5.1]"
1,"(5.1,12.8]",1.05,0.006,182.572,0.0,0.999,0.999,1.038,1.061,ROC,"(4.3,5.1]"


in-context vs. out-of-context accuracy and B & N fits


In [None]:
# Klakow and Peters (2002). "Testing the correlation of word error rate and perplexity"
# "... slope a is smaller for tasks that are acoustically more challenging. Hence on
# those tasks larger reductions in PP are needed to obtain a given reduction in WER." 

num_bins = 5
num_points = 100
perplm = binlm = 'fglarge'
binpart = 'dev-clean'

# mdl = 'wav2vec2-large-960h-lv60'
# latlm = reslm = 'null'

mdl = 'tdnn_1d_sp'
latlm = reslm = 'tgsmall'

part = 'dev-clean'
max_bin = num_bins -1  

print(
    f"mdl {mdl}, part {part} lattice lm {latlm}, rescore lm {reslm} perplexity LM "
    f"{perplm}, bin part {binpart}, bin LM {binlm}"
)

df = perp_df.loc[(perp_df['perplm'] == perplm) & (perp_df['part'] == part)].copy()
bounds = bin_series(perp_df.loc[(perp_df['perplm'] == binlm) & (perp_df['part'] == binpart), 'ent'], num_bins)[1]
bins = bin_series(df['ent'], bounds, by_rank=False, fmt="{:.01f}")[0]
df['ent_bin'] = bins
bin_cats = df['ent_bin'].dtype.categories

df_ent = agg_mean_by_lens(df, 'len', 'ent', 'ent_bin')
print('entropy by bin')
display(df_ent.round(3))
x = df_ent['ent']

df = df.merge(uttwer_df.loc[
    (uttwer_df['reslm'] == reslm) &
    (uttwer_df['latlm'] == latlm) &
    (uttwer_df['mdl'] == mdl) &
    np.isfinite(uttwer_df['snr'])
], on=['utt', 'part', 'len'])
display(df.head())
snr_50 = agg_mean_by_lens(df, 'len', 'wer', ['snr'])
snr_50 = snr_50.loc[snr_50['wer'] < .5, 'snr'].min()
df = agg_mean_by_lens(df, 'len', 'wer', ['snr', 'ent_bin'])

snrs = df['snr'].unique()
snrs.sort()
curve_params_list = []
for snr in snrs:
    snr_mask = df['snr'] == snr
    y = np.log(df.loc[df['snr'] == snr, "wer"])
    # fit = klakow_fit(x, y, add_intercept=True)
    fit = pg.linear_regression(x, y, True)
    curve_params_list.append({
        "snr": snr,
        "a": fit.loc[fit['names'] == 'ent', 'coef'].iloc[0],
        "b": np.exp(fit.loc[fit['names'] == 'Intercept', 'coef'].iloc[0]),
        "se": fit.loc[fit['names'] == 'ent', 'se'].iloc[0]
    })

snr_mini, snr_midi, snr_maxi = 0, 16, len(snrs) - 1
df = df.loc[(df['snr'] >= snrs[snr_mini]) & (df['snr'] <= snrs[snr_maxi])]
df['wer'] *= 100

print("WER by (PP, SNR) with select K & P fits")
fig = px.bar(df, x='ent_bin', y='wer', color='snr', barmode='overlay', color_continuous_scale="viridis", opacity=1.0)
for dict_ in (curve_params_list[snr_mini], curve_params_list[snr_midi], curve_params_list[snr_maxi]):
    y = klakow_func(np.exp(x), dict_['a'], dict_['b']) * 100
    interp_name = f"a={dict_['a']:.03f}, b={dict_['b']:.03f} WER ∈ [{y.min():.02f},{y.max():.02f}]"
    fig.add_scatter(
        x=bins.dtype.categories,
        y=y,
        showlegend=False,
        name=interp_name,
        mode='markers+lines',
        marker=dict(color='red'), line=dict(color='red'))
    fig.add_annotation(
        x=bins.dtype.categories[0], y=y.iloc[0],
        text=interp_name,
        showarrow=True,
        opacity=1,
        font=dict(color="black"),
        bgcolor='white',
    )
fig.update_layout(
    yaxis_range=[0, 100]
)
fig.show()
f

df = pd.DataFrame.from_records(curve_params_list)
df['logb'] = np.log(df['b'])
df['logb/a'] = df['logb'] / df['a']
df['b^(1/a)'] = np.exp(df['logb/a'])
print('K & P model parameter ratio by snr')
fig = px.scatter(df, x='snr', y='logb/a')
fig.add_vline(x=snr_50, line_dash='dash', line_color='black', annotation_text='50% acc')
fig.show()
print("K & P model parameters by SNR")
df = pd.melt(df, ['snr'], ['a', 'b', 'b^(1/a)'], var_name='param', value_name='val')
fig = px.scatter(df, x='snr', y='val', color='param')
fig.add_vline(x=snr_50, line_dash='dash', line_color='black', annotation_text='50% acc')
fig.update_layout(yaxis_range=[0, 1])
fig.show()

print("Predicted k by bin and snr")
records = []
ent_out = x[num_bins - 1]
for snri, dict_ in enumerate(curve_params_list):
    a, b = dict_['a'], dict_['b']
    snr = int(snrs[snri])
    log_b = np.log(b)
    lwer_out = a * ent_out + log_b
    for bin_in in range(num_bins):
        ent_in = x[bin_in]
        ratio_name = f'{bin_cats[bin_in]} over {bin_cats[num_bins - 1]}'
        lwer_in = a * ent_in + log_b
        k = lwer_in / lwer_out
        records.append(dict(snr=snr, k=k, ratio_name=ratio_name))
df = pd.DataFrame.from_records(records)
fig = px.scatter(df, x='snr', y='k', color='ratio_name')
fig.add_vline(x=snr_50, line_dash='dash', line_color='black', annotation_text='50% acc')
fig.show()
fig.write_image('foo.pdf')


In [None]:
# boothroyd prediction
num_bins = 7
train_mdl = 'tdnn_1d_sp'
train_part = 'dev-clean'
train_latlm = train_reslm = 'tgsmall'
train_perplm = 'fglarge'
test_mdls = ('tri6b', 'wav2vec2-large-960h-lv60', 'wav2vec2-base-960h')
test_parts = ('dev-other',)
test_reslms = ('fglarge',)
test_perplms = ('tgmed', 'fglarge')
add_intercept = False

# determine SNRs which don't have extremal values. It is more important to set the
# max, as high values tend to inflate correlations (i.e. 0.99^k ~= 0.99)
# min_wer, max_wer = 0.0, 0.20
# df = wer_df.replace(dict(latlm=dict(null=train_latlm), reslm=dict(null=train_reslm)))
# df = df.loc[
#     (df['mdl'] == train_mdl) &
#     (df['latlm'] == train_latlm) &
#     (df['reslm'] == train_reslm) &
#     (df['part'] == train_part) &
#     np.isfinite(df['snr'])
# ].groupby('snr')['wer'].agg(['min', 'max'])
# good_snrs = df.index[(df['min'] >= min_wer) & (df['max'] <= max_wer)]
# good_snr_min, good_snr_max = good_snrs.min(), good_snrs.max()
# good_snr_mid = (good_snr_min + good_snr_max) / 2
# print(f"good SNRs: [{good_snr_min}, {good_snr_max}]")

# # all records we'll consider
# df = perp_df.copy()
# bounds = bin_series(
#     perp_df.loc[
#         (perp_df['perplm'] == train_perplm) &
#         (perp_df['part'] == train_part)
#     , 'ent'], num_bins)[1]
# df['perp_bin'] = bin_series(df['ent'], bounds, by_rank=False, fmt="{:.01f}")[0]
# bin_cats = df['perp_bin'].dtype.categories

# df = df.merge(
#     uttwer_df.loc[
#         np.isinf(uttwer_df['snr']) |
#         ((uttwer_df['snr'] >= good_snr_min) & (uttwer_df['snr'] <= good_snr_max))
#     ], on=['utt', 'part', 'len'])
# df = agg_mean_by_lens(
#     df,
#     'len',
#     ['wer', 'ent', 'len'],
#     ['snr', 'perp_bin', 'perplm', 'reslm', 'latlm', 'mdl', 'part'],
# )
# df['lwer'] = np.log(df['wer'])
# df = df.replace(dict(latlm=dict(null=train_latlm), reslm=dict(null=train_reslm)))
# df = df.dropna()

# train_df = df.loc[
#     (df['latlm'] == train_latlm) &
#     (df['reslm'] == train_reslm) &
#     (df['perplm'] == train_perplm) &
#     (df['mdl'] == train_mdl) &
#     (df['part'] == train_part)
# ]



print('train entropy by bin')
display(train_df.groupby('perp_bin', observed=False)[['ent']].mean().round(3))
ent_fit = dict()
for in_bin in range(num_bins):
    ent_in = train_df.loc[train_df['perp_bin'] == bin_cats[in_bin], 'ent'].iloc[0]
    for out_bin in range(num_bins):
         ent_out = train_df.loc[train_df['perp_bin'] == bin_cats[out_bin], 'ent'].iloc[0]
         ent_fit[(in_bin, out_bin)] = (12 - ent_in) / (12 - ent_out), 0

def train(df : pd.DataFrame) -> dict[tuple[int, int], tuple[float,float]]:
    fits = dict()
    df = df.loc[np.isfinite(df['snr'])]
    for in_bin in range(num_bins):
        df_in = df.loc[df['perp_bin'] == bin_cats[in_bin], ['snr', 'lwer']]
        for out_bin in range(num_bins):
            df_out = df.loc[df['perp_bin'] == bin_cats[out_bin], ['snr', 'lwer']]
            df_in_out = df_in.merge(df_out, on='snr', suffixes=('_in', '_out'))
            print(df_in_out.head())
            k, c = boothroyd_fit(
                df_in_out['lwer_out'].to_numpy(),
                df_in_out['lwer_in'].to_numpy(),
                add_intercept=add_intercept,
            )
            print(k, c)
            fits[(in_bin, out_bin)] = k, c
    return fits

def test(df: pd.DataFrame, fits : dict[(int, int), tuple[float, float]], plot : bool = False) -> pd.DataFrame:
    res = dict()
    is_inf = np.isinf(df['snr'])
    df_nonoise, df = df.loc[is_inf], df.loc[~is_inf]
    for in_bin in range(num_bins):
        df_in = df.loc[df['perp_bin'] == bin_cats[in_bin]]
        wer_true = df_nonoise.loc[df_nonoise['perp_bin'] == bin_cats[in_bin], 'wer'].iloc[0]
        df_in = df_in[['snr', 'lwer']]
        for out_bin in range(num_bins):
            df_out = df.loc[df['perp_bin'] == bin_cats[out_bin]]
            k, c = fits[(in_bin, out_bin)]
            wer_pred = df_nonoise.loc[df_nonoise['perp_bin'] == bin_cats[out_bin], 'wer'].iloc[0] ** k
            df_out = df_out[['snr', 'lwer']]
            df_in_out = df_in.merge(df_out, on='snr', suffixes=('_in', '_out'))
            y_true = df_in_out['lwer_in'].to_numpy()
            y_pred = k * df_in_out['lwer_out'].to_numpy() + c
            r2 = r2_score(y_true, y_pred)
            res[(in_bin, out_bin)] = r2, 100 * wer_true, 100 * wer_pred
    df = pd.DataFrame.from_dict(res, orient='index', columns=['r2', 'wer_true', 'wer_pred'])
    df.sort_index()
    df.index = pd.MultiIndex.from_product([bin_cats] * 2, names=['in_bin', 'out_bin'])
    if plot:
        im = df.reset_index().pivot(values='r2', columns='out_bin', index='in_bin')
        fig = px.imshow(
            im,
            labels=dict(x="out-of-context bin", y="in-context bin", z="R^2"),
            x=bin_cats,
            y=bin_cats,
            zmin=-1,
            text_auto=".3f",
            color_continuous_scale='BrBG',
        )
        fig.show()
    return df

def display_test(df: pd.DataFrame, groupby=None):
    df = df.reset_index()
    df = df.reset_index().loc[df['in_bin'] != df['out_bin']].copy()
    df['wer_diff'] = np.abs(df['wer_pred'] - df['wer_true'])
    df['wer_prop'] = df['wer_diff'] / df['wer_true'] * 100
    if groupby:
        df_with = df.groupby(groupby)
    else:
        df_with = df
    df_with = df_with[['r2', 'wer_diff', 'wer_true', 'wer_prop']].describe()
    df = df.loc[
        (df['in_bin'] != bin_cats[0]) &
        (df['in_bin'] != bin_cats[-1]) &
        (df['out_bin'] != bin_cats[0]) &
        (df['out_bin'] != bin_cats[-1])
    ]
    if groupby:
        df_wo = df.groupby(groupby)
    else:
        df_wo = df
    df_wo = df_wo[['r2', 'wer_diff', 'wer_true', 'wer_prop']].describe()
    df = pd.concat([df_with, df_wo], keys=['w/ extreme bins', 'w/o extreme bins'])
    display(df.transpose().round(3))


# print('all equal fit on train')
# display_test(test(
#     train_df,
#     dict((key, (1, 0)) for key in product(range(num_bins), repeat=2)),
#     True
# ))

print('entropy fit on train')
display_test(test(train_df, ent_fit, True))

fit = train(train_df)

print('train and test on self')
display_test(test(train_df, fit, True))

# for test_mdl in test_mdls:
#     test_df = df.loc[
#         (df['latlm'] == train_latlm) &
#         (df['reslm'] == train_reslm) &
#         (df['perplm'] == train_perplm) &
#         (df['mdl'] == test_mdl) &
#         (df['part'] == train_part)
#     ]

#     print(f"train on {train_mdl}, test on {test_mdl}")
#     display_test(test(test_df, fit, True))

#     print(f"entropy fit on {test_mdl}")
#     display_test(test(test_df, ent_fit, True))


# for test_part in test_parts:
#     test_df = df.loc[
#         (df['latlm'] == train_latlm) &
#         (df['reslm'] == train_reslm) &
#         (df['perplm'] == train_perplm) &
#         (df['mdl'] == train_mdl) &
#         (df['part'] == test_part)
#     ]

#     print(f"train on {train_part}, test on {test_part}")
#     display_test(test(test_df, fit, True))

#     print(f"entropy fit on {test_part}")
#     display_test(test(test_df, ent_fit, True))

# for test_reslm in test_reslms:
#     test_df = df.loc[
#         (df['latlm'] == train_latlm) &
#         (df['reslm'] == test_reslm) &
#         (df['perplm'] == train_perplm) &
#         (df['mdl'] == train_mdl) &
#         (df['part'] == train_part)
#     ]

#     print(f"train on {train_reslm}-rescored, test on {test_reslm}-rescored")
#     display_test(test(test_df, fit, True))

#     print(f"entropy fit on {test_reslm} rescore")
#     display_test(test(test_df, ent_fit, True))

# for test_perplm in test_perplms:
#     test_df = df.loc[
#         (df['latlm'] == train_latlm) &
#         (df['reslm'] == train_reslm) &
#         (df['perplm'] == test_perplm) &
#         (df['mdl'] == train_mdl) &
#         (df['part'] == train_part)
#     ]

#     print(f"partitioned with {train_perplm}, test on {test_perplm} bins")
#     display_test(test(test_df, fit))

#     print(f"entropy fit on {test_perplm} bins")
#     display_test(test(test_df, ent_fit))


In [None]:
# klakow prediction
num_bins = 7
train_mdl = 'tdnn_1d_sp'
train_part = 'dev-clean'
train_latlm = train_reslm = train_perplm = 'tgsmall'
test_mdls = ('tri6b',)
test_parts = ('dev-other',)
test_perplms = ('tgmed', 'fglarge')

df = perp_df.copy()
bounds = bin_series(
    perp_df.loc[
        (perp_df['perplm'] == train_perplm) &
        (perp_df['part'] == train_part)
    , 'ent'], num_bins)[1]
df['perp_bin'] = bin_series(df['ent'], bounds, by_rank=False, fmt="{:.01f}")[0]
bin_cats = df['perp_bin'].dtype.categories

# Klakow's model doesn't have anything to do with SNR
df = df.merge(
    uttwer_df.loc[
        np.isinf(uttwer_df['snr'])
    ], on=['utt', 'part', 'len'])
df = agg_mean_by_lens(
    df,
    'len',
    ['wer', 'ent', 'len'],
    ['perp_bin', 'perplm', 'reslm', 'latlm', 'mdl', 'part'],
)
df['lwer'] = np.log(df['wer'])

train_df = df.loc[
    (df['latlm'] == train_latlm) &
    (df['reslm'] == train_reslm) &
    (df['perplm'] == train_perplm) &
    (df['mdl'] == train_mdl) &
    (df['part'] == train_part)
]

def train(df: pd.DataFrame) -> tuple[float, float]:
    fit = pg.linear_regression(df['ent'], df['lwer'])
    a = fit.loc[fit['names'] == 'ent', 'coef'].iloc[0]
    log_b = fit.loc[fit['names'] == 'Intercept', 'coef'].iloc[0]
    return a, log_b

def test(df: pd.DataFrame, fit: tuple[float, float]) -> dict[str, float]:
    wer_true = (df['wer'] * df['len']).sum() / df['len'].sum() * 100
    ent = (df['ent'] * df['len']).sum() / df['len'].sum()
    wer_pred = np.exp(fit[0] * ent + fit[1]) * 100
    y_true = df['lwer'].to_numpy()
    y_pred = fit[0] * df['ent'].to_numpy() + fit[1]
    if len(y_pred) > 1:
        r2 = r2_score(y_true, y_pred)
    else:
        r2 = None
    return dict(r2=r2, wer_true=wer_true, wer_pred=wer_pred)

def display_test(records : list[dict], groupby=None):
    df = pd.DataFrame.from_records(records)
    df['wer_diff'] = np.abs(df['wer_pred'] - df['wer_true'])
    df['wer_prop'] = df['wer_diff'] / df['wer_true'] * 100
    if groupby:
        df = df.groupby(groupby)
    df = df[['r2', 'wer_diff', 'wer_true', 'wer_prop']]
    if len(records) > 1:
        df = df.describe()
        df = df.transpose()
    display(df.round(3))

print(f"{num_bins}-fold cross-validation")
records = []
for test_bin in range(num_bins):
    test_mask = train_df['perp_bin'] == bin_cats[test_bin]
    records.append(test(train_df[test_mask], train(train_df.loc[~test_mask])))
display_test(records)

print("train and test on self")
fit = train(train_df)
display_test([test(train_df, fit)])

for test_mdl in test_mdls:
    test_df = df.loc[
        (df['latlm'] == train_latlm) &
        (df['reslm'] == train_reslm) &
        (df['perplm'] == train_perplm) &
        (df['mdl'] == test_mdl) &
        (df['part'] == train_part)
    ]

    print(f"train on {train_mdl}, test on {test_mdl}")
    display_test([test(test_df, fit)])


for test_part in test_parts:
    test_df = df.loc[
        (df['latlm'] == train_latlm) &
        (df['reslm'] == train_reslm) &
        (df['perplm'] == train_perplm) &
        (df['mdl'] == train_mdl) &
        (df['part'] == test_part)
    ]

    print(f"train on {train_part}, test on {test_part}")
    display_test([test(test_df, fit)])

for test_perplm in test_perplms:
    test_df = df.loc[
        (df['latlm'] == train_latlm) &
        (df['reslm'] == train_reslm) &
        (df['perplm'] == test_perplm) &
        (df['mdl'] == train_mdl) &
        (df['part'] == train_part)
    ]

    print(f"train on {train_part}, test on {test_perplm}")
    display_test([test(test_df, fit)])


## Thoughts

- $k$ is relatively stable to changes in partition, SNR; moreso than $a,b$
    - $R^2$ is inflated by low SNRs by virtue of being near to the intercept
- $k$ can probably be inferred from $a,b$
    - $\log b / a$ stabilizes as SNR increases. Why?
        - Check if $\log b / a$ converges to something else on `dev-other`. Perhaps it's close enough to the `dev-clean` ratio that drastic changes in entropy dominate?
        - Maybe this is compensatory? 
    - Based on its current trajectory, the ratio of $\log b / a \approx 12$ will never be exceeded by the entropy of the partition. The corresponding perplexity is in the vicinity of $162,000$.
- $k$ can be estimated by a ratio of entropies
    - As speech becomes cleaner, errors are more likely to occur one at a time. Guesswork more closely resembles the perplexity computations, which are conditioned on single words.
- Serious problem with ratio estimates (Curran-Everett). $k$ may be compromised.
    - Easy solution is to include intercepts. Regardless, $k$ can be used to predict with or without explaining.
- Klakow's model predicts accuracy $b$ with $0$ entropy. However, $0$ entropy ought to be $0$ errors.