# Create a results dataframe and a table to export to LaTeX

In [None]:
import pandas as pd
from parallelspaper.config.paths import DATA_DIR
import numpy as np
from parallelspaper import model_fitting as mf

### load data

In [None]:
# Load the fitted mutual-information results for the language datasets.
# NOTE: pickle files execute code on load; only read files produced by this project.
fitted_mi_path = DATA_DIR / 'MI_DF/language/language_MI_DF_fitted.pickle'
MI_DF = pd.read_pickle(fitted_mi_path)

In [None]:
# Subset the dataset to only look at the major units, analysed per session.
# A vectorised mask replaces the row-by-row iterrows() comprehension: it is faster
# and still returns an empty frame with all columns when MI_DF has no rows
# (an empty Python list used as a mask would select zero *columns* instead).
major_units = ['phonetic', 'phonemes', 'phoneme', 'ortho-phonetic']
subset_MI_DF = MI_DF[
    MI_DF['unit'].isin(major_units) & MI_DF['analysis'].isin(['session'])
]

In [None]:
# Display the filtered rows (notebook cell output) as a quick sanity check.
subset_MI_DF

### Calculate R2

In [None]:
# Collect the r^2 of each fitted model, one column per language and one row per model.
r2_labels = {'R2_exp': 'exp', 'R2_concat': 'combined', 'R2_power': 'power-law'}
R2 = subset_MI_DF[list(r2_labels)].rename(columns=r2_labels)
# Label the rows by language before transposing so languages end up as columns.
R2.index = subset_MI_DF.language.values
R2 = R2.T
R2

#### AICc

In [None]:
# Collect the AICc of each fitted model, one column per language and one row per model.
AICcs = subset_MI_DF[['AICc_exp', 'AICc_concat', 'AICc_power']]
# Model labels match the ones used for the R2 table ('combined', without a trailing
# period) so that rows of the stacked results table are labelled consistently.
AICcs.columns = ['exp', 'combined', 'power-law']
# Label the rows by language (set once; the original assigned the index twice).
AICcs.index = subset_MI_DF.language.values
AICcs = AICcs.T
AICcs

#### $\Delta$AICc

In [None]:
# Delta AICc: each model's AICc minus the best (lowest) AICc for the same language.
# AICcs has models as rows and languages as columns, so the per-language minimum is
# taken down axis 0 and broadcast across the rows. This avoids hard-coding the
# number of languages and models.
best_AICc_per_language = np.min(AICcs.values, axis=0)
delta_AICcs = AICcs - best_AICc_per_language
delta_AICcs

#### Relative likelihood

In [None]:
# Convert the delta AICc values into relative likelihoods with the project's
# model_fitting helper (presumably exp(-delta / 2); confirm against
# mf.relative_likelihood).
relative_likelihoods = mf.relative_likelihood(delta_AICcs)
relative_likelihoods

#### Relative probability of models

In [None]:
# Turn the relative likelihoods into per-model probabilities with the project's
# model_fitting helper (presumably normalising across the candidate models;
# confirm against mf.Prob_model_Given_data_and_models).
prob_models = mf.Prob_model_Given_data_and_models(relative_likelihoods)
prob_models

#### Join tables

In [None]:
# Tag every table with the name of the statistic it holds; this column becomes the
# outer level of the results table's row index.
for table, label in (
    (AICcs, 'AICc'),
    (relative_likelihoods, 'Relative likelihood'),
    (R2, '$r^2$'),
    (prob_models, 'Relative probability'),
):
    table['superlabel'] = label

In [None]:
# Stack the four statistics tables on top of each other.
stacked_tables = pd.concat([AICcs, R2, relative_likelihoods, prob_models])
# Round to three decimals, then show values that round to exactly 0 or 1 as bounds.
rounded_tables = stacked_tables.round(3)
results_table = rounded_tables.replace(0, '<0.001').replace(1, '>0.999')
# Index the rows by (statistic, model); the model level deliberately has an empty name.
results_table[''] = results_table.index
results_table = results_table.set_index(['superlabel', ''])
results_table

#### print in latex format

In [None]:
# Render the table as LaTeX. escape=False keeps the '$r^2$' label as math, so the
# bare '>' and '<' bound markers are wrapped in math mode by hand, the 'superlabel'
# index name is blanked out, and a \midrule is inserted before every bold row label.
# The backslash in '\\midrule' is escaped explicitly: '\m' is not a valid escape
# sequence and triggers a warning on current Python versions.
results_string = (
    results_table.to_latex(bold_rows=True, escape=False)
    .replace('>', '$>$')
    .replace('<', '$<$')
    .replace('superlabel', '')
    .replace('\n\\textbf', '\n\\midrule\n\\textbf')
)
print(results_string)

### Curvature minimum and maximum

In [None]:
def _load_language_stats(relative_path, language):
    """Read one corpus statistics pickle and tag it with its language name."""
    # NOTE: pickle files execute code on load; only read files produced by this project.
    frame = pd.read_pickle(DATA_DIR/relative_path)
    frame['Language'] = language
    return frame


german_stats = _load_language_stats('stats_df/GECO_stats_df.pickle', 'German')
italian_stats = _load_language_stats('stats_df/AsiCA_stats_df.pickle', 'Italian')
english_stats = _load_language_stats('stats_df/BUCKEYE_stats_df.pickle', 'English')
japanese_stats = _load_language_stats('stats_df/CSJ_stats_df.pickle', 'Japanese')

# One frame holding the statistics of all four corpora.
stats_df = pd.concat([german_stats, italian_stats, english_stats, japanese_stats])

In [None]:
# Display the combined per-corpus statistics (notebook cell output).
stats_df

In [None]:
# Log-spaced grid of 1000 values running from 10**0 = 1 up to 100.
# min_peak and the argmax of each curvature array are used as indices into it.
curvature_dist = np.logspace(start=0, stop=np.log10(100), num=1000, base=10)

In [None]:
# For every language, print where the curvature minimum and maximum fall (in phones
# and, scaled by the median phone duration, in seconds) next to word-length statistics.
for _, lang_row in subset_MI_DF.iterrows():
    # stats_df stores capitalised language names ('German', 'Italian', ...).
    is_language = stats_df.Language.values == lang_row.language.capitalize()
    language_stats = stats_df[is_language]
    word_lens = language_stats.word_length_phones.values[0]
    median_phone_len = np.median(language_stats.phone_duration_s.values[0])

    # min_peak and the argmax of the curvature are positions on the curvature_dist grid.
    curvature_len = curvature_dist[int(lang_row.min_peak)]
    curv_max = curvature_dist[np.argmax(lang_row.curvature)]
    # Fraction of words shorter than the location of the curvature maximum.
    pct_within_max = np.sum(word_lens < curv_max)/len(word_lens)

    print(lang_row.language)
    print('\tmin curv phones:', round(curvature_len,3))
    print('\tmin curv seconds:', round(curvature_len*median_phone_len,3))
    print('\tmax curv phones:', round(curv_max,3))
    print('\tmax curv seconds:', round(curv_max*median_phone_len,3))
    print('\tmedian word phones:', np.median(word_lens))
    print('\tpct within max word len phones:', round(pct_within_max, 3))
    print()