In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from childes_mi.utils.paths import DATA_DIR, FIGURE_DIR
from childes_mi.utils.general import flatten,save_fig

In [3]:
from childes_mi.information_theory import model_fitting as mf

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.autonotebook import tqdm



In [5]:
# Load the precomputed mutual-information table from the project data directory.
# NOTE: read_pickle can execute arbitrary code, so only load pickles from a trusted source.
mi_pickle_path = DATA_DIR / 'mi/epic_kitchens.pickle'
MI_DF = pd.read_pickle(mi_pickle_path)

In [6]:
# Preview the first (up to) three rows of the loaded table.
MI_DF.head(3)

Unnamed: 0,MI,MI_var,shuff_MI,shuff_MI_var,distances
0,"[10.15572628962066, 10.019879244923246, 9.9284...","[0.013130092979006703, 0.013108715406449277, 0...","[9.869473399261516, 9.848040773923104, 9.82892...","[0.012926912993544148, 0.01299377689737788, 0....","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."


In [7]:
# Pre-allocate one NaN-filled column per model-fitting output so that the
# fitting loop can write its results row by row with `.loc`.
MI_DF = MI_DF.assign(**{i:np.nan for i in ['exp_results', 'pow_results', 'concat_results',
     'R2_exp', 'R2_concat', 'R2_power', 'AICc_exp',
     'AICc_concat', 'AICc_power', 'bestfitmodel', 'curvature', 'min_peak']})
# These columns end up holding Python objects (fit-result objects, the name of
# the best model, the curvature array) rather than floats. Give them object
# dtype up front so the later assignments do not depend on pandas silently
# upcasting a float64 column.
MI_DF = MI_DF.astype({
    col: object
    for col in ['exp_results', 'pow_results', 'concat_results', 'bestfitmodel', 'curvature']
})

In [8]:
n = 100 # max distance for computation

# Columns of MI_DF written for every row by the loop below. The order must
# match the list of values assigned at the end of each iteration.
fit_columns = ['exp_results', 'pow_results', 'concat_results',
               'R2_exp', 'R2_concat', 'R2_power', 'AICc_exp',
               'AICc_concat', 'AICc_power', 'bestfitmodel', 'curvature', 'min_peak']

# The grid on which the best model is evaluated does not depend on the row,
# so build it once: 1000 log-spaced distances from 1 to n.
distances_mod = np.logspace(0,np.log10(n), base=10, num=1000)

for idx, row in tqdm(MI_DF.iterrows(), total=len(MI_DF)):
    # get signal: row.MI minus row.shuff_MI, element-wise
    sig = np.array(row.MI-row.shuff_MI)
    distances = row.distances

    # fit models
    results_power, results_exp, results_pow_exp, best_fit_model = mf.fit_models(distances, sig)

    # get fit results (note the argument and return orders of mf.fit_results)
    R2_exp, R2_concat, R2_power, AICc_exp, \
        AICc_pow, AICc_concat = mf.fit_results(sig, distances,
                                              results_exp, results_power,
                                              results_pow_exp)

    # get model y for the best-fitting model
    model_choices = {
        'pow_exp': (mf.pow_exp_decay, results_pow_exp),
        'exp': (mf.exp_decay, results_exp),
        'pow': (mf.powerlaw_decay, results_power),
    }
    if best_fit_model not in model_choices:
        # Fail loudly instead of hitting a NameError or silently reusing the
        # y_model left over from the previous row.
        raise ValueError('unexpected best_fit_model: {!r}'.format(best_fit_model))
    model_function, model_results = model_choices[best_fit_model]
    y_model = mf.get_y(model_function, model_results, distances_mod)

    # get curvature of model_y (computed on the log of the model values)
    curvature_model = mf.curvature(np.log(y_model))

    # if the best fit model is pow_exp, then grab the min peak
    if best_fit_model == 'pow_exp':
        # Strict local minima of the curvature: interior points smaller than
        # both neighbours.
        # NOTE(review): the indices in `peaks` are relative to
        # curvature_model[1:-1], i.e. index j refers to curvature_model[j + 1]
        # - confirm that downstream consumers of min_peak expect this offset.
        interior = curvature_model[1:-1]
        peaks = np.where(
            (interior < curvature_model[:-2]) & (interior < curvature_model[2:])
        )
        # No local minimum found: use the same NaN sentinel as the other
        # models instead of raising IndexError.
        min_peak = peaks[0][0] if len(peaks[0]) > 0 else np.nan
    else:
        min_peak = np.nan

    # save the model fit results to MI_DF
    MI_DF.loc[idx, fit_columns] = [
        results_exp, results_power, results_pow_exp,
        R2_exp, R2_concat, R2_power, AICc_exp,
        AICc_concat, AICc_pow, best_fit_model,
        curvature_model, min_peak
    ]


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

  return np.abs(np.log(y_true) - np.log(y_model)) * (1 / (np.log(1 + x)))





In [9]:
# Inspect the fitted results; show only the first rows rather than the whole frame.
MI_DF.head()

Unnamed: 0,MI,MI_var,shuff_MI,shuff_MI_var,distances,exp_results,pow_results,concat_results,R2_exp,R2_concat,R2_power,AICc_exp,AICc_concat,AICc_power,bestfitmodel,curvature,min_peak
0,"[10.15572628962066, 10.019879244923246, 9.9284...","[0.013130092979006703, 0.013108715406449277, 0...","[9.869473399261516, 9.848040773923104, 9.82892...","[0.012926912993544148, 0.01299377689737788, 0....","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",<lmfit.minimizer.MinimizerResult object at 0x7...,<lmfit.minimizer.MinimizerResult object at 0x7...,<lmfit.minimizer.MinimizerResult object at 0x7...,0.946105,0.967834,0.966792,-165.572335,-183.303103,-186.878455,pow,"[6.4391085990021975e-15, 9.658662898503297e-15...",


In [19]:
# Display name for each row of MI_DF, used as the column headers of the result tables.
labels = ["Cooking"]

### Calculate R2

In [20]:
# Build the (model x dataset) table of R^2 values: pick the three R^2 columns,
# give them display names, label the rows with `labels`, then transpose.
R2 = MI_DF[['R2_exp', 'R2_concat', 'R2_power']].rename(
    columns={'R2_exp': 'exp', 'R2_concat': 'combined', 'R2_power': 'power-law'}
)
R2.index = labels
R2 = R2.T
R2

Unnamed: 0,Cooking
exp,0.946105
combined,0.967834
power-law,0.966792


### AICc

In [21]:
# Build the (model x dataset) table of AICc values: pick the three AICc
# columns, give them display names, label the rows with `labels`, then transpose.
# The combined model is labelled 'combined' (no trailing period), matching the
# label used in the R^2 table so the joined results table uses a single spelling.
AICcs = MI_DF[['AICc_exp', 'AICc_concat', 'AICc_power']].rename(
    columns={'AICc_exp': 'exp', 'AICc_concat': 'combined', 'AICc_power': 'power-law'}
)
AICcs.index = labels
AICcs = AICcs.T
AICcs

Unnamed: 0,Cooking
exp,-165.572335
combined.,-183.303103
power-law,-186.878455


#### $\Delta$AICc

In [22]:
# Difference between each model's AICc and the smallest AICc for the same
# dataset (column). AICcs is (model x dataset), so the per-column minimum is
# subtracted from every row. This works for any number of models and datasets.
delta_AICcs = AICcs.sub(AICcs.min(axis=0), axis='columns')
delta_AICcs

Unnamed: 0,Cooking
exp,21.30612
combined.,3.575352
power-law,0.0


### relative likelihood

In [23]:
# Pass the delta-AICc table to the project helper `mf.relative_likelihood`
# and display the result.
# NOTE(review): the displayed values are consistent with exp(-delta_AICc / 2);
# confirm against the helper's definition.
relative_likelihoods = mf.relative_likelihood(delta_AICcs)
relative_likelihoods

Unnamed: 0,Cooking
exp,2.4e-05
combined.,0.167349
power-law,1.0


### relative probability of models

In [24]:
# Pass the relative likelihoods to the project helper
# `mf.Prob_model_Given_data_and_models` and display the result.
# NOTE(review): the displayed values look like the relative likelihoods
# normalised to sum to 1 per dataset; confirm against the helper's definition.
prob_models = mf.Prob_model_Given_data_and_models(relative_likelihoods)
prob_models

Unnamed: 0,Cooking
exp,2e-05
combined.,0.143355
power-law,0.856625


#### Join tables

In [25]:
# Tag each table (in place) with the name of the statistic it holds, so the
# rows can be grouped by statistic once the tables are stacked.
for table, superlabel in [
    (AICcs, 'AICc'),
    (relative_likelihoods, 'Relative likelihood'),
    (R2, '$r^2$'),
    (prob_models, 'Relative probability'),
]:
    table['superlabel'] = superlabel

In [26]:
# Stack the four statistic tables, round to three decimals, and replace values
# that round to exactly 0 or 1 with '<0.001' / '>0.999'.
# NOTE(review): the replacement applies to every row, so an AICc or r^2 that
# rounds to exactly 0 or 1 would be replaced too - confirm that is intended.
results_table = (
    pd.concat([AICcs, R2, relative_likelihoods, prob_models])
    .round(3)
    .replace(0, '<0.001')
    .replace(1, '>0.999')
)
# Index the rows by (statistic, model): copy the model names from the index into
# an unnamed ('') column, then build the two-level index from both columns.
results_table[''] = results_table.index
results_table = results_table.set_index(['superlabel', ''])
results_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Cooking
superlabel,Unnamed: 1_level_1,Unnamed: 2_level_1
AICc,exp,-165.572
AICc,combined.,-183.303
AICc,power-law,-186.878
$r^2$,exp,0.946
$r^2$,combined,0.968
$r^2$,power-law,0.967
Relative likelihood,exp,<0.001
Relative likelihood,combined.,0.167
Relative likelihood,power-law,>0.999
Relative probability,exp,<0.001


In [29]:
# Render the results table as LaTeX and post-process the text:
#   - escape=False keeps labels such as '$r^2$' as LaTeX math;
#   - '>' and '<' are wrapped in math mode;
#   - the 'superlabel' index name is blanked out;
#   - a \midrule is inserted before every line that starts with \textbf.
# Backslashes are written as '\\' so the literal contains no invalid escape
# sequence ('\m'); the resulting string is unchanged.
results_string = (
    results_table.to_latex(bold_rows=True, escape=False)
    .replace('>', '$>$')
    .replace('<', '$<$')
    .replace('superlabel', '')
    .replace('\n\\textbf', '\n\\midrule\n\\textbf')
)
print(results_string)

\begin{tabular}{lll}
\toprule
                     &           &  Cooking \\
\midrule
\textbf{} & {} &          \\
\midrule
\midrule
\textbf{AICc} & \textbf{exp} & -165.572 \\
                     & \textbf{combined.} & -183.303 \\
                     & \textbf{power-law} & -186.878 \\
\midrule
\textbf{$r^2$} & \textbf{exp} &    0.946 \\
                     & \textbf{combined} &    0.968 \\
                     & \textbf{power-law} &    0.967 \\
\midrule
\textbf{Relative likelihood} & \textbf{exp} &   $<$0.001 \\
                     & \textbf{combined.} &    0.167 \\
                     & \textbf{power-law} &   $>$0.999 \\
\midrule
\textbf{Relative probability} & \textbf{exp} &   $<$0.001 \\
                     & \textbf{combined.} &    0.143 \\
                     & \textbf{power-law} &    0.857 \\
\bottomrule
\end{tabular}

