In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from childes_mi.utils.paths import DATA_DIR, FIGURE_DIR
from childes_mi.utils.general import flatten,save_fig

In [3]:
from childes_mi.information_theory import model_fitting as mf

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
from tqdm.autonotebook import tqdm



In [5]:
# Load the pickled Drosophila mutual-information table from the project data
# directory. Pickle files run arbitrary code on load, so only read trusted ones.
mi_pickle_path = DATA_DIR / 'mi/drosophila_mi_1000.pickle'
MI_DF = pd.read_pickle(mi_pickle_path)

In [6]:
# Preview at most the first three rows of the loaded table.
MI_DF.head(3)

Unnamed: 0,MI,MI_var,shuff_MI,shuff_MI_var,distances
0,"[11.491092426440758, 11.113848299482346, 10.94...","[0.0032096905199801226, 0.0031487970998960533,...","[10.163121505352322, 10.162245550612901, 10.16...","[0.0030501281007033225, 0.003050425348712849, ...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."


In [7]:
# Add one NaN-filled placeholder column per model-fitting output so the
# fitting loop can fill them in row by row.
MI_DF = MI_DF.assign(**dict.fromkeys(
    [
        'exp_results', 'pow_results', 'concat_results',
        'R2_exp', 'R2_concat', 'R2_power',
        'AICc_exp', 'AICc_concat', 'AICc_power',
        'bestfitmodel', 'curvature', 'min_peak',
    ],
    np.nan,
))
# 'curvature' is stored as an object column (each cell holds the curvature
# values computed for that row).
MI_DF = MI_DF.astype({'curvature': object})

In [8]:
# Fit the exponential, power-law and combined (pow_exp) decay models to every
# row of MI_DF and store the fit objects, goodness-of-fit statistics, the
# curvature of the best-fit model and the index of its first curvature minimum.

# Upper bound of the distance grid on which the best-fit model is evaluated.
# NOTE(review): the fits themselves use every value in row.distances; n only
# limits the evaluation grid below - confirm that this is intended.
n = 100

# 1000 log-spaced evaluation distances from 1 to n. This does not depend on the
# row, so it is built once instead of on every iteration.
distances_mod = np.logspace(0, np.log10(n), base=10, num=1000)

# MI_DF columns that receive the results, in the order the values are written.
result_columns = [
    'exp_results', 'pow_results', 'concat_results',
    'R2_exp', 'R2_concat', 'R2_power', 'AICc_exp',
    'AICc_concat', 'AICc_power', 'bestfitmodel', 'curvature', 'min_peak',
]

for idx, row in tqdm(MI_DF.iterrows(), total=len(MI_DF)):
    # signal: measured MI minus the shuffled-baseline MI
    sig = np.array(row.MI-row.shuff_MI)
    distances = row.distances

    # fit the three candidate models; mf.fit_models also names the best one
    results_power, results_exp, results_pow_exp, best_fit_model = mf.fit_models(distances, sig)

    # goodness-of-fit statistics for each model
    (R2_exp, R2_concat, R2_power,
     AICc_exp, AICc_pow, AICc_concat) = mf.fit_results(
        sig, distances, results_exp, results_power, results_pow_exp
    )

    # evaluate the best-fit model on the log-spaced distance grid
    if best_fit_model == 'pow_exp':
        y_model = mf.get_y(mf.pow_exp_decay, results_pow_exp, distances_mod)
    elif best_fit_model == 'exp':
        y_model = mf.get_y(mf.exp_decay, results_exp, distances_mod)
    elif best_fit_model == 'pow':
        y_model = mf.get_y(mf.powerlaw_decay, results_power, distances_mod)
    else:
        # Fail loudly: without this branch an unknown label would silently
        # reuse y_model from the previous row (or raise NameError on the first).
        raise ValueError(
            'unexpected best-fit model: {!r}'.format(best_fit_model)
        )

    # curvature of the log of the model curve
    curvature_model = mf.curvature(np.log(y_model))

    # if the best fit model is pow_exp, then grab the first curvature minimum
    if best_fit_model == 'pow_exp':
        # Position j is selected when curvature_model[j + 1] is strictly lower
        # than both of its neighbours, i.e. a local minimum at j + 1.
        interior = curvature_model[1:-1]
        peaks = np.where(
            (interior < curvature_model[2:]) & (interior < curvature_model[:-2])
        )
        # NOTE(review): the stored value is j, one less than the index of the
        # minimum in curvature_model; kept unchanged so saved results and any
        # downstream use stay comparable.
        # A curve with no local minimum yields NaN instead of an IndexError.
        min_peak = peaks[0][0] if len(peaks[0]) > 0 else np.nan
    else:
        min_peak = np.nan

    # save the model fit results for this row to MI_DF
    MI_DF.loc[idx, np.array(result_columns)] = [
        results_exp, results_power, results_pow_exp,
        R2_exp, R2_concat, R2_power, AICc_exp,
        AICc_concat, AICc_pow, best_fit_model,
        curvature_model, min_peak
    ]


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))




In [9]:
# Display the table with the fit-result columns filled in by the loop above.
MI_DF

Unnamed: 0,MI,MI_var,shuff_MI,shuff_MI_var,distances,exp_results,pow_results,concat_results,R2_exp,R2_concat,R2_power,AICc_exp,AICc_concat,AICc_power,bestfitmodel,curvature,min_peak
0,"[11.491092426440758, 11.113848299482346, 10.94...","[0.0032096905199801226, 0.0031487970998960533,...","[10.163121505352322, 10.162245550612901, 10.16...","[0.0030501281007033225, 0.003050425348712849, ...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",<lmfit.minimizer.MinimizerResult object at 0x7...,<lmfit.minimizer.MinimizerResult object at 0x7...,<lmfit.minimizer.MinimizerResult object at 0x7...,0.950601,0.999464,0.995967,-6457.149146,-10977.006191,-8962.49419,pow_exp,"[2.672156311246409e-07, 4.009647498847741e-07,...",883.0


In [10]:
# Display names for the rows of MI_DF, in row order; the summary tables below
# use this list as their index, so it needs one entry per row.
labels = ['Drosophila']

### Calculate R2

In [11]:
# r^2 of each model: after the transpose, rows are models and there is one
# column per entry of `labels`.
R2 = MI_DF[['R2_exp', 'R2_concat', 'R2_power']].rename(
    columns={'R2_exp': 'exp', 'R2_concat': 'combined', 'R2_power': 'power-law'}
)
R2.index = labels
R2 = R2.T
R2

Unnamed: 0,Drosophila
exp,0.950601
combined,0.999464
power-law,0.995967


### AICc

In [12]:
# AICc of each model: after the transpose, rows are models and there is one
# column per entry of `labels`.
# The combined model is labelled 'combined' (no trailing period) so that it
# matches the r^2 table; this label carries through to every table derived
# from AICcs and to the final results table.
AICcs = MI_DF[['AICc_exp', 'AICc_concat', 'AICc_power']].rename(
    columns={
        'AICc_exp': 'exp',
        'AICc_concat': 'combined',
        'AICc_power': 'power-law',
    }
)
AICcs.index = labels
AICcs = AICcs.T
AICcs

Unnamed: 0,Drosophila
exp,-6457.149146
combined.,-10977.006191
power-law,-8962.49419


#### $\Delta$AICc

In [13]:
# Delta-AICc: each model's AICc minus the lowest AICc in the same column, so
# the best model of every column is 0.
# AICcs.min(axis=0) is a Series indexed by the column labels; subtracting it
# aligns on columns, so this works for any number of models and labels
# without a hard-coded shape.
delta_AICcs = AICcs - AICcs.min(axis=0)
delta_AICcs

Unnamed: 0,Drosophila
exp,4519.857045
combined.,0.0
power-law,2014.512002


### relative likelihood

In [14]:
# Convert the delta-AICc table into relative likelihoods with
# mf.relative_likelihood (presumably exp(-delta / 2) per model - TODO confirm
# against the helper's implementation).
relative_likelihoods = mf.relative_likelihood(delta_AICcs)
relative_likelihoods

Unnamed: 0,Drosophila
exp,0.0
combined.,1.0
power-law,0.0


### relative probability of models

In [15]:
# Convert the relative likelihoods into per-model probabilities with
# mf.Prob_model_Given_data_and_models (presumably a normalisation over the
# candidate models - TODO confirm against the helper's implementation).
prob_models = mf.Prob_model_Given_data_and_models(relative_likelihoods)
prob_models

Unnamed: 0,Drosophila
exp,0.0
combined.,1.0
power-law,0.0


#### Join tables

In [16]:
# Tag every summary table with the name of the statistic it holds; the tag
# becomes the outer index level of the joined results table.
# Note: this adds a string column to each table in place.
for stat_table, stat_name in (
    (AICcs, 'AICc'),
    (relative_likelihoods, 'Relative likelihood'),
    (R2, '$r^2$'),
    (prob_models, 'Relative probability'),
):
    stat_table['superlabel'] = stat_name

In [17]:
# Stack the four statistic tables, round to three decimals and show values
# that round to exactly 0 or 1 as '<0.001' / '>0.999'.
stacked_tables = pd.concat([AICcs, R2, relative_likelihoods, prob_models])
results_table = (
    stacked_tables
    .round(3)
    .replace(0, '<0.001')
    .replace(1, '>0.999')
)
# Index by (statistic, model); the model level gets an empty name.
results_table[''] = results_table.index
results_table = results_table.set_index(['superlabel', ''])
results_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Drosophila
superlabel,Unnamed: 1_level_1,Unnamed: 2_level_1
AICc,exp,-6457.15
AICc,combined.,-10977
AICc,power-law,-8962.49
$r^2$,exp,0.951
$r^2$,combined,0.999
$r^2$,power-law,0.996
Relative likelihood,exp,<0.001
Relative likelihood,combined.,>0.999
Relative likelihood,power-law,<0.001
Relative probability,exp,<0.001


In [18]:
# Render the results table as LaTeX. escape=False keeps the '$r^2$' label as
# math, so the comparison signs added earlier are wrapped in math mode by hand.
results_string = (
    results_table.to_latex(bold_rows=True, escape=False)
    .replace('>', '$>$')
    .replace('<', '$<$')
    # drop the index-level name from the header
    .replace('superlabel', '')
    # put a rule before every line that starts with a bold label; the
    # backslash of \midrule is escaped explicitly ('\m' is an invalid escape
    # sequence and triggers a warning on current Python versions)
    .replace('\n\\textbf', '\n\\midrule\n\\textbf')
    # the insertion above can land next to a rule pandas already emitted;
    # collapse back-to-back rules into one
    .replace('\\midrule\n\\midrule\n', '\\midrule\n')
)
print(results_string)

\begin{tabular}{lll}
\toprule
                     &           & Drosophila \\
\midrule
\textbf{} & {} &            \\
\midrule
\midrule
\textbf{AICc} & \textbf{exp} &   -6457.15 \\
                     & \textbf{combined.} &     -10977 \\
                     & \textbf{power-law} &   -8962.49 \\
\midrule
\textbf{$r^2$} & \textbf{exp} &      0.951 \\
                     & \textbf{combined} &      0.999 \\
                     & \textbf{power-law} &      0.996 \\
\midrule
\textbf{Relative likelihood} & \textbf{exp} &     $<$0.001 \\
                     & \textbf{combined.} &     $>$0.999 \\
                     & \textbf{power-law} &     $<$0.001 \\
\midrule
\textbf{Relative probability} & \textbf{exp} &     $<$0.001 \\
                     & \textbf{combined.} &     $>$0.999 \\
                     & \textbf{power-law} &     $<$0.001 \\
\bottomrule
\end{tabular}

