# Calculate MI for each species and plot goodness of fit by length of analysis
1. load datasets
2. calculate MI

In [None]:
import pandas as pd
import numpy as np
from parallelspaper.config.paths import DATA_DIR, FIGURE_DIR
from parallelspaper.birdsong_datasets import MI_seqs, compress_seq, BCOL_DICT
from parallelspaper import information_theory as it 
from tqdm.autonotebook import tqdm
from parallelspaper import model_fitting as mf
from parallelspaper.utils import save_fig

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

### Load data

In [None]:
# Load the birdsong MI DataFrame from its pickle file under DATA_DIR.
# NOTE(review): pickle files can execute code when loaded; only read files
# produced by this project.
MI_DF = pd.read_pickle(DATA_DIR / 'MI_DF/birdsong/birdsong_MI_DF_long.pickle')

In [None]:
# Bare expression: renders MI_DF as this notebook cell's output.
MI_DF

In [None]:
# Load the birdsong model-fit DataFrame from its pickle file under DATA_DIR.
# The plotting cells below read its `species`, `d`, `R2_concat`,
# `R2_pow_comp`, `AICc_concat` and `AICc_exp` columns.
fit_df= pd.read_pickle(DATA_DIR / 'MI_DF/birdsong/birdsong_fit_df_long.pickle')

In [None]:
# Preview the first three rows of fit_df as this notebook cell's output.
fit_df[:3]

### Plot results

In [None]:
import matplotlib.pyplot as plt

##### R2 full concatenative model

In [None]:
# Maps each value found in fit_df.species to the title shown above that
# species' panel in the plots below.
spc = {'Starling':'starling', 'CAVI': 'vireo', 'CATH':'thrasher', 'BF':'finch'}

In [None]:
# Plot r^2 of the full concatenative ("comp.") model against distance,
# one panel per species, then save the figure as 'R2_song'.
species_names = np.unique(fit_df.species)  # computed once: sets panel count and order
ncol = len(species_names)
zoom = 4  # width and height of each panel, in inches
# squeeze=False always yields a 2-D array of Axes, so a table holding a single
# species works as well; ravel() gives back flat, per-panel indexing.
fig, axs = plt.subplots(ncols=ncol, figsize=(ncol * zoom, zoom), squeeze=False)
axs = axs.ravel()
for ax, species in zip(axs, species_names):
    color = BCOL_DICT[species]
    spec_fit_df = fit_df[fit_df.species == species]

    ax.plot(spec_fit_df.d, spec_fit_df.R2_concat.values, lw=4, color=color)
    ax.set_title(spc[species], fontsize=16)
    ax.set_ylim([0.5, 1.01])
    ax.set_xlim([np.min(spec_fit_df.d), np.max(spec_fit_df.d)])

    # Diagnostic printout only (nothing is drawn from it): among rows with
    # d > 100, print the d of the last row (in table order) whose r^2 still
    # exceeds 99.9% of the r^2 of the first such row. Skipped when there are
    # no rows beyond 100 or none pass the threshold, instead of raising
    # IndexError.
    long_range_df = spec_fit_df[spec_fit_df.d > 100]
    r2_long = long_range_df.R2_concat.values
    if r2_long.size:
        above = np.where(r2_long > (r2_long[0] * .999))[0]
        if above.size:
            d = long_range_df.d.values[above[-1]]
            print(d)

    ax.tick_params(which='both', direction='in')
    ax.tick_params(which='major', length=10, width=3)
    ax.tick_params(which='minor', length=5, width=2)
    ax.set_xlabel('Distance (syllables)', fontsize=18)
    # Base 10 is the default for a log axis. The old `basex` keyword was
    # deprecated in Matplotlib 3.3 and later removed, so it is not passed.
    ax.set_xscale("log")
    for axis in ['top', 'bottom', 'left', 'right']:
        ax.spines[axis].set_linewidth(3)
        ax.spines[axis].set_color('k')

# Only the left-most panel carries the y-axis label.
axs[0].set_ylabel('$r^2$ comp. model', fontsize=18)
plt.tight_layout()

save_fig(FIGURE_DIR/'R2_song')

##### R2 of power-law component

In [None]:
# Plot r^2 of the power-law component against distance, one panel per
# species, then save the figure as 'r2_powerlaw_song'.
species_names = np.unique(fit_df.species)  # computed once: sets panel count and order
ncol = len(species_names)
zoom = 4  # width and height of each panel, in inches
# squeeze=False always yields a 2-D array of Axes, so a table holding a single
# species works as well; ravel() gives back flat, per-panel indexing.
fig, axs = plt.subplots(ncols=ncol, figsize=(ncol * zoom, zoom), squeeze=False)
axs = axs.ravel()
for ax, species in zip(axs, species_names):
    color = BCOL_DICT[species]
    spec_fit_df = fit_df[fit_df.species == species]
    ax.plot(spec_fit_df.d, spec_fit_df.R2_pow_comp.values, lw=4, color=color)
    ax.set_title(spc[species], fontsize=16)
    ax.set_ylim([0.5, 1.01])
    ax.set_xlim([np.min(spec_fit_df.d), np.max(spec_fit_df.d)])

    ax.tick_params(which='both', direction='in')
    ax.tick_params(which='major', length=10, width=3)
    ax.tick_params(which='minor', length=5, width=2)
    ax.set_xlabel('Distance (syllables)', fontsize=18)
    # Base 10 is the default for a log axis. The old `basex` keyword was
    # deprecated in Matplotlib 3.3 and later removed, so it is not passed.
    ax.set_xscale("log")
    for axis in ['top', 'bottom', 'left', 'right']:
        ax.spines[axis].set_linewidth(3)
        ax.spines[axis].set_color('k')

# Only the left-most panel carries the y-axis label.
axs[0].set_ylabel('$r^2$ power-law component', fontsize=18)
plt.tight_layout()


save_fig(FIGURE_DIR/'r2_powerlaw_song')


##### $\Delta$AICc: composite vs. exponential model

In [None]:
# Plot the AICc difference (AICc_concat - AICc_exp) against distance, one
# panel per species, then save the figure as 'delta_AIC_song'.
species_names = np.unique(fit_df.species)  # computed once: sets panel count and order
ncol = len(species_names)
zoom = 4  # width and height of each panel, in inches
# squeeze=False always yields a 2-D array of Axes, so a table holding a single
# species works as well; ravel() gives back flat, per-panel indexing.
fig, axs = plt.subplots(ncols=ncol, figsize=(ncol * zoom, zoom), squeeze=False)
axs = axs.ravel()
for ax, species in zip(axs, species_names):
    color = BCOL_DICT[species]
    spec_fit_df = fit_df[fit_df.species == species]
    delta_aicc = spec_fit_df.AICc_concat.values - spec_fit_df.AICc_exp.values
    ax.plot(spec_fit_df.d, delta_aicc, lw=4, color=color)
    ax.set_title(spc[species], fontsize=16)
    ax.tick_params(which='both', direction='in')
    ax.tick_params(which='major', length=10, width=3)
    ax.tick_params(which='minor', length=5, width=2)
    ax.set_xlabel('Distance (syllables)', fontsize=18)
    # Base 10 is the default for a log axis. The old `basex` keyword was
    # deprecated in Matplotlib 3.3 and later removed, so it is not passed.
    ax.set_xscale("log")
    for axis in ['top', 'bottom', 'left', 'right']:
        ax.spines[axis].set_linewidth(3)
        ax.spines[axis].set_color('k')
    ax.set_xlim([np.min(spec_fit_df.d), np.max(spec_fit_df.d)])

# Only the left-most panel carries the y-axis label. The raw string keeps the
# backslash of the mathtext `\Delta` without an invalid-escape warning; the
# resulting label text is unchanged.
axs[0].set_ylabel(r'$\Delta$AICc (comp.- exp.)', fontsize=18)
plt.tight_layout()

save_fig(FIGURE_DIR/'delta_AIC_song')