# Calculate MI for each individual songbird
1. Load the precomputed per-individual MI results
2. Compare model fit ($\Delta$AICc) with dataset size

In [None]:
import pandas as pd
import numpy as np
from parallelspaper.config.paths import DATA_DIR, FIGURE_DIR
from parallelspaper.birdsong_datasets import MI_seqs, compress_seq, BCOL_DICT
from parallelspaper import information_theory as it 
from tqdm.autonotebook import tqdm
from parallelspaper import model_fitting as mf
import scipy.stats
import seaborn as sns
from parallelspaper.utils import save_fig

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

### Load data

In [None]:
# Read the pickled per-individual birdsong dataframe from under DATA_DIR.
MI_DF = pd.read_pickle(DATA_DIR / 'MI_DF/birdsong/birdsong_MI_DF_individual.pickle')

### Correlation between dataset size and $\Delta$AICc

In [None]:
# Sorted distinct values of the 'type' column.
np.unique(MI_DF['type'])

In [None]:
# Same listing with the 'compress' rows filtered out; these are the types
# that the figure below iterates over.
np.unique(MI_DF.loc[MI_DF['type'] != 'compress', 'type'])

In [None]:
# Lowercase panel labels, indexed by subplot position.
letters = list('abcdefghijklmnopqrstuvwxyz')

In [None]:
# Figure: one panel per value of MI_DF.type (excluding 'compress') showing
# delta_AIC (AICc_exp - AICc_concat) against log2(n_elements), with points
# coloured per species via BCOL_DICT and a seaborn regression line on top.
# The 2x2 grid and the title mapping below cover four types.
fontsize = 18
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(10, 10))

# Panel titles keyed by the value found in MI_DF.type.
stype_titles = {
    'day': 'Day',
    'shuffled_within': 'Shuffled within song',
    'shuffled_between': 'Shuffled between songs',
    'song': 'Within song',
}

for sti, stype in enumerate(np.unique(MI_DF[MI_DF.type != 'compress'].type)):
    ax = axs.flatten()[sti]
    ax.annotate(
        letters[sti],
        xy=(-0.05, 1.1),
        xycoords="axes fraction",
        size=20,
        fontweight='bold',
        fontfamily='Arial',
    )

    # Rows for this type, with the derived columns computed once.
    MI_DF_withinbout = MI_DF[MI_DF.type == stype].sort_values(by='n_elements', ascending=False)
    MI_DF_withinbout['delta_AIC'] = MI_DF_withinbout['AICc_exp'] - MI_DF_withinbout['AICc_concat']
    MI_DF_withinbout['n_elements'] = MI_DF_withinbout.n_elements.astype('float32')
    MI_DF_withinbout['log_n_elements'] = np.log2(MI_DF_withinbout['n_elements'].values)

    # Report the Pearson correlation and the number of rows for this type.
    pearson = scipy.stats.pearsonr(
        MI_DF_withinbout.log_n_elements.values,
        MI_DF_withinbout.delta_AIC.values,
    )
    print(stype, pearson, len(MI_DF_withinbout))

    # One scatter call per species so that each gets its own colour.
    for species in np.unique(MI_DF_withinbout.species.values):
        MI_DF_species = MI_DF_withinbout[MI_DF_withinbout.species.values == species]
        ax.scatter(MI_DF_species.log_n_elements, MI_DF_species.delta_AIC, color=BCOL_DICT[species])

    # Regression line with a 95% confidence band over all species together.
    sns.regplot(
        x='log_n_elements',
        y='delta_AIC',
        ax=ax,
        data=MI_DF_withinbout,
        color='k',
        scatter=False,
        ci=95,
    )
    ax.axhline(0, color='k', ls='dashed')
    ax.set_xlabel('Dataset length (syllables)', labelpad=10, fontsize=fontsize)
    ax.set_ylim([-50, 200])

    # Only the left-hand column keeps its y label and y ticks.
    if sti in [0, 2]:
        # NOTE(review): the label reads "comp. - exp." while delta_AIC is
        # computed as AICc_exp - AICc_concat; confirm the intended sign.
        ax.set_ylabel('$AICc_{comp.}-AICc_{exp.}$', labelpad=5, fontsize=fontsize)
    else:
        ax.set_ylabel('')
        ax.set_yticks([])

    for axis in ['top', 'bottom', 'left', 'right']:
        ax.spines[axis].set_linewidth(3)
        ax.spines[axis].set_color('k')
    ax.tick_params(which='both', direction='in')

    # The x axis is in log2 units; relabel the ticks with raw counts.
    # Pin the tick locations before assigning fixed labels so the labels
    # cannot drift out of sync with the locator, then restore the limits
    # because set_xticks may widen the view to include every tick.
    xlim = ax.get_xlim()
    xticks = ax.get_xticks()
    ax.set_xticks(xticks)
    ax.set_xticklabels(np.round(np.exp2(xticks)).astype('int'), fontsize=12)
    ax.set_xlim(xlim)
    ax.yaxis.set_tick_params(labelsize=12)

    stype_formatted = stype_titles[stype]
    ax.set_title(stype_formatted, fontsize=16)

plt.tight_layout()

save_fig(FIGURE_DIR/'song_aic_dset_size')

In [None]:
# Row count of the subset left over from the last loop iteration above.
MI_DF_withinbout.shape[0]