# MI + model fitting for each songbird
1. load datasets
...

In [None]:
import pandas as pd
import numpy as np
from parallelspaper.config.paths import DATA_DIR, FIGURE_DIR
from parallelspaper.birdsong_datasets import MI_seqs, compress_seq, BCOL_DICT
from parallelspaper import information_theory as it 
from sklearn.externals.joblib import Parallel, delayed
from parallelspaper import model_fitting as mf
from parallelspaper.utils import save_fig

In [None]:
from parallelspaper.quickplots import plot_model_fits
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
from tqdm.autonotebook import tqdm

### Load data

In [None]:
# Load the pickled MI table; judging by the file name it holds the MI
# computed on each bird's longest sequence.
MI_DF = pd.read_pickle(
    DATA_DIR / 'MI_DF/birdsong/birdsong_MI_DF_longest_seq.pickle'
)

In [None]:
# Preview the first three rows of the loaded MI table.
MI_DF[:3]

In [None]:
# Number of rows in the MI table (one row is plotted per subplot row below).
len(MI_DF)

### Fit decay models and plot MIs of longest sequences

In [None]:
import matplotlib.gridspec as gridspec

In [None]:
# Plot, for each row of MI_DF, the shuffle-corrected MI as a function of
# distance together with the composite (power-law + exponential) model fit
# and its two components.
#
# Layout: `ncol` panels side by side, each holding up to `nrow` individuals
# (one per row), with three axes per individual:
#   ax0 - MI minus shuffled MI, overlaid with the composite fit (thick,
#         species colour) and both of its components (black)
#   ax1 - data with the exponential component subtracted, overlaid with the
#         power-law component
#   ax2 - data with the power-law component subtracted, overlaid with the
#         exponential component
show = True   # when False only the empty grid of axes is created
zoom = 3      # scale factor applied to the figure size
ncol = 2
nrow = 9
iii = 0       # running count of individuals actually plotted

# Unique integer distances between 1 and 75 at which the models are fitted.
# NOTE(review): assumes row.MI / row.MI_shuff have one entry per distance.
distances = np.unique(np.linspace(start=1, stop=75, num=100).astype(int))
# Dense grid used to draw a smooth composite-fit curve.
dist_s = np.linspace(distances[0], distances[-1], 1000)

# Display labels for the best-fit model name and the species code.
bfm = {'pow_exp': 'comp.', 'exp': 'exp.', 'pow': 'power law'}
spc = {'CATH': 'thrasher', 'CAVI': 'vireo', 'Starling': 'starling', 'BF': 'finch'}

fig = plt.figure(figsize=(ncol*3*zoom, nrow*zoom))
plt.subplots_adjust(wspace=0.1)
outer = gridspec.GridSpec(1, ncol, width_ratios=[1 for i in range(ncol)])

for col in tqdm(range(ncol)):
    gs = gridspec.GridSpecFromSubplotSpec(
        nrow, 3, subplot_spec=outer[col], wspace=0.1, hspace=0.15,
        width_ratios=[1, 1, 1])
    # Rows of MI_DF shown in this panel; may hold fewer than nrow rows.
    col_df = MI_DF[col*nrow:(col+1)*nrow]
    # The x tick labels go on the last populated row of the panel, so a
    # short final panel still gets them.
    last_ri = len(col_df) - 1
    for ri, (idx, row) in tqdm(enumerate(col_df.iterrows()),
                               total=len(col_df), leave=False):
        ax0 = fig.add_subplot(gs[ri, 0])
        ax1 = fig.add_subplot(gs[ri, 1])
        ax2 = fig.add_subplot(gs[ri, 2])

        if not show:
            continue

        iii += 1
        color = BCOL_DICT[row.species]

        MI = row.MI
        MI_shuff = row.MI_shuff
        # Shuffle-corrected MI: the quantity that is fitted and plotted.
        sig = MI - MI_shuff
        results_power, results_exp, results_pow_exp, best_fit_model = mf.fit_models(
            distances, sig)

        # Evaluate the composite fit and its components once. The component
        # curves deliberately use the parameters of the composite fit.
        composite_fit = mf.get_y(mf.pow_exp_decay, results_pow_exp, dist_s)
        exp_fit = mf.get_y(mf.exp_decay, results_pow_exp, distances)
        pow_fit = mf.get_y(mf.powerlaw_decay, results_pow_exp, distances)
        intercept = results_pow_exp.params['intercept'].value

        # y limits: span of the positive data in log space, padded by 10%
        # of that span on each side.
        sig_lims = np.log([np.min(sig[sig > 0]), np.nanmax(sig)])
        sig_lims = [sig_lims[0] - (sig_lims[1]-sig_lims[0])/10,
                    sig_lims[1] + (sig_lims[1]-sig_lims[0])/10]

        # ax0: data, composite fit and both components.
        ax0.scatter(distances, sig, color=color)
        ax0.set_title(' | '.join([spc[row.species], row.indv[:12], bfm[best_fit_model]]), fontsize=14)
        ax0.plot(dist_s, composite_fit, color=color, alpha=0.5, lw=10)
        ax0.plot(distances, exp_fit, color='k', alpha=0.5, lw=4)
        ax0.plot(distances, pow_fit, color='k', alpha=0.5, lw=4)
        if col == 0:
            ax0.set_ylabel('MI (bits)',
                           labelpad=5, fontsize=18)

        # ax1: exponential component removed from the data.
        ax1.scatter(distances, sig - exp_fit, color=color)
        ax1.plot(distances, pow_fit - intercept,
                 color=color, alpha=0.5, lw=10)
        ax1.set_title('power-law comp.', fontsize=14)

        # ax2: power-law component removed from the data.
        ax2.scatter(distances, sig - pow_fit, color=color)
        ax2.plot(distances, exp_fit - intercept,
                 color=color, alpha=0.5, lw=10)
        ax2.set_title('exp. comp.', fontsize=14)

        for ax in (ax0, ax1, ax2):
            ax.set_ylim(np.exp(sig_lims))
            # Base 10 is matplotlib's default log base; omitting the
            # basex/basey keywords keeps this working on versions that
            # removed them.
            ax.set_xscale("log")
            ax.set_yscale("log")
            ax.tick_params(axis='both', labelsize=10, pad=7)
            for axis in ['top', 'bottom', 'left', 'right']:
                ax.spines[axis].set_linewidth(3)
                ax.spines[axis].set_color('k')
            ax.grid(False)
            ax.tick_params(which='both', direction='in')
            ax.tick_params(which='major', length=10, width=3)
            ax.tick_params(which='minor', length=5, width=2)
            if ri == last_ri:
                ax.set_xticks([1, 10, 100])
                ax.set_xticklabels(['1', '10', '100'])
                ax.set_xlabel('Distance (syllables)',
                              labelpad=5, fontsize=14)
            else:
                ax.set_xticks([])
            # Set the x limits last: set_xticks may widen the view to
            # include the tick at 100.
            ax.set_xlim([1, distances[-1]])
        # All three axes share the same y limits, so only ax0 keeps labels.
        ax1.set_yticklabels([])
        ax2.set_yticklabels([])

plt.tight_layout()
save_fig(FIGURE_DIR / 'voc_indv')
plt.show()