# "Record" song heuristics to use as predictors of behavior

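Computationally, each song heuristic plays the same role as one of our surrogate neural activity vectors: it is computed at every frame of a trial and stored as an additional column alongside that trial's behavioral data.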

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from scipy import signal
import sys

from aux import get_seg
from disp import set_plot

cc = np.concatenate  # short alias for array concatenation

FPS = 30.03  # frame rate; 1/FPS matches the ~0.0333 step of the T column in the behavioral data
DT = 1/FPS  # duration of a single frame

WDWS = [1, 10, 30, 60, 120, 1800]  # time windows over which to compute heuristic song features (same time units as DT)

# output directory and file-name prefix for the saved per-trial heuristics
SAVE_DIR_HEUR = 'data/simple/mlv/heuristic/heuristics'
SAVE_PFX_HEUR = 'heuristics'

Load the behavioral data from Coen et al. (2014).

In [2]:
# read the full behavioral table: all trials stacked, one row per frame, trial given by the ID column
df_behav = pd.read_csv('data/simple/w_song_f_behav.csv')
# NOTE(review): the 'Unnamed: 0' column in the output looks like a saved row index -- confirm whether index_col=0 was intended
df_behav

Unnamed: 0,ID,FRAME,T,Q,S,P,F,FFV,FFA,FLS,FRS,FMFV,FMLS
0,0,0,0.000000,1,0,0,0,3.844019,-0.138725,0.841459,3.102308,3.663209,1.244644
1,0,1,0.033300,1,0,0,0,2.645045,0.217158,0.958827,1.644406,2.143699,1.633865
2,0,2,0.066600,1,0,0,0,2.817165,-0.170636,0.799927,0.591572,2.212304,1.895512
3,0,3,0.099900,1,0,0,0,4.238035,0.000825,0.455475,0.842094,3.535255,2.372842
4,0,4,0.133200,1,0,0,0,6.156643,0.280813,0.516347,1.218568,5.392901,2.986841
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2333241,83,11022,367.032967,0,0,0,1,-0.058612,0.000000,0.240739,0.721101,0.063511,0.234946
2333242,83,11023,367.066267,0,0,0,1,0.381769,0.000000,0.156364,0.000000,-0.366270,0.191376
2333243,83,11024,367.099567,0,0,0,1,0.196337,0.000000,0.482724,0.839733,-0.179289,0.481899
2333244,83,11025,367.132867,1,0,0,0,-0.403877,0.000000,0.139698,1.868385,0.410206,0.148874


In [3]:
# build one DataFrame per trial ID (IDs 0 .. max ID), keeping the original row index
n_tr = df_behav['ID'].max() + 1
dfs_tr = [df_behav.loc[df_behav['ID'] == tr_id] for tr_id in range(n_tr)]

# release the reference to the stacked table now that it has been split
df_behav = None

In [4]:
# functions for getting song heuristics
def get_n_s_onsets(df_tr, wdw, dt=None):
    """Count 'S' onsets inside the trailing window that ends at each frame.

    An onset is a frame where 'S' rises from 0 to 1. The value before the
    first frame is taken to be 0, so a trial that starts with S == 1 has an
    onset at frame 0.

    Args:
        df_tr: per-trial DataFrame with an 'S' column (assumed 0/1), one row per frame.
        wdw: window length, in the same time units as dt.
        dt: frame duration. Defaults to the module-level DT, so existing
            two-argument calls behave as before.

    Returns:
        Integer array of length len(df_tr); entry t is the number of onsets
        in the int(round(wdw/dt)) most recent frames up to and including t.
    """
    if dt is None:
        dt = DT
    n_wdw = int(round(wdw/dt))

    s = np.array(df_tr['S']).astype(int)
    # rising edges only; prepend=0 treats the frame before the trial as "off"
    s_onsets = (np.diff(s, prepend=0) > 0).astype(float)

    # causal boxcar sum: keep only the first len(df_tr) samples of the full convolution
    s_onsets_wdw = signal.fftconvolve(s_onsets, np.ones(n_wdw), mode='full')[:len(df_tr)]

    # the FFT introduces tiny round-off errors; counts are integers
    return np.round(s_onsets_wdw).astype(int)
    
def get_n_p_onsets(df_tr, wdw, dt=None):
    """Count onsets of (P or F) inside the trailing window that ends at each frame.

    The 'P' and 'F' columns are OR-ed into a single signal; an onset is a
    frame where that combined signal rises from 0 to 1. The value before the
    first frame is taken to be 0.

    Args:
        df_tr: per-trial DataFrame with 'P' and 'F' columns (assumed 0/1), one row per frame.
        wdw: window length, in the same time units as dt.
        dt: frame duration. Defaults to the module-level DT, so existing
            two-argument calls behave as before.

    Returns:
        Integer array of length len(df_tr); entry t is the number of onsets
        in the int(round(wdw/dt)) most recent frames up to and including t.
    """
    if dt is None:
        dt = DT
    n_wdw = int(round(wdw/dt))

    p = np.array(df_tr['P']|df_tr['F']).astype(int)
    # rising edges only; prepend=0 treats the frame before the trial as "off"
    p_onsets = (np.diff(p, prepend=0) > 0).astype(float)

    # causal boxcar sum: keep only the first len(df_tr) samples of the full convolution
    p_onsets_wdw = signal.fftconvolve(p_onsets, np.ones(n_wdw), mode='full')[:len(df_tr)]

    # the FFT introduces tiny round-off errors; counts are integers
    return np.round(p_onsets_wdw).astype(int)

def get_n_b_onsets(df_tr, wdw, dt=None):
    """Count onsets of (S or P or F) inside the trailing window that ends at each frame.

    The 'S', 'P' and 'F' columns are OR-ed into a single signal; an onset is
    a frame where that combined signal rises from 0 to 1. The value before
    the first frame is taken to be 0.

    Args:
        df_tr: per-trial DataFrame with 'S', 'P' and 'F' columns (assumed 0/1), one row per frame.
        wdw: window length, in the same time units as dt.
        dt: frame duration. Defaults to the module-level DT, so existing
            two-argument calls behave as before.

    Returns:
        Integer array of length len(df_tr); entry t is the number of onsets
        in the int(round(wdw/dt)) most recent frames up to and including t.
    """
    if dt is None:
        dt = DT
    n_wdw = int(round(wdw/dt))

    b = np.array(df_tr['S']|df_tr['P']|df_tr['F']).astype(int)
    # rising edges only; prepend=0 treats the frame before the trial as "off"
    b_onsets = (np.diff(b, prepend=0) > 0).astype(float)

    # causal boxcar sum: keep only the first len(df_tr) samples of the full convolution
    b_onsets_wdw = signal.fftconvolve(b_onsets, np.ones(n_wdw), mode='full')[:len(df_tr)]

    # the FFT introduces tiny round-off errors; counts are integers
    return np.round(b_onsets_wdw).astype(int)

def get_s_total(df_tr, wdw, dt=None):
    """Total time with 'S' on inside the trailing window that ends at each frame.

    Args:
        df_tr: per-trial DataFrame with an 'S' column (assumed 0/1), one row per frame.
        wdw: window length, in the same time units as dt.
        dt: frame duration. Defaults to the module-level DT, so existing
            two-argument calls behave as before.

    Returns:
        Float array of length len(df_tr); entry t is dt times the number of
        'S' frames among the int(round(wdw/dt)) most recent frames up to and
        including t.
    """
    if dt is None:
        dt = DT
    n_wdw = int(round(wdw/dt))

    s = np.array(df_tr['S']).astype(int)

    # causal boxcar sum: keep only the first len(df_tr) samples of the full convolution
    s_frames = signal.fftconvolve(s, np.ones(n_wdw), mode='full')[:len(df_tr)]

    # the FFT leaves round-off noise (e.g. 1e-16 instead of 0); frame counts are
    # integers, so snap to the nearest integer before converting frames to time
    return np.round(s_frames).astype(int)*dt

def get_p_total(df_tr, wdw, dt=None):
    """Total time with (P or F) on inside the trailing window that ends at each frame.

    Args:
        df_tr: per-trial DataFrame with 'P' and 'F' columns (assumed 0/1), one row per frame.
        wdw: window length, in the same time units as dt.
        dt: frame duration. Defaults to the module-level DT, so existing
            two-argument calls behave as before.

    Returns:
        Float array of length len(df_tr); entry t is dt times the number of
        frames with P or F set among the int(round(wdw/dt)) most recent
        frames up to and including t.
    """
    if dt is None:
        dt = DT
    n_wdw = int(round(wdw/dt))

    p = np.array(df_tr['P']|df_tr['F']).astype(int)

    # causal boxcar sum: keep only the first len(df_tr) samples of the full convolution
    p_frames = signal.fftconvolve(p, np.ones(n_wdw), mode='full')[:len(df_tr)]

    # the FFT leaves round-off noise (e.g. 1e-16 instead of 0); frame counts are
    # integers, so snap to the nearest integer before converting frames to time
    return np.round(p_frames).astype(int)*dt
    
def get_b_total(df_tr, wdw, dt=None):
    """Total time with (S or P or F) on inside the trailing window that ends at each frame.

    Args:
        df_tr: per-trial DataFrame with 'S', 'P' and 'F' columns (assumed 0/1), one row per frame.
        wdw: window length, in the same time units as dt.
        dt: frame duration. Defaults to the module-level DT, so existing
            two-argument calls behave as before.

    Returns:
        Float array of length len(df_tr); entry t is dt times the number of
        frames with any of S, P, F set among the int(round(wdw/dt)) most
        recent frames up to and including t.
    """
    if dt is None:
        dt = DT
    n_wdw = int(round(wdw/dt))

    b = np.array(df_tr['S']|df_tr['P']|df_tr['F']).astype(int)

    # causal boxcar sum: keep only the first len(df_tr) samples of the full convolution
    b_frames = signal.fftconvolve(b, np.ones(n_wdw), mode='full')[:len(df_tr)]

    # the FFT leaves round-off noise (e.g. 1e-16 instead of 0); frame counts are
    # integers, so snap to the nearest integer before converting frames to time
    return np.round(b_frames).astype(int)*dt

def get_mn_s_dur(df_tr, wdw):
    """Windowed 'S' time divided by the windowed number of 'S' onsets.

    Returns a float array of length len(df_tr); frames whose window contains
    no 'S' onset are left as NaN.
    """
    n_onsets = get_n_s_onsets(df_tr, wdw)
    total_time = get_s_total(df_tr, wdw)

    # only divide where at least one onset fell inside the window
    has_onset = n_onsets > .5
    return np.divide(
        total_time, n_onsets,
        out=np.full(len(df_tr), np.nan),
        where=has_onset,
    )

def get_mn_p_dur(df_tr, wdw):
    """Windowed (P or F) time divided by the windowed number of (P or F) onsets.

    Returns a float array of length len(df_tr); frames whose window contains
    no such onset are left as NaN.
    """
    n_onsets = get_n_p_onsets(df_tr, wdw)
    total_time = get_p_total(df_tr, wdw)

    # only divide where at least one onset fell inside the window
    has_onset = n_onsets > .5
    return np.divide(
        total_time, n_onsets,
        out=np.full(len(df_tr), np.nan),
        where=has_onset,
    )

def get_mn_b_dur(df_tr, wdw):
    """Windowed (S or P or F) time divided by the windowed number of its onsets.

    Returns a float array of length len(df_tr); frames whose window contains
    no such onset are left as NaN.
    """
    n_onsets = get_n_b_onsets(df_tr, wdw)
    total_time = get_b_total(df_tr, wdw)

    # only divide where at least one onset fell inside the window
    has_onset = n_onsets > .5
    return np.divide(
        total_time, n_onsets,
        out=np.full(len(df_tr), np.nan),
        where=has_onset,
    )

def get_sp_ratio(df_tr, wdw):
    """Ratio of windowed 'S' time to windowed (P or F) time.

    Returns a float array of length len(df_tr); frames whose window holds
    no more than half a frame of (P or F) time are left as NaN instead of
    dividing by (near) zero.
    """
    p_time = get_p_total(df_tr, wdw)
    s_time = get_s_total(df_tr, wdw)

    # half-a-frame threshold tolerates round-off in the windowed totals
    has_p = p_time > (.5*DT)
    return np.divide(
        s_time, p_time,
        out=np.full(len(df_tr), np.nan),
        where=has_p,
    )

# heuristic name -> function(df_tr, wdw); names become column-name prefixes,
# and the insertion order here sets the order of the generated columns
SONG_HEURS = {
    # onset counts
    'N_S_ONSETS': get_n_s_onsets,
    'N_P_ONSETS': get_n_p_onsets,
    'N_B_ONSETS': get_n_b_onsets,
    # total time
    'S_TOTAL': get_s_total,
    'P_TOTAL': get_p_total,
    'B_TOTAL': get_b_total,
    # total time per onset
    'MN_S_DUR': get_mn_s_dur,
    'MN_P_DUR': get_mn_p_dur,
    'MN_B_DUR': get_mn_b_dur,
    # S time relative to P/F time
    'SP_RATIO': get_sp_ratio,
}

In [5]:
# For every trial, compute each heuristic at each window length, add the results
# as columns named '<HEURISTIC>_<window>', and save one file per trial.

# np.save does not create missing directories, so make sure the target exists
os.makedirs(SAVE_DIR_HEUR, exist_ok=True)

# NOTE(review): saved files are numbered ctr + 276 rather than ctr -- presumably to
# continue the trial numbering of another dataset; confirm the offset is intended
TR_NUM_OFFSET = 276

# loop over trials
for ctr, df_tr in enumerate(dfs_tr):
    df_tr_heur = df_tr.copy()
    sys.stdout.write('.')  # one progress dot per trial

    for kheur, fheur in SONG_HEURS.items():
        for wdw in WDWS:
            col_name = f'{kheur}_{wdw}'

            # compute heuristic
            v = fheur(df_tr, wdw).astype(float)
            v[np.isinf(v)] = np.nan # replace infs with nans so they don't mess up fits later

            # store as new column in dataframe
            df_tr_heur[col_name] = v

    # The DataFrame is wrapped in a dict inside a 1-element object array, so the
    # .npy file is pickled; reading it back requires np.load(..., allow_pickle=True).
    save_path = os.path.join(SAVE_DIR_HEUR, f'{SAVE_PFX_HEUR}_tr_{ctr+TR_NUM_OFFSET}.npy')
    np.save(save_path, np.array([{'df': df_tr_heur}]))

....................................................................................