Make a small curriculum of miniature fly datasets.

In [1]:
import numpy as np
import os
import pandas as pd
from scipy.signal import fftconvolve
import sys

In [2]:
# --- Timing ---
FPS = 30.03
DT = 1/FPS

# --- Reproducibility ---
# Seed NumPy's global RNG so that PARAMS below, and any later np.random calls made
# in the same kernel session, give the same values on a fresh Restart & Run All.
SEED = 0
np.random.seed(SEED)

# --- Strain selection ---
FSTRAIN = 'data_raw/fly/strains.csv'

TARG_BHV = 'MTN'
TWDWS = [.03, 1, 60]
TARGS = [f'{TARG_BHV}_MN_{twdw}' for twdw in TWDWS]

STRAINS = ['NM91', 'ZH23']
STRAIN_KEY = '_'.join(STRAINS).lower()

# Read the strain table once (instead of once per strain) and build one boolean
# mask per requested strain; a row is selected if it matches any of them.
_strain_col = pd.read_csv(FSTRAIN)['STRAIN']
MSTRAINS = [(_strain_col == strain) for strain in STRAINS]
MSTRAIN = np.any(MSTRAINS, axis=0)
ISTRAIN = MSTRAIN.nonzero()[0]  # row positions of the selected strains in FSTRAIN

NTRIAL = MSTRAIN.sum()

# --- Model parameters ---
# TAU_R = 1
TAU_R = 60

N = 20  # number of simulated units
PARAMS = {
    # low == high, so every unit gets exactly TAU_R
    'TAU_R': np.random.uniform(TAU_R, TAU_R, N),  # seconds
    'TAU_A': np.random.uniform(.1, 2, N),  # seconds
    'X_S': np.random.uniform(0, 1, N),
    'X_P': np.random.uniform(0, 1, N),
}

# --- Dataset shape ---
LOOK_BACK = 500  # frames (30 FPS)

FDECIM = .005  # how much of the original data to actually keep (1,548,531 samples available total)

# --- Output location ---
PFX = f'data_s5/fly_curric_mini/lookback_{LOOK_BACK}_tau_r_{TAU_R}'

# Creating the nested 'scrambled' directory also creates PFX; exist_ok makes this
# safe to re-run and repairs the case where PFX exists but 'scrambled' does not.
os.makedirs(os.path.join(PFX, 'scrambled'), exist_ok=True)

In [3]:
def smlt_ma(i_s, i_p, params, dt):
    """MA: Multiplicative adaptive neuron.

    Forward-Euler simulation of a population of n units driven by two input
    time series. Each unit keeps two adaptation variables (a_s, a_p) that
    low-pass filter i_s and i_p with time constant TAU_A, and a response r that
    relaxes with time constant TAU_R toward
    (1 - a_s)*X_S*i_s + (1 - a_p)*X_P*i_p.

    Args:
        i_s: 1-D sequence of input values, one per time step.
        i_p: 1-D sequence of input values, indexed the same way as i_s.
        params: mapping with keys 'TAU_R', 'TAU_A', 'X_S', 'X_P'; each value is
            an array-like with one entry per unit (plain lists are accepted).
        dt: integration time step, in the same units as the time constants.

    Returns:
        Array of shape (len(i_s), n) holding every unit's response at each step.
        Row 0 is all zeros (initial condition). Empty input gives shape (0, n).
    """
    # Coerce to float arrays so list-valued parameters work too.
    tau_rs = np.atleast_1d(np.asarray(params['TAU_R'], dtype=float))
    tau_as = np.atleast_1d(np.asarray(params['TAU_A'], dtype=float))
    x_ss = np.atleast_1d(np.asarray(params['X_S'], dtype=float))
    x_ps = np.atleast_1d(np.asarray(params['X_P'], dtype=float))

    n = len(tau_rs)
    n_t = len(i_s)

    rs = np.full((n_t, n), np.nan)
    if n_t == 0:
        # Nothing to simulate; avoid indexing row 0 of an empty array.
        return rs

    rs[0, :] = 0
    a_s = np.zeros(n)
    a_p = np.zeros(n)

    # Per-unit step sizes are loop-invariant.
    k_a = dt/tau_as
    k_r = dt/tau_rs

    for ct in range(1, n_t):
        # Adaptation is updated first, so the response at step ct already sees
        # the adaptation caused by the input at step ct.
        a_s += (k_a * (-a_s + i_s[ct]))
        a_p += (k_a * (-a_p + i_p[ct]))
        dr = k_r * (-rs[ct-1, :] + (1 - a_s)*x_ss*i_s[ct] + (1 - a_p)*x_ps*i_p[ct])
        rs[ct, :] = rs[ct-1, :] + dr

    return rs

In [4]:
# Column order used when the per-frame records are written to disk.
columns = ['fmtn', 'session', 'frame', 'song']
df_full = pd.read_csv('data_raw/fly/c_song_f_behav_true.csv')

# One sub-frame per selected ID; the full table is dropped afterwards to free memory.
df_trs = [df_full[df_full.ID == i] for i in ISTRAIN]
del df_full

paths_all = []

data_dicts = []
data_dicts_scrambled = []

for df_tr in df_trs:
    sys.stdout.write('.')  # one progress dot per trial

    # Per-trial columns are converted once here rather than once per frame
    # inside the inner loop (which made the loop quadratic in trial length).
    frames = np.array(df_tr['FRAME']).astype(int)
    sessions = np.array(df_tr['ID']).astype(int)

    mask_s = np.array(df_tr['S']) == 1
    mask_p = np.array(df_tr['P']) == 1

    # One character per frame: 'Q' by default, 'S' where S == 1, 'P' where P == 1
    # ('P' is assigned last, so it wins if both flags are set).
    song = np.repeat('Q', len(df_tr))
    song[mask_s] = 'S'
    song[mask_p] = 'P'
    song = ''.join(song)

    i_s = mask_s.astype(float)
    i_p = mask_p.astype(float)

    # Target: simulated response averaged over all units.
    rs = smlt_ma(i_s, i_p, PARAMS, DT)
    fmtn = np.mean(rs, axis=1)

    # Control target: same values, randomly permuted across frames within the trial.
    fmtn_scrambled = fmtn[np.random.permutation(len(fmtn))]

    # Left-pad with LOOK_BACK 'Q's once, so the window for frame `cframe` (the
    # LOOK_BACK characters strictly before it, 'Q'-padded at the start of the
    # trial) is a single fixed-width slice instead of an ever-growing prefix copy.
    song_padded = 'Q'*LOOK_BACK + song

    for cframe, frame in enumerate(frames):

        song_seg = song_padded[cframe:cframe + LOOK_BACK]

        data_dict = {
            'fmtn': fmtn[cframe],
            'session': sessions[cframe],
            'frame': frame,
            'song': song_seg,
        }

        data_dicts.append(data_dict)

        # NOTE(review): the target key here is 'fmtn_scrambled', which is not one
        # of the names in `columns`; code that builds a DataFrame from these
        # records by column name has to account for that.
        data_dict_scrambled = {
            'fmtn_scrambled': fmtn_scrambled[cframe],
            'session': sessions[cframe],
            'frame': frame,
            'song': song_seg,
        }

        data_dicts_scrambled.append(data_dict_scrambled)

.......................................................................................

In [5]:
# Total number of per-frame examples built above (before decimation)
len(data_dicts)

1548531

In [5]:
# decimate the data dict
# Each example is kept independently with probability FDECIM. The same indices are
# applied to the true and scrambled records so the two files stay row-aligned.
idx_decim = (np.random.rand(len(data_dicts)) < FDECIM).nonzero()[0]
# Index directly with the (ascending) kept indices; testing `idx in idx_decim`
# for every record scans the whole index array each time.
data_dicts_dec = [data_dicts[idx] for idx in idx_decim]
data_dicts_scrambled_dec = [data_dicts_scrambled[idx] for idx in idx_decim]

print('')
df = pd.DataFrame(columns=columns, data=data_dicts_dec)
path = os.path.join(PFX, 'full.tsv')
df.to_csv(path, sep='\t', index=False, header=False)

# The scrambled records store their target under 'fmtn_scrambled'. Passing
# columns=columns to the constructor would select a non-existent 'fmtn' key and
# write an all-NaN target column, so rename first and then fix the column order.
df_scrambled = (
    pd.DataFrame(data_dicts_scrambled_dec)
    .rename(columns={'fmtn_scrambled': 'fmtn'})
    .reindex(columns=columns)
)
path_scrambled = os.path.join(PFX, 'scrambled', 'full.tsv')
df_scrambled.to_csv(path_scrambled, sep='\t', index=False, header=False)




In [6]:
# Re-load each full dataset and write train/eval/test files next to it.
for path_ in (path, path_scrambled):
    print(f'Loading {path_}...')
    df = pd.read_csv(path_, sep='\t', header=None)

    # First 80% of rows go to training; the remaining rows serve as both the
    # validation and the test set (val and test are the same here).
    nrow_train = int(len(df)*.8)
    df_train = df.iloc[:nrow_train]
    df_val = df.iloc[nrow_train:]
    df_test = df.iloc[nrow_train:]

    # Outputs live in the same directory as the corresponding full.tsv.
    out_dir = os.path.dirname(path_)
    splits = (
        ('train.tsv', df_train),
        ('eval.tsv', df_val),
        ('test.tsv', df_test),
    )
    for fname, df_split in splits:
        df_split.to_csv(os.path.join(out_dir, fname), sep='\t', header=False, index=False)

Loading data_s5/fly_curric_mini/lookback_500_tau_r_60/full.tsv...
Loading data_s5/fly_curric_mini/lookback_500_tau_r_60/scrambled/full.tsv...
