In [9]:
# Iman Wahle
# August 2019
# Bootstrap process to identify DSNs and DSNs for cells 
# in SF- and TF- varied drifting-grating experiments

In [None]:
import numpy as np
import pandas as pd
import os
import sys
import h5py
import progressbar

import matplotlib.pyplot as plt
%matplotlib inline

In [10]:
bfp = "/allen/programs/braintv/workgroups/nc-ophys/Iman/direction_flipping/"
sftffp = "/allen/programs/braintv/workgroups/nc-ophys/VisualCoding/analysis/sftf"

In [36]:
# load experiment list
targeted_expts = pd.read_csv(bfp + "targeted_manifest.csv")
sftf = targeted_expts[targeted_expts.stimulus_name=='VisCodingTargetedTFxSF']
sessions = sftf.ophys_session_id.unique()

In [37]:
len(sessions)

78

In [38]:
# load session info once
csv = pd.read_csv(bfp + "resources/dgsftf_events_all.csv")

In [39]:
# given a cell_id, returns a (2,2,n_trials) array
# that has cell responses at ((pref_dir, null_dir) x (pref_sftf, null_sftf) x 15 trials)

def get_cell_trials(mean_sweep_events, stim_table, pref_dir, null_dir, pref_sf, null_sf, pref_tf, null_tf, c):
    cell_trials = np.empty((2,2,12))
    cell_trials[:] = np.NaN
    cell_trials[0,0,:] = mean_sweep_events[(stim_table.Ori==pref_dir)&(stim_table.SF==pref_sf)&(stim_table.TF==pref_tf)][str(c)].values
    cell_trials[1,0,:] = mean_sweep_events[(stim_table.Ori==null_dir)&(stim_table.SF==pref_sf)&(stim_table.TF==pref_tf)][str(c)].values
    cell_trials[0,1,:] = mean_sweep_events[(stim_table.Ori==pref_dir)&(stim_table.SF==null_sf)&(stim_table.TF==null_tf)][str(c)].values
    cell_trials[1,1,:] = mean_sweep_events[(stim_table.Ori==null_dir)&(stim_table.SF==null_sf)&(stim_table.TF==null_tf)][str(c)].values
    return cell_trials

In [40]:
def get_dsi(tf, response):
    pref_dir = 0
    null_dir = 1
    pref = response[pref_dir, tf]
    null = response[null_dir, tf]
    return ((pref - null) / float(pref + null))

In [41]:
def bootstrap(cell_trials, niter, dsi_thresh):

#     # progress bar across iterations
#     bar = progressbar.ProgressBar(maxval=niter, \
#     widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])
#     bar.start()
    
    # repeat sampling 1000 times
    drn_accum = 0
    dsn_accum = 0

    for i in range(niter):
        
        # generate response events stats with new sampling
        response_events_sample = np.empty((2, 2)) # (pref_dir=0, null_dir=1) x (pref_tf=0, null_tf=1)
        response_events_sample[:] = np.nan

        # calculate mean response across new sampling at each dir/tf combination
        for d in range(2): # pref_dir, null_dir
            for t in range(2): # pref_tf, null_tf

                # sample n_trials di x tf trials from mean_sweep_events
                n_trials = cell_trials.shape[2]
                sample_trials_idx = np.random.choice(n_trials, n_trials, replace=True)
                sample_mse = cell_trials[d,t,sample_trials_idx]

                # calculate stats
                response_events_sample[d,t] = np.mean(sample_mse)


        # DSI statistics
        DSI_pref = get_dsi(0, response_events_sample)
        DSI_null = get_dsi(1, response_events_sample)
        DSI_ratio = DSI_null / float(DSI_pref)
        
        # apply criteria
        check_drn = (DSI_ratio<0) & (DSI_pref>dsi_thresh)
        check_dsn = (DSI_pref>dsi_thresh)

        drn_accum += check_drn
        dsn_accum += check_dsn
            
#         bar.update(i)
        
#     bar.finish()

    return drn_accum, dsn_accum

In [42]:
niter = 1000
dsi_thresh = 0.25

# progress bar across iterations
bar = progressbar.ProgressBar(maxval=len(sessions), \
widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])
bar.start()
    
    
results = pd.DataFrame(columns=('session_id', 'cell_id', 'n_cells', 'drn_accum', 'dsn_accum', 'is_drn', 'is_dsn'))
unprocessed_sessions = []
for s,session_id in enumerate(sessions):
    print session_id
    #get cells in dataframe for this session
    cells = csv[csv.session_id_y==session_id].id.values
    
    #get stim_table and mean_sweep_events
    datafile = os.path.join(sftffp, str(session_id)+"_sftf_analysis.h5")
    stim_table = pd.read_hdf(datafile, 'stim_table')
    mean_sweep_events = pd.read_hdf(datafile, 'mean_sweep_events')

    session_results = np.zeros((len(cells),2))
    # bootstrap each cell
    for c,cell in enumerate(cells):

        # collect pref/null conditions
        csv_idx = np.where(csv['id']==cell)[0][0]
        pref_dir = csv['pref_dir'].iloc[csv_idx]
        null_dir = csv['null_dir'].iloc[csv_idx]#np.mod(pref_dir+180, 360)
        pref_tf = csv['pref_tf'].iloc[csv_idx]
        pref_sf = csv['pref_sf'].iloc[csv_idx]
        null_tf = csv['pref_null_tf'].iloc[csv_idx]
        null_sf = csv['pref_null_sf'].iloc[csv_idx]
        if np.isnan(pref_dir):
            unprocessed_sessions.append(session_id)
            break
            
        # TODO: make sure that rows in stim_table appear in same order 
        cell_trials = get_cell_trials(mean_sweep_events, stim_table, \
                                      pref_dir, null_dir, \
                                      pref_sf, null_sf, \
                                      pref_tf, null_tf, c)
        
        drn_accum, dsn_accum = bootstrap(cell_trials, niter, dsi_thresh)
        
        results = results.append({'session_id': session_id, \
                        'cell_id' : cell, \
                        'n_cells' : len(cells), \
                        'drn_accum' : drn_accum, \
                        'dsn_accum' : dsn_accum, \
                        'is_drn' : drn_accum>niter*.95, \
                        'is_dsn' : dsn_accum>niter*.95}, \
                       ignore_index=True)
        
    bar.update(s)
bar.finish()
print "Could not process some sessions: "
print unprocessed_sessions

[                                                                        ]   0%

693163510
687293842


  
[                                                                        ]   1%

686708257


[=                                                                       ]   2%

691741646


[==                                                                      ]   3%

681698752


[===                                                                     ]   5%

692308988


[====                                                                    ]   6%

689705421


[=====                                                                   ]   7%

692799916




690759629




696247907




693163207




695169813




694862252




697392152




698783733




695633877




696130311




695746063




697608599




700599622




698999602




718830277




718204062




718651940




719893937




720757419




766321179




767441755




766830331




791540398




768253493




768939823




771835340




771658602




770127462




769667739




769667208




770687839




772172502




775065195




777331050




777309974




778292916




778299488




796527576




778165960




797356052




782420243




779940237




799645540




786264766




787347373




788822860




797559679




799774167




797612122




793855640




792316062




797729073




799787572




798725363




799890062




803063130




803237009




800151868




798920347




798938968




798500537




799095423




802548622




789848743




804919130




805615556




806172153




794590232




806835465




795204547




790856463




Could not process some sessions: 
[693163207, 697608599, 778292916, 799645540]





In [19]:
print sessions

[]


In [131]:
csv[csv.session_id_x==session_id].id.values == cells

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True])

In [43]:
results

Unnamed: 0,session_id,cell_id,n_cells,drn_accum,dsn_accum,is_drn,is_dsn
0,693163510,693314922,27,12,31,False,False
1,693163510,693314920,27,64,148,False,False
2,693163510,693314918,27,311,682,False,False
3,693163510,693314916,27,18,865,False,False
4,693163510,693314914,27,512,966,False,True
5,693163510,693314912,27,682,928,False,False
6,693163510,693314910,27,137,321,False,False
7,693163510,693314908,27,0,1000,False,True
8,693163510,693314906,27,15,740,False,False
9,693163510,693314904,27,485,990,False,True


In [45]:
results.to_csv(bfp + "results/bootstrapping/sftf_results_dsi25.csv")

In [44]:
print "DSI_pref>0.25, SFTF:"
print "    n_drns = " + str(sum(results.is_drn))
print "    n_dsns = " + str(sum(results.is_dsn))
print "    n_cells = " + str(sum(np.unique(results.n_cells)))
print "    n_drns/n_cells = " + str(sum(results.is_drn)/float(sum(np.unique(results.n_cells))))
print "    n_dsns/n_cells = " + str(sum(results.is_dsn)/float(sum(np.unique(results.n_cells))))
print "    n_drns/n_dsns = " + str(sum(results.is_drn)/float(sum(results.is_dsn)))


DSI_pref>0.25, SFTF:
    n_drns = 444
    n_dsns = 2751
    n_cells = 6344
    n_drns/n_cells = 0.0699873896595
    n_dsns/n_cells = 0.433638083228
    n_drns/n_dsns = 0.161395856052


In [121]:
sums=results.sum(axis = 0, skipna = True)


In [119]:
total_cells = sum(results.n_cells.unique())

In [127]:
print sums.is_drn/float(total_cells)
print sums.is_drn/float(sums.is_dsn)
print sums.is_dsn/float(total_cells)

0.06557377049180328
0.1641025641025641
0.39959016393442626
