This is a notebook to check the correspondence between the stimulus annotations in Chen's and Nadine's records of annotations.

In [None]:
import copy
import glob 
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from keller_zlatic_vnc.data_processing import find_before_and_after_events
from keller_zlatic_vnc.data_processing import generate_standard_id_for_full_annots
from keller_zlatic_vnc.data_processing import read_full_annotations
from keller_zlatic_vnc.data_processing import read_trace_data

In [None]:
# Load IPython's autoreload extension and put it in mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Select matplotlib's interactive notebook backend for the figures created below.
%matplotlib notebook

## Parameters go here

In [None]:
# All user-adjustable settings for this notebook live in this one dictionary.
ps = {
    # CSV file listing the subjects to include in the analysis
    'subject_file': r'/Volumes/bishoplab/projects/keller_vnc/data/single_cell/subjects.csv',

    # Raw single-cell fluorescence traces provided by Chen (currently disabled):
    # 'trace_base_folder': r'/Volumes/bishoplab/projects/keller_vnc/data/single_cell/single_cell_traces',
    # 'a00c_trace_folder': 'A00c',
    # 'basin_trace_folder': 'Basin',
    # 'handle_trace_folder': 'Handle',

    # Folders holding the full annotation files for the A4 and A9 manipulations
    'a4_annot_folder': r'/Volumes/bishoplab/projects/keller_vnc/data/full_annotations/behavior_csv_cl_A4',
    'a9_annot_folder': r'/Volumes/bishoplab/projects/keller_vnc/data/full_annotations/behavior_csv_cl_A9',

    # Threshold used when declaring that a stimulus was preceded by quiet
    'pre_q_th': 50,
}


## Read in the basic data for each subject

In [None]:
# Read the subject table and keep its 'Subject' column as a plain Python list
subject_tbl = pd.read_csv(ps['subject_file'])
subjects = subject_tbl['Subject'].tolist()

In [None]:
#data = read_trace_data(subjects=subjects, 
#                   a00c_trace_folder=Path(ps['trace_base_folder'])/ps['a00c_trace_folder'], 
#                   handle_trace_folder=Path(ps['trace_base_folder'])/ps['handle_trace_folder'], 
#                   basin_trace_folder=Path(ps['trace_base_folder'])/ps['basin_trace_folder'])

## Find stimulus events for the subjects we are analyzing

In [None]:
# Get list of subjects we have annotations for
a4_file_paths = glob.glob(str(Path(ps['a4_annot_folder']) / '*.csv'))
a9_file_paths = glob.glob(str(Path(ps['a9_annot_folder']) / '*.csv'))

# A4 files are listed first in the combined list, followed by the A9 files
annot_file_paths = a4_file_paths + a9_file_paths
n_annot_files = len(annot_file_paths)

# Boolean mask over annot_file_paths: True for A4 files, False for A9 files.
# The builtin `bool` is used because the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24.
a4_files = np.zeros(n_annot_files, dtype=bool)
a4_files[:len(a4_file_paths)] = True

# Standardized subject id for each annotation file, derived from its file name
annot_file_names = [Path(p).name for p in annot_file_paths]
annot_subjs = [generate_standard_id_for_full_annots(fn) for fn in annot_file_names]

In [None]:
# Get stimulus events for each subject we analyze
# NOTE(review): `data` is only created by the `read_trace_data` call, which is
# commented out in this notebook, so this line requires `data` to already exist
# in the kernel - confirm how the analysis subjects should be obtained.
analysis_subjs = list(data['subject_id'].unique())

# Map each annotated subject to the index of its first annotation file, so the
# lookup inside the loop does not rescan the whole list for every subject
annot_inds = {}
for annot_i, annot_subj in enumerate(annot_subjs):
    annot_inds.setdefault(annot_subj, annot_i)

# Per-subject tables are collected here and concatenated once at the end
# (DataFrame.append was removed in pandas 2.0)
subj_event_tbls = []

for subj in analysis_subjs:

    # Find the annotations for this subject
    if subj not in annot_inds:
        raise RuntimeError(f'Unable to find annotations for subject {subj}.')
    ind = annot_inds[subj]

    # Load the annotations for this subject
    tbl = read_full_annotations(annot_file_paths[ind])

    # Pull out stimulus events for this subject, noting what comes before and after
    stim_tbl = tbl[tbl['beh'] == 'S'].copy()
    stim_tbl.insert(0, 'subject_id', subj)
    stim_tbl.insert(1, 'event_id', range(stim_tbl.shape[0]))
    # a4_files marks which annotation files came from the A4 folder
    stim_tbl.insert(2, 'manipulation_tgt', 'A4' if a4_files[ind] else 'A9')
    before_after_tbl = find_before_and_after_events(events=stim_tbl, all_events=tbl)
    stim_annots = pd.concat([stim_tbl, before_after_tbl], axis=1)
    subj_event_tbls.append(stim_annots)

# pd.concat raises on an empty list, so fall back to an empty table in that case
if subj_event_tbls:
    subj_events = pd.concat(subj_event_tbls, ignore_index=True)
else:
    subj_events = pd.DataFrame()


## Get rid of any events where we could not classify the type of preceding or succeeding behavior

In [None]:
# Keep only the rows (events) that have no missing values in any column
subj_events = subj_events.dropna(axis=0, how='any')

## Mark preceding and succeeding quiet events

In [None]:
# Difference between each stimulus 'start' and its 'beh_before_end' value
delta_before = subj_events['start'].sub(subj_events['beh_before_end'])

# Events whose difference exceeds the threshold get 'beh_before' relabeled as quiet ('Q')
before_quiet_inds = delta_before.gt(ps['pre_q_th'])
subj_events.loc[before_quiet_inds, 'beh_before'] = 'Q'

## Get the number of time points between start of non-quiet preceding behaviors and stimulus

In [None]:
# Select the events whose 'beh_before' label is not quiet ('Q')
non_quiet_inds = subj_events['beh_before'].ne('Q')
non_quiet_events = subj_events.loc[non_quiet_inds]

# Subtract 1 so that only the points strictly between the two starts are counted
n_tm_pts_between = (
    non_quiet_events['start'] - non_quiet_events['beh_before_start'] - 1
).to_numpy()

In [None]:
# Histogram of the number of time points between the start of the preceding
# (non-quiet) behavior and the start of the stimulus
plt.figure()
plt.hist(n_tm_pts_between)
# Label the axes so the figure can be read on its own
plt.xlabel('# of time points between start of behavior and stimulus onset')
plt.ylabel('# of events')

In [None]:
# Report the average of the counts computed above
mean_n_tm_pts_between = np.mean(n_tm_pts_between)
print(f'Mean # of Times Points Between start of behavior and stimulus onset: {mean_n_tm_pts_between!s}')

In [None]:
# Ascending array of the 'start' minus 'beh_before_end' differences for all events
sorted_before = delta_before.sort_values().to_numpy()

In [None]:
# Open a new figure first: without it, pyplot draws onto the current figure,
# which with the interactive notebook backend is the histogram created earlier.
plt.figure()
plt.plot(np.arange(len(sorted_before)), sorted_before, 'r.')
# Label the axes so the figure can be read on its own
plt.xlabel('Event (sorted by difference)')
plt.ylabel('Stimulus start - beh_before_end')