This notebook checks the correspondence between the stimulus annotations in Chen's and Nadine's annotation records.

In [1]:
import copy
import glob 
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from keller_zlatic_vnc.data_processing import find_before_and_after_events
from keller_zlatic_vnc.data_processing import generate_standard_id_for_full_annots
from keller_zlatic_vnc.data_processing import read_full_annotations
from keller_zlatic_vnc.data_processing import read_trace_data
from keller_zlatic_vnc.data_processing import read_raw_transitions_from_excel



In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
%matplotlib notebook

## Parameters go here

In [4]:
# All tunable settings for this notebook are collected in a single dictionary
ps = {
    # The file specifying which subjects we should include in the analysis
    'subject_file': r'/Volumes/bishoplab/projects/keller_vnc/data/single_cell/subjects.csv',

    # Location of files provided by Chen containing the raw fluorescence traces
    # for the single cells (currently disabled)
    # 'trace_base_folder': r'/Volumes/bishoplab/projects/keller_vnc/data/single_cell/single_cell_traces',
    # 'a00c_trace_folder': 'A00c',
    # 'basin_trace_folder': 'Basin',
    # 'handle_trace_folder': 'Handle',

    # Location of folders containing annotations from Nadine
    'a4_annot_folder': r'/Volumes/bishoplab/projects/keller_vnc/data/full_annotations/behavior_csv_cl_A4',
    'a9_annot_folder': r'/Volumes/bishoplab/projects/keller_vnc/data/full_annotations/behavior_csv_cl_A9',

    # Location of file containing annotations from Chen
    # (an earlier setting pointed at transition_list.xlsx in the same folder)
    'chen_file': r'/Volumes/bishoplab/projects/keller_vnc/data/extracted_dff_v2/transition_list_CW_11202021.xlsx',

    # Parameters for declaring preceding quiet behaviors
    'pre_q_th': 50,
}


## Read in Nadine's version of annotations for stimulations

In [5]:
# Read the subject file and keep the entries of its 'Subject' column as the
# list of subjects to process
subject_tbl = pd.read_csv(ps['subject_file'])
subjects = list(subject_tbl['Subject'])

In [6]:
# Get list of subjects we have annotations for
a4_file_paths = glob.glob(str(Path(ps['a4_annot_folder']) / '*.csv'))
a9_file_paths = glob.glob(str(Path(ps['a9_annot_folder']) / '*.csv'))

# A4 files come first in the combined list, followed by the A9 files
annot_file_paths = a4_file_paths + a9_file_paths
n_annot_files = len(annot_file_paths)

# Boolean mask over annot_file_paths: True for files from the A4 folder.
# The builtin bool is used as the dtype because the np.bool alias is not
# available in every NumPy release (it was removed in NumPy 1.24).
a4_files = np.zeros(n_annot_files, dtype=bool)
a4_files[:len(a4_file_paths)] = True

# Standardized subject id for each annotation file, derived from its file name
annot_file_names = [Path(p).name for p in annot_file_paths]
annot_subjs = [generate_standard_id_for_full_annots(fn) for fn in annot_file_names]

In [7]:
# Get stimulus events for each subject we analyze
annot_subjs_arr = np.asarray(annot_subjs)

# Per-subject tables are collected here and concatenated once after the loop
# (DataFrame.append was removed in pandas 2.0)
per_subject_events = []

for subj in subjects:

    # Find the annotations for this subject; if several files map to the same
    # subject id, the first one in annot_file_paths is used
    match_inds = np.flatnonzero(annot_subjs_arr == subj)
    if match_inds.size == 0:
        raise IndexError('No annotation file found for subject: ' + str(subj))
    ind = match_inds[0]

    # Load the annotations for this subject
    tbl = read_full_annotations(annot_file_paths[ind])

    # Pull out stimulus events (rows with behavior 'S') for this subject,
    # noting what comes before and after.  An explicit copy is taken so the
    # inserts below do not touch tbl.
    stim_tbl = tbl[tbl['beh'] == 'S'].copy()
    stim_tbl.insert(0, 'subject_id', subj)
    stim_tbl.insert(1, 'event_id', range(stim_tbl.shape[0]))
    # Files from the A4 folder are labeled 'A4'; all others are labeled 'A9'
    stim_tbl.insert(2, 'manipulation_tgt', 'A4' if a4_files[ind] else 'A9')
    before_after_tbl = find_before_and_after_events(events=stim_tbl, all_events=tbl)
    stim_annots = pd.concat([stim_tbl, before_after_tbl], axis=1)
    per_subject_events.append(stim_annots)

# pd.concat raises on an empty list, so fall back to an empty table when
# there are no subjects
if per_subject_events:
    nadine_events = pd.concat(per_subject_events, ignore_index=True)
else:
    nadine_events = pd.DataFrame()


## Read in Chen's version of annotations for stimulations

In [8]:
# Load Chen's transitions and rename the manipulation start/end columns to the
# 'start'/'end' names used for Nadine's events, so the two tables can be
# compared column by column
chen_col_names = {'Manipulation Start': 'start', 'Manipulation End': 'end'}
chen_events = read_raw_transitions_from_excel(file=ps['chen_file']).rename(columns=chen_col_names)

In [9]:
# Shift the start and end of every event in both tables by one.  Both tables
# get the same shift, so the comparison between them is unaffected.
# NOTE(review): presumably this converts 0-based indices to 1-based ones for
# the spreadsheet output - confirm.
# WARNING: the shift is applied in place, so running this cell more than once
# shifts the values again each time.
for events_tbl in (nadine_events, chen_events):
    for col in ('start', 'end'):
        events_tbl[col] += 1

## Check for correspondence subject by subject

In [10]:
def tbls_match(tbl1, tbl2):
    """ Check if every event in table 1 has a match in table 2.

    An event of tbl1 is matched when exactly one row of tbl2 has the same
    'start' value, exactly one row of tbl2 has the same 'end' value, and those
    two rows are the same row.

    Args:
        tbl1: Table of events to look up; needs 'start' and 'end' columns.

        tbl2: Table of events to search in; needs 'start' and 'end' columns.

    Returns:
        True if every row of tbl1 is matched in tbl2 (which includes the case
        of tbl1 having no rows), False otherwise.  The check is one-directional:
        extra rows in tbl2 do not cause a False.
    """
    for _, event in tbl1.iterrows():
        same_start = (tbl2['start'] == event['start']).to_numpy()
        same_end = (tbl2['end'] == event['end']).to_numpy()

        # Both the start and the end must be found exactly once in tbl2 ...
        unique_hits = same_start.sum() == 1 and same_end.sum() == 1

        # ... and they must be found in the very same row
        if not (unique_hits and np.array_equal(same_start, same_end)):
            return False

    return True

In [11]:
# Write the events of every subject whose two annotation sources disagree to a
# single spreadsheet: Nadine's events on the left and Chen's events on the
# right (starting at column 5), one block of rows per mismatching subject.
# NOTE(review): when no subject mismatches, nothing is written to the workbook;
# some Excel engines may fail when saving a workbook without sheets - confirm.
startrow = 0
with pd.ExcelWriter('/Users/bishopw/Desktop/compare_events/comparisons.xlsx') as writer:
    for subj in subjects:
        nadine_tbl = nadine_events[nadine_events['subject_id'] == subj]
        chen_tbl = chen_events[chen_events['subject_id'] == subj]

        # The tables agree only if each one's events are all matched in the other
        if tbls_match(nadine_tbl, chen_tbl) and tbls_match(chen_tbl, nadine_tbl):
            continue

        nadine_tbl[['subject_id', 'start', 'end']].to_excel(writer, startrow=startrow)
        chen_tbl[['subject_id', 'start', 'end']].to_excel(writer, startrow=startrow, startcol=5)

        # Advance past the taller of the two tables (plus the header row and
        # two blank rows); using only Nadine's row count would let the next
        # subject's block overwrite the end of a longer table from Chen.
        startrow += max(nadine_tbl.shape[0], chen_tbl.shape[0]) + 3