A notebook for developing the code that produces pain maps (identifying the neurons which respond to the pain stimulus) for a single subject.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import glob
import itertools
from pathlib import Path
import pickle

import matplotlib.pyplot as plt
import multiprocessing
import numpy as np
import pandas as pd
#from scipy.stats import ttest_rel

from janelia_core.dataprocessing.dataset import ROIDataset
from janelia_core.stats.regression import linear_regression_ols_estimator
from janelia_core.stats.regression import grouped_linear_regression_acm_stats
from janelia_core.stats.regression import grouped_linear_regression_ols_estimator
from janelia_core.stats.regression import grouped_linear_regression_acm_linear_restriction_stats
#from janelia_core.stats.permutation_tests import paired_grouped_perm_test

from keller_zlatic_vnc.data_processing import calc_dff
from keller_zlatic_vnc.data_processing import count_unique_subjs_per_transition
from keller_zlatic_vnc.data_processing import count_transitions
from keller_zlatic_vnc.data_processing import generate_standard_id_for_full_annots
from keller_zlatic_vnc.data_processing import generate_standard_id_for_volume
from keller_zlatic_vnc.data_processing import get_basic_clean_annotations_from_full
from keller_zlatic_vnc.data_processing import read_full_annotations
from keller_zlatic_vnc.whole_brain.pain import _mean_t_test
from keller_zlatic_vnc.whole_brain.pain import _mean_perm_test
from keller_zlatic_vnc.whole_brain.whole_brain_stat_functions import make_whole_brain_videos_and_max_projs

## Parameters go here

In [23]:
# Every tunable setting used by this notebook is collected in the single dictionary ps
ps = {
    # Subject we analyze
    'analyze_subj': 'CW_18-02-15-L1',

    # Folders containing the annotation data.  Folders with the a4, a9 and spontaneous annotation data, used
    # previously, were:
    #   r'\\dm11\bishoplab\projects\keller_vnc\data\full_annotations\behavior_csv_cl_A4'
    #   r'\\dm11\bishoplab\projects\keller_vnc\data\full_annotations\behavior_csv_cl_A9'
    #   r'\\dm11\bishoplab\projects\keller_vnc\data\full_annotations\spontaneous_only_annotations'
    'annot_folders': [r'\\dm11\bishoplab\projects\keller_vnc\data\full_annotations\em_volume_behavior_csv'],

    # File containing locations to registered volumes.  The file used previously was:
    #   r'\\dm11\bishoplab\projects\keller_vnc\data\experiment_data_locations.xlsx'
    'volume_loc_file': r'\\dm11\bishoplab\projects\keller_vnc\data\EM_volume_experiment_data_locations.xlsx',

    # Subjects we do not want to include in the analysis
    'exclude_subjs': {'CW_17-11-06-L2'},

    # Subfolder containing the dataset for each subject
    'dataset_folder': 'extracted',

    # Base folder where datasets are stored
    # NOTE(review): this is a raw string, so both backslashes after the drive letter are part of the value
    'dataset_base_folder': r'K:\\SV4',

    # Data to calculate Delta F/F for in each dataset
    'f_ts_str': 'f_1_5_5',
    'bl_ts_str': 'bl_1_5_5_long',

    # Parameters for calculating dff
    'background': 100,
    'ep': 20,

    # Min and max duration of stimuli for the events we include in the analysis
    'min_stim_dur': 0,
    'max_stim_dur': 100,

    # Length of the window we pull dff in from before the stimulus
    'n_before_tm_pts': 3,

    # Whether the after window is aligned to the beginning or the end of the stimulus; can be either
    # 'start' or 'end'
    'after_aligned': 'end',

    # Offset between the alignment point chosen by 'after_aligned' and the first time point of the after window.
    # With end alignment, an offset of 0 means the first time point in the window will be the last time point
    # the stimulus was delivered
    'after_offset': 1,

    # Length of the window we pull dff in from after the stimulus
    'n_after_tm_pts': 3,

    # Type of test we perform.  Can be either 't' or 'perm'
    'test_type': 'perm',

    # Number of permutations to use if we are performing a permutation test
    'n_perms': 1000,

    # Folder where we should save results
    'result_folder': r'A:\projects\keller_vnc\results\draft_single_subject_pain_maps',

    # String to save with file names
    'save_str': 'end_aligned_offset_1_t_perm',

    # Roi group we are using - this must be passed on to the function that makes images and movies
    'roi_group': 'rois_1_5_5',
}

## Get list of all subjects we can analyze

These are the subjects for which we have both registered volumes and annotations, and which are not in the excluded subjects.

In [4]:
# Get list of all annotation files and the subjects they correspond to
# Collect every csv file found in the configured annotation folders, in folder order
annot_file_paths = [csv_path
                    for annot_folder in ps['annot_folders']
                    for csv_path in glob.glob(str(Path(annot_folder) / '*.csv'))]

# The subject id for each annotation file is derived from the bare file name
annot_file_names = [Path(csv_path).name for csv_path in annot_file_paths]
annot_subjs = [generate_standard_id_for_full_annots(file_name) for file_name in annot_file_names]

In [5]:
# Read in location of all registered volumes
def c_fcn(str):
    """Return the given string with every single-quote character removed.

    NOTE(review): the parameter name shadows the builtin str; it is kept so the signature is unchanged.
    """
    # Splitting on the quote character and re-joining the pieces drops every quote
    return "".join(str.split("'"))
# Run c_fcn (quote removal) on the columns keyed 0 and 1 as the spreadsheet is parsed
converters = dict.fromkeys([0, 1], c_fcn)

volume_locs = pd.read_excel(ps['volume_loc_file'], header=1, usecols=[1, 2], converters=converters)

# Row labels of the location table, and the standardized subject id generated for each row
volume_inds = list(volume_locs.index)
volume_subjs = [generate_standard_id_for_volume(volume_locs.loc[row_i, 'Main folder'],
                                                volume_locs.loc[row_i, 'Subfolder'])
                for row_i in volume_inds]

## Determine where the annotation and volume data is for the subject we analyze

In [6]:
# Find every position at which the subject we analyze appears among the registered volumes and the annotation files
volume_matches = np.argwhere(np.asarray(volume_subjs) == ps['analyze_subj'])
annot_matches = np.argwhere(np.asarray(annot_subjs) == ps['analyze_subj'])

# Indexing an empty match array fails with an IndexError that does not say what went wrong, so raise the same
# exception type with a message naming the subject and where we looked for it
if len(volume_matches) == 0:
    raise IndexError('Subject {} was not found among the registered volumes listed in {}.'.format(
        ps['analyze_subj'], ps['volume_loc_file']))
if len(annot_matches) == 0:
    raise IndexError('Subject {} was not found among the annotation files in {}.'.format(
        ps['analyze_subj'], ps['annot_folders']))

# If a subject is listed more than once, the first match is used
volume_i = volume_matches[0][0]
annot_i = annot_matches[0][0]

# Look up where the volume data and the annotation file for the subject are
volume_main_folder = volume_locs.loc[volume_inds[volume_i], 'Main folder']
volume_sub_folder = volume_locs.loc[volume_inds[volume_i], 'Subfolder']
annot_file = annot_file_paths[annot_i]

## Read in the annotation data

In [28]:
# Load the full annotation table for the subject from the annotation file located above
annotations = read_full_annotations(annot_file)

## Down select to only stimulus events

In [29]:
# Keep only the rows annotated as stimulus ('S') events.  A boolean mask is used instead of passing index labels
# to .iloc: .iloc is positional, so labels only select the right rows while the index is the default 0..n-1 range,
# and re-running this cell on the already filtered table would then select wrong rows or raise an IndexError.
is_stimulus = annotations['beh'] == 'S'
keep_inds = list(annotations.index[is_stimulus])  # Index labels of the retained events
annotations = annotations[is_stimulus]

In [32]:
# Duration of each event (the + 1 presumably counts both the first and the last annotated time point)
durations = annotations['end'] - annotations['start'] + 1

# Retain only events whose duration lies in the configured range, limits included
good_durations = durations.between(ps['min_stim_dur'], ps['max_stim_dur'])
annotations = annotations[good_durations]

In [33]:
# Show the duration of each stimulus event (these were computed before the duration filter was applied)
durations

13      6
69     15
117     6
163    15
198     6
240    15
270     6
306    15
334     6
348    15
377     6
403    15
422     6
424    15
458     6
461    15
487     6
496    15
dtype: int64

## Now we read in the $\frac{\Delta F}{F}$ data for the subject

In [None]:
print('Gathering neural data for subject.')

# The after window is anchored either to the first or the last time point of the stimulus.  Check the setting
# once, before any data is loaded.  An exception class has to be raised here: raising a bare string is itself
# a TypeError in Python 3, which would hide the real problem.
if ps['after_aligned'] not in ('start', 'end'):
    raise ValueError('Unable to recogonize value of ps[after_aligned].')

# Locate the pickled dataset for the subject
# NOTE(review): the first match returned by glob is used; this fails with an IndexError if no .pkl file is
# present and picks one arbitrarily if there are several
dataset_path = (Path(ps['dataset_base_folder']) / volume_main_folder / volume_sub_folder /
                Path(ps['dataset_folder']) / '*.pkl')
dataset_file = glob.glob(str(dataset_path))[0]

# NOTE(review): pickle.load can execute arbitrary code; only open dataset files from a trusted source
with open(dataset_file, 'rb') as f:
    dataset = ROIDataset.from_dict(pickle.load(f))

# Calculate dff
f = dataset.ts_data[ps['f_ts_str']]['vls'][:]
b = dataset.ts_data[ps['bl_ts_str']]['vls'][:]
dff = calc_dff(f=f, b=b, background=ps['background'], ep=ps['ep'])

# For each event, average dff over a window before the stimulus and a window after it.  The averages are taken
# over the first axis of dff, leaving one value per entry of the second axis.  A window that selects no time
# points at all gives an empty slice, whose mean is NaN.
extracted_dff = dict()
for index in annotations.index:
    event_start = annotations['start'][index]
    event_stop = annotations['end'][index]

    # The before window is the n_before_tm_pts time points ending just before the stimulus starts
    before_start_ind = event_start - ps['n_before_tm_pts']
    dff_before = np.mean(dff[before_start_ind:event_start, :], axis=0)

    # The after window starts after_offset time points past the chosen alignment point
    if ps['after_aligned'] == 'start':
        after_start_ind = event_start + ps['after_offset']
    else:
        after_start_ind = event_stop + ps['after_offset']
    after_stop_ind = after_start_ind + ps['n_after_tm_pts']

    dff_after = np.mean(dff[after_start_ind:after_stop_ind, :], axis=0)

    extracted_dff[index] = (dff_before, dff_after)

## Remove any events where the $\Delta F /F$ window fell outside of the recorded data

In [None]:
# An event is bad if either of its windows holds only NaN values.  Both the before and the after window are
# checked: testing only the before window keeps events whose after window fell outside the recorded data,
# and their NaN values would then be passed on to the statistical tests.
bad_keys = [k for k, (before_vls, after_vls) in extracted_dff.items()
            if np.all(np.isnan(before_vls)) or np.all(np.isnan(after_vls))]
print(bad_keys)
for key in bad_keys:
    del extracted_dff[key]

# Drop same events in annotations, even though we don't use this table anymore, just for good house keeping.
# The table is reassigned rather than modified in place, since it was produced by filtering an earlier table
# and in-place changes to such a table can trigger pandas' SettingWithCopyWarning.
annotations = annotations.drop(bad_keys, axis='index')

## Calculate stats

In [None]:
# Stack the per-event window means into two arrays, each with one row per retained event
dff_before = np.stack([before_vls for before_vls, _ in extracted_dff.values()])
dff_after = np.stack([after_vls for _, after_vls in extracted_dff.values()])

In [None]:
# Split the arrays into one vector of per-event values for each roi, so each roi can be tested separately
n_rois = dff_before.shape[1]
before_dff = [dff_before[:, roi_i] for roi_i in range(n_rois)]
after_dff = [dff_after[:, roi_i] for roi_i in range(n_rois)]

if __name__ == '__main__':
    # Reject an unknown test type before any worker processes are started
    if ps['test_type'] not in ('perm', 't'):
        raise ValueError('test_type not recogonized')

    # Leave one processor open for other processing on the machine, but always use at least one
    num_processors = max(1, multiprocessing.cpu_count() - 1)

    # Run the tests inside a with-block so the worker processes are shut down once the tests have finished or
    # failed; without it the pool is never closed and its processes are left running.  starmap only returns
    # once every result is available, so nothing is lost when the pool is torn down on exit.
    with multiprocessing.Pool(processes=num_processors) as pool:
        if ps['test_type'] == 'perm':
            print('Performing permtuation tests.')
            mn_stats = pool.starmap(_mean_perm_test, zip(before_dff, after_dff,
                                                         [ps['n_perms']]*len(before_dff)))
        else:
            print('Performing t tests.')
            mn_stats = pool.starmap(_mean_t_test, zip(before_dff, after_dff))

## Now visualize results

In [None]:
# Change in mean dff (after minus before) and the p-value of that change, for each roi
diff_vls = np.zeros(n_rois)
p_values = np.ones(n_rois)

for roi_i in range(n_rois):
    roi_stats = mn_stats[roi_i]
    diff_vls[roi_i] = roi_stats['after_mn'] - roi_stats['before_mn']

    # A p-value of 1 is recorded when no p-value could be calculated b/c the means before and after stimulus
    # were too close
    roi_p = roi_stats['p']
    p_values[roi_i] = 1.0 if np.isnan(roi_p) else roi_p
        


In [None]:
# Package the per-roi differences and p-values under the single behavior 'pain', then wrap them in the results
# dictionary that is handed to the function producing the images and movies
pain_stats = dict(beta=diff_vls, p_values=p_values)
beh_stats = dict(pain=pain_stats)
rs = dict(beh_stats=beh_stats)

In [None]:
# Output switches for this call: everything is turned off except the filtered coefficient movies and tiffs
output_flags = dict.fromkeys(['gen_mean_movie', 'gen_mean_tiff',
                              'gen_coef_movies', 'gen_coef_tiffs',
                              'gen_p_value_movies', 'gen_p_value_tiffs',
                              'gen_combined_movies', 'gen_combined_tiffs',
                              'gen_combined_projs', 'gen_uber_movies'], False)
output_flags.update(gen_filtered_coef_movies=True, gen_filtered_coef_tiffs=True)

make_whole_brain_videos_and_max_projs(rs=rs,
                                      save_folder_path=Path(ps['result_folder']),
                                      overlay_files=None,
                                      save_supp_str=ps['save_str'],
                                      p_vl_thresholds=[.05, .95],
                                      ex_dataset_file=dataset_file,
                                      roi_group=ps['roi_group'],
                                      coef_lims=[0.0, 2.0],
                                      **output_flags)

In [None]:
# Scratch check of zip, which stops at its shortest input, so only one pair is produced here
l = list(zip(range(3), 'a'))

In [None]:
# Display the scratch list built in the previous cell
l