### This script loads subjects with probes, gets evoked activity for all units, analyzes responsiveness, and saves the files in each subject's data folder.

In [1]:
import os
import sys
import json
import time
import gspread
import pickle
import numpy as np
import pandas as pd
from scipy import signal, fftpack, stats, ndimage
import statsmodels.stats.multitest as multitest

In [2]:
# Make the local code checkout importable (needed for the tbd_eeg imports below).
# Guarded so that re-running this cell does not keep appending duplicate entries.
local_code_dir = r'C:\Users\lesliec\code'
if local_code_dir not in sys.path:
    sys.path.append(local_code_dir)

In [3]:
from tbd_eeg.tbd_eeg.data_analysis.eegutils import EEGexp
from tbd_eeg.tbd_eeg.data_analysis.Utilities.utilities import (
    get_stim_events,
    get_evoked_traces,
    get_evoked_firing_rates,
    find_nearest_ind
)
from allensdk.brain_observatory.ecephys.lfp_subsampling.subsampling import remove_lfp_offset
from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache

In [4]:
# IPython line magic: select matplotlib's 'notebook' backend for figures drawn in this notebook.
%matplotlib notebook

Load CCF for identifying cortical areas

In [5]:
# Create the Allen SDK connectivity cache (resolution=10; presumably the CCF voxel size in
# microns -- TODO confirm against the AllenSDK docs) and fetch its structure tree.
mcc = MouseConnectivityCache(resolution=10)
# NOTE(review): str_tree is not referenced by any cell visible in this part of the notebook.
str_tree = mcc.get_structure_tree()

Load Zap_Zip-log_exp to get metadata for experiments

In [6]:
# Pull the experiment log from Google Sheets into a DataFrame.
# The sheet values arrive as a list of rows whose first row holds the column names.
_gc = gspread.service_account() # need a key file to access the account
_sh = _gc.open('Zap_Zip-log_exp') # open the spreadsheet
_df = pd.DataFrame(_sh.sheet1.get()) # load the first worksheet
# Transpose, use the original first row (label 0) as the index, transpose back:
# this promotes the first sheet row to the column labels and drops it from the data rows.
zzmetadata = _df.T.set_index(0).T # put it in a nicely formatted dataframe

Define areas of interest to plot population activity

In [7]:
def _with_layers(area_prefixes, layer_suffixes):
    """Return every prefix+layer label, grouped by prefix, in the order given."""
    return [prefix + layer for prefix in area_prefixes for layer in layer_suffixes]


# Layer suffixes used by the cortical area labels below.
_layers_without_4 = ['1', '2/3', '5', '6a', '6b']        # MO and ACA labels have no layer 4 entry
_layers_with_4 = ['1', '2/3', '4', '5', '6a', '6b']      # SS and VIS labels include layer 4

# Parent region name -> list of area labels that are grouped under it.
areas_of_interest = {
    'MO': _with_layers(['MOp', 'MOs'], _layers_without_4),
    'ACA': _with_layers(['ACAd', 'ACAv'], _layers_without_4),
    'SS': _with_layers(['SSp-bfd', 'SSp-ll', 'SSp-tr'], _layers_with_4),
    'VIS': _with_layers(['VISp', 'VISam', 'VISpm', 'VISrl'], _layers_with_4),
    'MO-TH': ['AV', 'CL', 'MD', 'PO', 'RT', 'VAL', 'VPL', 'VPM', 'VM'],
}

# Parent region name -> matplotlib color name.
area_colors = dict([
    ('MO', 'blue'),
    ('ACA', 'deepskyblue'),
    ('SS', 'blueviolet'),
    ('VIS', 'green'),
    ('MO-TH', 'steelblue'),
])

In [8]:
# Per-state colors, specified as 8-bit RGB and converted to the 0-1 floats matplotlib expects.
state_rgb_8bit = {
    'awake': (120, 156, 74),
    'anesthetized': (130, 122, 163),
    'recovery': (93, 167, 229),
}
state_colors = {
    state_name: tuple(channel / 255 for channel in rgb)
    for state_name, rgb in state_rgb_8bit.items()
}

#### Functions

In [9]:
def classify_response(row, alpha=0.05):
    """Label one unit/condition row as 'excited', 'inhibited' or 'ns'.

    Parameters
    ----------
    row : mapping
        Must provide 'corr_pval' (corrected p-value) and 'delta_spike_count'
        (post-minus-pre spike count difference).
    alpha : float, optional
        Significance threshold; the row is significant only if corr_pval < alpha.

    Returns
    -------
    str
        'excited' for a significant increase, 'inhibited' for a significant decrease,
        'ns' otherwise (not significant, zero change, or NaN values).
    """
    is_significant = row['corr_pval'] < alpha
    if not is_significant:
        return 'ns'

    delta = row['delta_spike_count']
    if delta > 0:
        return 'excited'
    if delta < 0:
        return 'inhibited'
    return 'ns'

### Load subjects from file

In [10]:
# Read the subjects file; the result maps group name -> {mouse number -> subject metadata}.
subjects_json_path = r'C:\Users\lesliec\OneDrive - Allen Institute\data\all_iso_subjects_wPROBES.json'
with open(subjects_json_path) as subjects_file:
    multi_sub_dict = json.loads(subjects_file.read())

In [11]:
# Create an EEGexp for every subject and store it on that subject's entry under 'exp'.
# Groups and subjects are echoed so the EEGexp loading messages can be attributed.
for group, group_subs in multi_sub_dict.items():
    print(group, end='\n\n')
    for mouse_num in group_subs:
        mdata = group_subs[mouse_num]
        print(' ' + str(mouse_num))
        mdata['exp'] = EEGexp(mdata['data_loc'], preprocess=False, make_stim_csv=False)
        print()
    print()

MOs_superficial

 546655
Experiment type: electrical and sensory stimulation

 575102
Experiment type: electrical and sensory stimulation
SomnoSuite log file not found.

 571619
Experiment type: electrical stimulation
SomnoSuite log file not found.


MOs_deep

 551399
Experiment type: electrical stimulation
SomnoSuite log file not found.

 551397
Experiment type: electrical and sensory stimulation
Body camera file not found.
Pupil camera file not found.

 569062
Experiment type: electrical and sensory stimulation

 569068
Experiment type: electrical and sensory stimulation

 569069
Experiment type: electrical and sensory stimulation

 569064
Experiment type: electrical and sensory stimulation
SomnoSuite log file not found.

 569073
Experiment type: electrical and sensory stimulation

 571619
Experiment type: electrical stimulation
SomnoSuite log file not found.


SSp_superficial

 571620
Experiment type: electrical and sensory stimulation
SomnoSuite log file not found.

 586466
Experim

## Set parameters

In [12]:
## Stimulation current labels (matched in order to the currents listed in the experiment log)
## and the brain states to compare.
current_level = ['low', 'medium', 'high']
test_states = ['awake', 'anesthetized']

overwrite_existing_files = False # if False, subjects whose response csv already exists are skipped
rest_trials_only = True # keep only trials whose mean running speed around the stimulus is zero
match_trial_nums = True # truncate each state to the smaller trial count across test_states

raster_window = [-1.0, 1.0] # time around each stimulus to collect spikes (s)
## Response windows (s after stimulus); the mirrored pre-stimulus window is used as the baseline ##
response_window = {
    'early': [0.002, 0.025],
    'mid': [0.025, 0.15],
    'late': [0.15, 0.3],
}
## For calculating SDF ##
time_bin = 0.0001 # size of time bins (s)
## Build the bin edges from an integer bin count with linspace: np.arange with a
## non-integer step can produce an extra (or missing) edge because of float rounding.
num_time_bins = int(round((raster_window[1] - raster_window[0]) / time_bin))
bins = np.linspace(raster_window[0], raster_window[1], num_time_bins + 1)
timex = bins[:-1] + time_bin/2 # bin centers (s)
## NOTE(review): both sigmas are passed to ndimage.gaussian_filter on the binned firing rate,
## so they are in units of time bins (5 bins = 0.5 ms, 50 bins = 5 ms at time_bin = 0.1 ms).
## Confirm this matches the kernel widths intended from Butovas&Schwarz2003.
sigshort = 5 # Gaussian kernel length (Butovas&Schwarz2003 use short kernel for first excitatory response)
siglong = 50 # Gaussian kernel length (Butovas&Schwarz2003 use long kernel for rest of response)

# Gather data for all subjects and all units (RS & FS)

In [13]:
## For every subject in every group:
##   1. collect the stimulus event times per current level and state,
##   2. build each unit's evoked spike raster and smoothed firing rate (SDF), save .pkl + info .csv,
##   3. test each unit's responsiveness in every response window, FDR-correct, classify, save .csv.
## Subjects whose response csv already exists are skipped unless overwrite_existing_files is True.
for group, group_subs in multi_sub_dict.items():
    print(group)
    print('')
    for mouse_num, mdata in group_subs.items():
        print(' {}'.format(mouse_num))
        ## mdata['exp'].data_folder is the recording1 folder ##

        ## Set filenames and check if exist ##
        ## os.path.join needs the folder path; passing the EEGexp object itself raises TypeError ##
        evoked_folder = os.path.join(mdata['exp'].data_folder, 'evoked_data')
        os.makedirs(evoked_folder, exist_ok=True)
        fn_unit_activity = os.path.join(evoked_folder, 'units_evoked_activity_' + group + '.pkl')
        fn_unit_info_csv = os.path.join(evoked_folder, 'units_info_' + group + '.csv')
        fn_unit_resp_csv = os.path.join(evoked_folder, 'units_info_wresp_' + group + '.csv')
        if not overwrite_existing_files and os.path.exists(fn_unit_resp_csv):
            print('  {} already exists, skipping analysis.\n'.format(fn_unit_resp_csv))
            continue

        ## Grab exp metadata from Zap_Zip-log_exp ##
        ## Row is selected by mouse name and by the name of the folder above experiment_folder ##
        exp_meta = zzmetadata[(
            (zzmetadata['mouse_name'].str.contains(mdata['exp'].mouse)) &
            (zzmetadata['exp_name'].str.contains(os.path.basename(os.path.dirname(mdata['exp'].experiment_folder))))
        )].squeeze()
        ## 'Current (uA)' is a '/'-separated string; keep only the purely numeric entries ##
        currentstr = exp_meta['Current (uA)'].replace(' ','')
        currents_list = []
        for char in currentstr.split('/'):
            if char.isdecimal():
                currents_list.append(char)

        ## Load stim log and running signal ##
        stim_log = pd.read_csv(mdata['exp'].stimulus_log_file)
        stim_log = stim_log.astype({'parameter': str}) # currents_list entries are strings
        run_signal, run_timestamps = mdata['exp'].load_running()

        ## Load probe data ##
        probe_list = [x.replace('_sorted', '') for x in mdata['exp'].experiment_data if 'probe' in x]
        probe_locs = np.ones((len(probe_list)), dtype=bool) # False for probes without area assignments
        probe_unit_data = {}
        for pbi, probei in enumerate(probe_list):
            print(probei)
            probe_unit_data[probei] = {}

            ## Load probe_info.json ##
            with open(mdata['exp'].ephys_params[probei]['probe_info']) as data_file:
                data = json.load(data_file)
            npx_allch = np.array(data['channel'])
            surface_ch = int(data['surface_channel'])
            allch_z = np.array(data['vertical_pos'])
            ref_mask = np.array(data['mask'])
            ## NOTE(review): npx_chs is computed but not used anywhere else in this cell ##
            npx_chs = np.array([x for x in npx_allch if ref_mask[x] and x <= surface_ch])
            ## Channel depth = vertical distance below the surface channel ##
            probe_unit_data[probei]['ch_depths'] = allch_z[surface_ch] - allch_z

            ## Select units and get peak chs ##
            select_units, peak_chs, unit_metrics = mdata['exp'].get_probe_units(probei)
            ## duration <= 0.4 -> 'FS', otherwise 'RS' (units of 'duration' presumably ms -- TODO confirm) ##
            unit_metrics['cell_type'] = unit_metrics['duration'].apply(lambda x: 'FS' if x <= 0.4 else 'RS')
            ## Sort units by peak channel (one shared ordering for every per-unit array) ##
            sort_order = np.squeeze(np.argsort(peak_chs))
            probe_unit_data[probei]['units'] = select_units[sort_order]
            probe_unit_data[probei]['chs'] = peak_chs[sort_order]
            probe_unit_data[probei]['cell_type'] = unit_metrics['cell_type'].values[sort_order]
            if 'area' in unit_metrics.columns:
                probe_unit_data[probei]['areas'] = unit_metrics.area.values[sort_order]
            else:
                print('  {} unit metrics file does not have area assignments.'.format(probei))
                probe_locs[pbi] = False
                continue
            ## Load spike times and cluster ids ##
            probe_unit_data[probei]['spike_times'] = np.load(mdata['exp'].ephys_params[probei]['spike_times'])
            probe_unit_data[probei]['spike_clusters'] = np.load(mdata['exp'].ephys_params[probei]['spike_clusters'])

        if not probe_locs.any():
            print('  NO area assignments for any probes, not analyzing.')
            continue

        ## Get trial times ##
        ## all_event_times[level][state] -> array of stimulus times.
        ## NOTE(review): zip() stops at the shorter list, so a subject with fewer currents than
        ## current_level entries gets no entry for the missing levels; likewise a state with no
        ## sweeps gets no entry. Both cases raise KeyError in the unit loop below. If no sweep of
        ## a state has events, np.concatenate raises ValueError here.
        all_event_times = {}
        for leveli, parami in zip(current_level, currents_list):
            all_event_times[leveli] = {}
            for statei in test_states:
                sweeps = mdata['states'][statei]
                if sweeps:
                    state_events = []
                    for sweepi in sweeps:
                        events = get_stim_events(stim_log, 'biphasic', parami, sweepi)
                        if len(events) == 0:
                            continue
                        state_events.append(events)
                    state_events = np.concatenate(state_events)
                    ## Get mean speed ##
                    ## 100 running-signal samples centered on each event
                    ## (presumably +/-0.5 s at 100 Hz -- TODO confirm the running signal rate)
                    rinds = np.arange(-int(0.5 * 100), int(0.5 * 100))
                    event_inds = np.array([find_nearest_ind(run_timestamps, x) for x in state_events])
                    mean_speed = np.mean(run_signal[np.repeat([rinds], len(event_inds), axis=0).T + event_inds], axis=0)
                    if rest_trials_only:
                        all_event_times[leveli][statei] = state_events[mean_speed == 0]
                    else:
                        all_event_times[leveli][statei] = state_events

        ## Get unit info and evoked activity (spikes and firing rate), save files ##
        start = time.time()
        all_units_info = []
        unit_activity = {}
        for probei, unit_data in probe_unit_data.items():
            if 'areas' not in unit_data.keys():
                print('  not including units from {}...no area assignments.'.format(probei))
                continue
            for unitind, uniti in enumerate(unit_data['units']):
                ## Unit name = last character of the probe name + cluster id ##
                unit_name = probei[-1] + str(uniti)
                unit_info = [] # unit_id, cell_type, depth, region, parent
                unit_activity[unit_name] = {}
                ## Gather unit info ##
                unit_info.append(unit_name) ## get unit_id ##
                unit_info.append(unit_data['cell_type'][unitind]) ## get cell_type ##
                unit_info.append(unit_data['ch_depths'][unit_data['chs'][unitind]]) ## get depth ##
                unit_region = unit_data['areas'][unitind]
                unit_info.append(unit_region) ## get region ##
                ## Parent is the single areas_of_interest group containing the region, else 'notROI' ##
                parent_region = [key for key in list(areas_of_interest.keys()) if unit_region in areas_of_interest[key]]
                if len(parent_region) == 1:
                    unit_info.append(parent_region[0])
                else:
                    unit_info.append('notROI')

                ## Get evoked spikes and SDFs ##
                spikesi = np.squeeze(unit_data['spike_times'][unit_data['spike_clusters'] == uniti])
                baselineFR = np.zeros((len(test_states), len(current_level)), dtype=float)
                for jj, leveli in enumerate(current_level):
                    ## Smallest trial count across states, used when match_trial_nums is True ##
                    NUM_TRIALS = min([len(all_event_times[leveli][x]) for x in test_states])
                    unit_activity[unit_name][leveli] = {}
                    for ii, statei in enumerate(test_states):
                        unit_activity[unit_name][leveli][statei] = {}
                        if match_trial_nums:
                            events = all_event_times[leveli][statei][:NUM_TRIALS]
                        else:
                            events = all_event_times[leveli][statei]
                        ## Event spike raster (spike times relative to each event) ##
                        event_raster = []
                        for eventi in events:
                            spikeinds = np.nonzero(
                                (spikesi > eventi + raster_window[0]) & (spikesi < eventi + raster_window[1]))[0]
                            event_raster.append(spikesi[spikeinds] - eventi)
                        ## Evoked firing rate (trial-averaged, spikes/s per time bin) ##
                        spike_counts, edges = np.histogram(np.concatenate(event_raster), bins)
                        firing_rate = spike_counts / (time_bin * len(events))
                        ## Store unit activity ##
                        unit_activity[unit_name][leveli][statei]['trial_count'] = len(events)
                        unit_activity[unit_name][leveli][statei]['event_spikes'] = event_raster
                        unit_activity[unit_name][leveli][statei]['SDFshort'] = [
                            timex, ndimage.gaussian_filter(firing_rate, sigma=sigshort, output=float)]
                        unit_activity[unit_name][leveli][statei]['SDFlong'] = [
                            timex, ndimage.gaussian_filter(firing_rate, sigma=siglong, output=float)]
                        ## Get baseline firing rate (mean over all pre-stimulus bins) ##
                        baselineFR[ii,jj] = np.mean(firing_rate[timex < 0])
                ## Average baseline across current levels; row order follows test_states ##
                mean_baselineFR = np.mean(baselineFR, axis=1)
                unit_info.append(mean_baselineFR[0]) ## awake BLFR ##
                unit_info.append(mean_baselineFR[1]) ## anesthetized BLFR ##
                ## Add unit_info to all_units_info ##
                all_units_info.append(unit_info)

        all_units_info_df = pd.DataFrame(
            all_units_info, columns=['unit_id', 'cell_type', 'depth', 'region', 'parent', 'BLFR_awake', 'BLFR_anesthetized'])
        ## Save the files to mouse's recordingX folder ##
        ## Context manager so the pickle file handle is flushed and closed right away ##
        with open(fn_unit_activity, 'wb') as unit_activity_file:
            pickle.dump(unit_activity, unit_activity_file)
        all_units_info_df.to_csv(fn_unit_info_csv, index=False)

        end = time.time()
        print('  Time to get evoked unit activity and save .pkl file: {:.2f} min'.format((end-start)/60))
        ## (unit_info is rebound for every row below, so it is not deleted here; deleting it
        ## raised NameError for subjects that produced no units.)

        start = time.time()
        ## Loop through all units to get responsiveness stats and save ##
        ## One output row per unit x stim level x state x response window ##
        unit_response_stats = []
        for ind, row in all_units_info_df.iterrows():
            for leveli in current_level:
                for statei in test_states:
                    uniti_activity = unit_activity[row.unit_id][leveli][statei]
                    for window, rwin in response_window.items():
                        unit_info = row.tolist()
                        unit_info.extend([leveli, statei, window]) ## add stim level, state, response_stage ##

                        ## Responsive p-value and spike count difference ##
                        ## Per-trial spike counts in the response window vs. the mirrored
                        ## pre-stimulus window [-rwin[1], -rwin[0]], compared with a paired
                        ## Wilcoxon signed-rank test.
                        pre_spcounts = np.zeros(uniti_activity['trial_count'], dtype=int)
                        post_spcounts = np.zeros(uniti_activity['trial_count'], dtype=int)
                        for j, event_spikes in enumerate(uniti_activity['event_spikes']):
                            pre_spcounts[j] = np.sum((event_spikes >= -rwin[1]) & (event_spikes <= -rwin[0]))
                            post_spcounts[j] = np.sum((event_spikes >= rwin[0]) & (event_spikes <= rwin[1]))
                        wstat, pval = stats.wilcoxon(x=post_spcounts, y=pre_spcounts, zero_method='zsplit')
                        spcount = np.mean(post_spcounts) - np.mean(pre_spcounts)
                        unit_info.extend([spcount, pval]) ## spike count and p-value ##

                        ## Get firing rate for window ##
                        ## early: tallest SDFshort peak above baseline; mid: mean SDFshort;
                        ## late: tallest SDFlong peak above baseline. NaN when no peak exceeds baseline.
                        testinds = np.nonzero((timex > rwin[0]) & (timex < rwin[1]))[0]
                        SDFsh = uniti_activity['SDFshort'][1][testinds]
                        SDFl = uniti_activity['SDFlong'][1][testinds]
                        ublfr = row['BLFR_' + statei]
                        if window == 'early':
                            peaks, props = signal.find_peaks(SDFsh, height=ublfr)
                            if len(props['peak_heights']) > 0:
                                unit_info.append(np.max(props['peak_heights']))
                            else:
                                unit_info.append(np.nan)
                        elif window == 'mid':
                            unit_info.append(np.mean(SDFsh))
                        elif window == 'late':
                            peaks, props = signal.find_peaks(SDFl, height=ublfr)
                            if len(props['peak_heights']) > 0:
                                unit_info.append(np.max(props['peak_heights']))
                            else:
                                unit_info.append(np.nan)

                        ## Store unit metrics ##
                        unit_response_stats.append(unit_info)
        all_unit_stats = pd.DataFrame(unit_response_stats, columns=[
            'unit_id', 'cell_type', 'depth', 'region', 'parent', 'BLFR_awake', 'BLFR_anesthetized',
            'stim_level', 'state', 'response_stage', 'delta_spike_count', 'pval', 'evokedFR',
        ])
        ## Adjust p-values for multiple comparisons ##
        ## FDR correction is applied across every row of this subject's table at once ##
        original_pvals = all_unit_stats.pval.values
        rej, corr_pvals = multitest.fdrcorrection(original_pvals)
        all_unit_stats['corr_pval'] = corr_pvals
        ## Classify response (excited/inhibited) (default alpha=0.05) ##
        all_unit_stats['response_type'] = all_unit_stats.apply(lambda row: classify_response(row), axis=1)
        ## Save the files ##
        all_unit_stats.to_csv(fn_unit_resp_csv, index=False)

        end = time.time()
        print('  Time to test responsive units and save .csv file: {:.2f} min'.format((end-start)/60))

        ### After each subject, delete common variables ###
        del stim_log
        del run_signal
        del run_timestamps
        del probe_unit_data
        del all_event_times
        del all_units_info
        del all_units_info_df
        del unit_activity
        del unit_response_stats
        del all_unit_stats
        print('')

MOs_superficial

 546655
  F:\EEG_exp\mouse546655\estim_vis_2020-10-23_11-01-14\experiment1\recording1\units_info_wresp_MOs_superficial.csv already exists, skipping analysis.

 575102
  F:\EEG_exp\mouse575102\estim_vis_2021-06-03_11-25-01\experiment1\recording1\units_info_wresp_MOs_superficial.csv already exists, skipping analysis.

 571619
  F:\EEG_exp\mouse571619\estim2_2021-03-19_10-09-01\experiment1\recording1\units_info_wresp_MOs_superficial.csv already exists, skipping analysis.

MOs_deep

 551399
  F:\EEG_exp\mouse551399\estim_2021-01-28_13-59-09\experiment1\recording1\units_info_wresp_MOs_deep.csv already exists, skipping analysis.

 551397
  F:\EEG_exp\mouse551397\estim_vis_2021-02-11_10-45-23\experiment1\recording1\units_info_wresp_MOs_deep.csv already exists, skipping analysis.

 569062
  F:\EEG_exp\mouse569062\estim_vis_2021-02-18_11-17-51\experiment1\recording1\units_info_wresp_MOs_deep.csv already exists, skipping analysis.

 569068
  F:\EEG_exp\mouse569068\estim_vis_2021