In [1]:
from datetime import date
from glob import glob
import json
import math
import os
import pickle
import sys
import time

import gspread
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
from scipy import integrate, interpolate, stats

In [2]:
# Extend the import search path with a local code folder (presumably where the
# tbd_eeg package imported in the next cell lives — confirm).
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
sys.path.append(r'C:\Users\lesliec\code')

In [3]:
from tbd_eeg.tbd_eeg.data_analysis.eegutils import EEGexp

In [4]:
from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache

In [5]:
%matplotlib notebook

#### Functions

In [6]:
def find_closest_region(sunit_info, struct_tree, annot):
    """Return the acronym of the grey-matter region nearest to a unit along DV.

    Starting at the unit's own voxel, the annotation column at the unit's
    (CCF_AP, CCF_ML) position is scanned ventrally (increasing DV index) and
    dorsally (decreasing DV index) until a voxel is found whose structure
    descends from grey matter (structure id 8). The nearer hit wins; a tie
    goes to the ventral hit.

    Voxels whose id is not in the structure tree (e.g. id 0, for which
    ``get_structures_by_id`` yields ``None``) are skipped, and both scans stop
    at the edges of the volume instead of wrapping around via negative indices
    or running past the end.

    Parameters
    ----------
    sunit_info : object with integer attributes ``CCF_AP``, ``CCF_DV``, ``CCF_ML``
        Voxel indices of the unit (e.g. one row of the unit info dataframe).
    struct_tree : structure tree
        Must provide ``get_structures_by_id`` and ``structure_descends_from``.
    annot : 3-D array of structure ids, indexed ``[AP, DV, ML]``
        Must expose ``shape``.

    Returns
    -------
    str
        Acronym of the closest grey-matter structure.

    Raises
    ------
    ValueError
        If the column contains no grey-matter voxel in either direction.
    """
    GREY_MATTER_ID = 8
    ap, dv_start, ml = sunit_info.CCF_AP, sunit_info.CCF_DV, sunit_info.CCF_ML
    n_dv = annot.shape[1]

    def _grey_leaf_id(dv):
        # Id of the structure at this voxel if it is grey matter, else None.
        struct = struct_tree.get_structures_by_id([annot[ap, dv, ml]])[0]
        if struct is None:  # voxel id unknown to the tree (e.g. 0 = outside brain)
            return None
        leaf_id = struct['structure_id_path'][-1]
        if struct_tree.structure_descends_from(leaf_id, GREY_MATTER_ID):
            return leaf_id
        return None

    def _scan(step):
        # Walk along DV in the given direction; return (distance, id) or None.
        dv = dv_start
        while 0 <= dv < n_dv:
            leaf_id = _grey_leaf_id(dv)
            if leaf_id is not None:
                return abs(dv - dv_start), leaf_id
            dv += step
        return None

    ventral = _scan(+1)
    dorsal = _scan(-1)
    if ventral is None and dorsal is None:
        raise ValueError(
            'No grey matter voxel found along DV at AP={}, ML={}'.format(ap, ml)
        )

    # Prefer the ventral hit on ties, as before.
    if dorsal is None or (ventral is not None and ventral[0] <= dorsal[0]):
        closest_id = ventral[1]
    else:
        closest_id = dorsal[1]
    return struct_tree.get_structures_by_id([closest_id])[0]['acronym']

In [36]:
def get_region_from_children(test_id, parent_id, struct_tree):
    """Return the acronym of the direct child of ``parent_id`` that contains ``test_id``.

    Parameters
    ----------
    test_id : int
        Structure id to classify.
    parent_id : int
        Structure id whose direct children are the candidate labels.
    struct_tree : structure tree
        Must provide ``child_ids``, ``structure_descends_from`` and
        ``get_structures_by_id``.

    Returns
    -------
    str
        Acronym of the first child (in ``child_ids`` order) that ``test_id``
        descends from. If no child matches (e.g. ``test_id`` is the parent
        itself), the parent's own acronym is returned.

    Notes
    -----
    The "no child matched" case is handled explicitly instead of through a
    bare ``except``, so genuine lookup errors are no longer silently turned
    into the parent acronym.
    """
    for child_id in struct_tree.child_ids([parent_id])[0]:
        if struct_tree.structure_descends_from(test_id, child_id):
            return struct_tree.get_structures_by_id([child_id])[0]['acronym']
    # No direct child contains test_id: fall back to the parent itself.
    return struct_tree.get_structures_by_id([parent_id])[0]['acronym']

In [8]:
def get_parent_region(region_acronym, struct_tree):
    """Map a CCF region acronym to the coarse parent-region label used for grouping.

    Parameters
    ----------
    region_acronym : str
        Acronym known to ``struct_tree``.
    struct_tree : structure tree
        Must provide ``get_structures_by_acronym`` and
        ``structure_descends_from`` (plus whatever
        ``get_region_from_children`` needs).

    Returns
    -------
    str
        * within the cerebrum: the matching child of Isocortex, hippocampal
          formation or cortical subplate (via ``get_region_from_children``),
          or ``'OLF'``, ``'STR'``, ``'PAL'``; otherwise ``'unassigned'``
        * within the brain stem: ``'RT-TH'``, ``'SM-TH'`` or ``'other-TH'``
          for thalamic regions, ``'HY'`` for hypothalamic regions, otherwise
          the matching child of the brain stem
        * ``'unassigned'`` for everything else

    Notes
    -----
    The thalamus check uses structure id 549 (TH). The previous id, 1129, is
    the interbrain, which also contains the hypothalamus, so hypothalamic
    regions were labelled ``'other-TH'`` and the ``'HY'`` branch could never
    be reached.
    """
    # Allen CCF structure ids used below.
    CEREBRUM, BRAIN_STEM = 567, 343
    ISOCORTEX, OLFACTORY, HIPPOCAMPAL_FORMATION, CORTICAL_SUBPLATE = 315, 698, 1089, 703
    STRIATUM, PALLIDUM = 477, 803
    THALAMUS, HYPOTHALAMUS = 549, 1097

    areas_of_interest = {
        'SM-TH': ['AV', 'CL', 'MD', 'PO', 'PF', 'VAL', 'VPL', 'VPM', 'VM'],
    }

    reg_id = struct_tree.get_structures_by_acronym([region_acronym])[0]['id']

    def within(ancestor_id):
        return struct_tree.structure_descends_from(reg_id, ancestor_id)

    if within(CEREBRUM):
        if within(ISOCORTEX):
            return get_region_from_children(reg_id, ISOCORTEX, struct_tree)
        if within(OLFACTORY):
            return 'OLF'
        if within(HIPPOCAMPAL_FORMATION):
            return get_region_from_children(reg_id, HIPPOCAMPAL_FORMATION, struct_tree)
        if within(CORTICAL_SUBPLATE):
            return get_region_from_children(reg_id, CORTICAL_SUBPLATE, struct_tree)
        if within(STRIATUM):
            return 'STR'
        if within(PALLIDUM):
            return 'PAL'
        return 'unassigned'

    if within(BRAIN_STEM):
        if within(THALAMUS):
            if region_acronym == 'RT':
                return 'RT-TH'
            if region_acronym in areas_of_interest['SM-TH']:
                return 'SM-TH'
            return 'other-TH'
        if within(HYPOTHALAMUS):
            return 'HY'
        return get_region_from_children(reg_id, BRAIN_STEM, struct_tree)

    return 'unassigned'

In [9]:
def add_parent_region_to_df(unit_info_df, struct_tree, annot):
    """Return a copy of the unit table with a ``parent_region`` column added.

    For every unit (row) the region label is resolved in three steps:

    1. If ``region`` is not an acronym known to ``struct_tree`` (lookup raises
       ``KeyError``, e.g. for NaN), it is replaced by the label of a nearby
       annotated voxel: the first non-zero voxel of the unit's DV column when
       ``depth <= 0`` (unit placed above the brain), otherwise the first
       non-zero voxel found by stepping towards lower ML indices.
    2. If the resulting structure does not descend from grey matter (id 8),
       it is replaced by the closest grey-matter region along DV
       (``find_closest_region``).
    3. The adjusted acronym is mapped to its parent label
       (``get_parent_region``).

    Parameters
    ----------
    unit_info_df : pandas.DataFrame
        Needs the columns ``region``, ``depth``, ``CCF_AP``, ``CCF_DV`` and
        ``CCF_ML``. It is not modified; any index is supported.
    struct_tree : structure tree
    annot : 3-D array of structure ids, indexed ``[AP, DV, ML]``

    Returns
    -------
    pandas.DataFrame
        New frame with the input columns plus ``parent_region`` (an
        ``adj_region`` column, if present in the input, is dropped as before).

    Raises
    ------
    ValueError
        If no annotated voxel can be found for a unit with an unknown region.
    """
    GREY_MATTER_ID = 8

    def _known_acronym(row):
        # Step 1: make sure the row carries an acronym the structure tree knows.
        try:
            struct_tree.get_structures_by_acronym([row.region])
            return row.region
        except KeyError:
            pass

        if row.depth <= 0:  # unit was placed above brain
            dv_column = annot[row.CCF_AP, :, row.CCF_ML]
            labelled = np.nonzero(dv_column)[0]
            if labelled.size == 0:
                raise ValueError(
                    'No annotated voxel in DV column at AP={}, ML={}'.format(
                        row.CCF_AP, row.CCF_ML)
                )
            region_id = dv_column[labelled[0]]
        else:
            ml = row.CCF_ML
            # Stop at index 0: a negative index would wrap around to the far
            # side of the volume and silently pick an unrelated voxel.
            while ml >= 0 and annot[row.CCF_AP, row.CCF_DV, ml] == 0:
                ml -= 1
            if ml < 0:
                raise ValueError(
                    'No annotated voxel towards lower ML at AP={}, DV={}'.format(
                        row.CCF_AP, row.CCF_DV)
                )
            region_id = annot[row.CCF_AP, row.CCF_DV, ml]
        return struct_tree.get_structures_by_id([region_id])[0]['acronym']

    parent_regions = []
    # Rows are independent, so the three steps are applied per row. Results are
    # collected positionally, never via the index label.
    for _, row in unit_info_df.iterrows():
        acronym = _known_acronym(row)

        ## Re-assign any non-grey matter area to the closest region ##
        reg_id = struct_tree.get_structures_by_acronym([acronym])[0]['id']
        if not struct_tree.structure_descends_from(reg_id, GREY_MATTER_ID):
            acronym = find_closest_region(row, struct_tree, annot)

        ## Assign a parent region to the adjusted CCF region ##
        parent_regions.append(get_parent_region(acronym, struct_tree))

    # drop() returns a new frame, so the caller's dataframe stays untouched.
    result = unit_info_df.drop(columns='adj_region', errors='ignore')
    result['parent_region'] = np.array(parent_regions, dtype=object)
    return result

#### Load CCF structure tree

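The resolution of the CCF matters: the annotation volume must be loaded at the same voxel resolution that was used to compute each unit's `CCF_AP`/`CCF_DV`/`CCF_ML` indices (see the `CCF_res` column of the subjects table).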

In [10]:
# Build the Allen CCF cache and pull the two objects every function above needs.
# NOTE(review): the units' CCF_AP/CCF_DV/CCF_ML values are used directly as indices
# into `annot`, so `resolution` presumably has to equal the resolution those indices
# were computed at (see CCF_res in the subjects table) — confirm per subject.
mcc = MouseConnectivityCache(resolution=10)
# Structure ontology: lookups by id/acronym and ancestry tests.
str_tree = mcc.get_structure_tree()
# `annot` holds one structure id per voxel and is indexed [AP, DV, ML] by the functions above.
annot, annot_info = mcc.get_annotation_volume()

### Load subjects

In [11]:
# Table of all subjects/experiments; one row per recording.
multisub_file = r"C:\Users\lesliec\OneDrive - Allen Institute\data\brain_states_subjects.csv"
# Read mouse ids as strings, then cast the 'analyze' flag to a real boolean.
subject_df = pd.read_csv(multisub_file, converters={'mouse': str})
subject_df = subject_df.astype({'analyze': bool})

In [13]:
# Show the subject rows from position 20 onward.
subject_df[20:]

Unnamed: 0,exp_type,mouse,experiment,sweep_states,stim_depth,bad_chs,analyze,data_loc,CCF_res,notes
20,isoflurane,569062,estim_vis_2021-02-18_11-17-51,"awake,isoflurane,recovery",deep,613,True,F:\ZZmanuscript_eLife\mouse569062\estim_vis_20...,10,
21,isoflurane,569064,estim_vis_2021-04-08_10-28-24,"awake,isoflurane,recovery,recovery",deep,none,True,F:\ZZmanuscript_eLife\mouse569064\estim_vis_20...,10,
22,isoflurane,569068,estim_vis_2021-03-04_10-51-38,"awake,isoflurane,recovery,recovery",deep,none,True,F:\ZZmanuscript_eLife\mouse569068\estim_vis_20...,10,
23,isoflurane,569069,estim_vis2_2021-03-12_10-52-44,"awake,isoflurane,recovery,recovery",deep,7891011121314,True,F:\ZZmanuscript_eLife\mouse569069\estim_vis2_2...,10,
24,isoflurane,569070,estim1_2021-04-01_10-27-33,"awake,isoflurane,isoflurane,recovery","deep,deep,ignore,ignore",23413,False,F:\ZZmanuscript_eLife\mouse569070\estim1_2021-...,10,"anterior MOs with 2 depths, no brain area assi..."
25,isoflurane,569073,estim_vis_2021-04-15_10-27-22,"awake,isoflurane,isoflurane,recovery,recovery",deep,3413,True,F:\ZZmanuscript_eLife\mouse569073\estim_vis_20...,10,
26,isoflurane,571619,estim2_2021-03-19_10-09-01,"awake,awake,isoflurane,isoflurane","superficial,deep,deep,superficial",2345813,True,F:\ZZmanuscript_eLife\mouse571619\estim2_2021-...,10,anterior MOs with 2 depths
27,awake,666193,pilot_aw_2023-02-15_11-44-11,awake,deep,24,True,F:\psi_exp\mouse666193\pilot_aw_2023-02-15_11-...,25,
28,awake,666194,pilot_aw_2023-02-22_12-32-58,awake,deep,1314,True,F:\psi_exp\mouse666194\pilot_aw_2023-02-22_12-...,25,


## Test with a single subject

In [44]:
# Pick one subject (by position in the subjects table) to try the functions on.
subrow = subject_df.iloc[21]
print(subrow.mouse)

# Only subjects flagged for analysis get an experiment object.
if subrow.analyze:
    exp = EEGexp(subrow.data_loc, preprocess=False, make_stim_csv=False)
else:
    print('Skipping {} - {} for now, missing data.\n'.format(subrow.mouse, subrow.exp_type))

569064
Experiment type: electrical and sensory stimulation


### Load unit info dataframe

In [45]:
## Load unit info ##
fn_units_info = os.path.join(exp.data_folder, 'evoked_data', 'all_units_info.csv')
if os.path.exists(fn_units_info):
    unit_info = pd.read_csv(fn_units_info)
    with open(os.path.join(exp.data_folder, 'evoked_data', 'units_event_spikes.pkl'), 'rb') as unit_file:
        all_unit_event_spikes = pickle.load(unit_file)
else:
    print('{} not found. Not analyzing this subject.'.format(fn_units_stats))

#### Test new function

In [46]:
# Run the full region-adjustment pipeline on this subject's unit table.
new_unit_info = add_parent_region_to_df(unit_info, str_tree, annot)

In [47]:
# Which parent-region labels occur for this subject?
np.unique(new_unit_info['parent_region'].values)

array(['ACA', 'HIP', 'HPF', 'MO', 'SS', 'VIS', 'other-TH'], dtype=object)

### Develop function

In [19]:
# Raw region labels before any adjustment.
# The astype(str) presumably guards against NaN (float) entries, which np.unique
# cannot sort together with strings — confirm.
np.unique(unit_info['region'].values.astype(str))

array(['ACAd6a', 'ACAv5', 'ACAv6a', 'CA1', 'CA2', 'CA3', 'DG-mo', 'DG-sg',
       'HPF', 'LD', 'LGd-co', 'LGd-ip', 'LGd-sh', 'MOs5', 'MOs6a',
       'SSp-ll6b', 'VISp4', 'VISp5', 'VISp6a', 'VISp6b', 'alv', 'bsc',
       'ccb', 'cing', 'fiber tracts', 'fp', 'or'], dtype='<U12')

## First pass, make all units have a CCF-recognizable region

In [38]:
# Replace every region label the structure tree does not know (lookup raises
# KeyError) with the label of a nearby annotated voxel.
adj_regions = unit_info['region'].values.copy()
# enumerate() gives the array position; the iterrows() label is only valid as a
# position when the frame has a default RangeIndex.
for indi, (_label, rowi) in enumerate(unit_info.iterrows()):
    try:
        str_tree.get_structures_by_acronym([rowi.region])
    except KeyError:
        if rowi.depth <= 0: # unit was placed above brain
            # first annotated voxel from the top of the unit's DV column
            first_dv = np.nonzero(annot[rowi.CCF_AP, :, rowi.CCF_ML])[0][0]
            new_region_id = annot[rowi.CCF_AP, first_dv, rowi.CCF_ML]
        else:
            # step towards lower ML until an annotated voxel is hit; stop at 0 so
            # the index cannot go negative and wrap around the volume
            Lind = rowi.CCF_ML
            while Lind >= 0 and annot[rowi.CCF_AP, rowi.CCF_DV, Lind] == 0:
                Lind -= 1
            if Lind < 0:
                raise ValueError(
                    'No annotated voxel towards lower ML at AP={}, DV={}'.format(
                        rowi.CCF_AP, rowi.CCF_DV)
                )
            new_region_id = annot[rowi.CCF_AP, rowi.CCF_DV, Lind]
        adj_regions[indi] = str_tree.get_structures_by_id([new_region_id])[0]['acronym']
unit_info['adj_region_1'] = adj_regions

In [39]:
# Labels after the first pass.
np.unique(unit_info['adj_region_1'].values)

array(['ACAd6a', 'ACAv5', 'ACAv6a', 'CA1', 'CA2', 'CA3', 'DG-mo', 'DG-sg',
       'HPF', 'LD', 'LGd-co', 'LGd-ip', 'LGd-sh', 'MOs5', 'MOs6a',
       'SSp-ll6b', 'VISp4', 'VISp5', 'VISp6a', 'VISp6b', 'alv', 'bsc',
       'ccb', 'cing', 'fiber tracts', 'fp', 'or'], dtype=object)

## Second pass, re-label units that fall outside of the grey matter

In [40]:
# Units whose structure does not descend from grey matter (id 8) are re-labelled
# with the closest grey-matter region along DV; all other labels are kept.
adj_regions = unit_info['adj_region_1'].values.copy()
# enumerate() gives the array position; the iterrows() label is only valid as a
# position when the frame has a default RangeIndex.
for indi, (_label, rowi) in enumerate(unit_info.iterrows()):
    reg_id = str_tree.get_structures_by_acronym([rowi.adj_region_1])[0]['id']
    if not str_tree.structure_descends_from(reg_id, 8):
        adj_regions[indi] = find_closest_region(rowi, str_tree, annot)
unit_info['adj_region_2'] = adj_regions

In [41]:
# Labels after the second pass.
np.unique(unit_info['adj_region_2'].values)

array(['ACAd6a', 'ACAv5', 'ACAv6a', 'CA1', 'CA2', 'CA3', 'DG-mo', 'DG-sg',
       'HPF', 'LD', 'LGd-co', 'LGd-ip', 'LGd-sh', 'MOs5', 'MOs6a',
       'SSp-ll6b', 'VISp4', 'VISp5', 'VISp6a', 'VISp6b'], dtype=object)

## Finally, assign a defined parent region to each unit area

In [42]:
# Map every adjusted region acronym to its coarse parent label.
parent_regions = unit_info['adj_region_2'].values.copy()
# enumerate() gives the array position; the iterrows() label is only valid as a
# position when the frame has a default RangeIndex.
for indi, (_label, rowi) in enumerate(unit_info.iterrows()):
    parent_regions[indi] = get_parent_region(rowi.adj_region_2, str_tree)
unit_info['parent_region'] = parent_regions

In [43]:
# Preview the first rows, including the adj_region_1, adj_region_2 and parent_region columns added above.
unit_info.head()

Unnamed: 0,unit_id,probe,peak_ch,depth,spike_duration,region,CCF_AP,CCF_DV,CCF_ML,adj_region_1,adj_region_2,parent_region
0,B2,probeB,0,3700,0.467002,VAL,260,179,179,VAL,VAL,SM-TH
1,B3,probeB,1,3700,0.549414,VAL,260,179,179,VAL,VAL,SM-TH
2,B6,probeB,1,3700,0.425796,VAL,260,179,179,VAL,VAL,SM-TH
3,B9,probeB,2,3680,0.480737,VAL,260,178,179,VAL,VAL,SM-TH
4,B10,probeB,4,3660,0.480737,VAL,260,177,179,VAL,VAL,SM-TH


In [43]:
# Inspect a stretch of rows where non-grey-matter labels (e.g. 'fiber tracts', 'bsc')
# were re-assigned in adj_region_2.
unit_info[155:175]

Unnamed: 0,unit_id,probe,peak_ch,depth,spike_duration,region,CCF_AP,CCF_DV,CCF_ML,adj_region_1,adj_region_2,parent_region
155,C147,probeC,60,2900,0.467002,fiber tracts,779,294,338,fiber tracts,LGd-sh,other-TH
156,C149,probeC,61,2900,0.439531,fiber tracts,779,294,338,fiber tracts,LGd-sh,other-TH
157,C692,probeC,62,2880,0.508208,bsc,779,293,337,bsc,LGd-sh,other-TH
158,C150,probeC,62,2880,0.453266,bsc,779,293,337,bsc,LGd-sh,other-TH
159,C153,probeC,63,2880,0.604355,bsc,779,293,337,bsc,LGd-sh,other-TH
160,C154,probeC,64,2860,0.439531,bsc,780,291,337,bsc,HPF,HPF
161,C155,probeC,64,2860,0.453266,bsc,780,291,337,bsc,HPF,HPF
162,C156,probeC,65,2860,0.439531,bsc,780,291,337,bsc,HPF,HPF
163,C157,probeC,65,2860,0.453266,bsc,780,291,337,bsc,HPF,HPF
164,C161,probeC,67,2840,0.398325,fiber tracts,780,289,337,fiber tracts,HPF,HPF


Structure tree functions

In [30]:
# Example lookup by acronym: returns one dict per requested acronym, including
# the structure's 'id' and its 'structure_id_path' (root ... structure itself).
str_tree.get_structures_by_acronym(['HPF'])

[{'acronym': 'HPF',
  'graph_id': 1,
  'graph_order': 454,
  'id': 1089,
  'name': 'Hippocampal formation',
  'structure_id_path': [997, 8, 567, 688, 695, 1089],
  'structure_set_ids': [2,
   112905828,
   691663206,
   12,
   184527634,
   112905813,
   687527670,
   114512891,
   114512892],
  'rgb_triplet': [126, 208, 75]}]

In [32]:
# Lookup of an id that is not in the tree (0) does not raise: the result is [None],
# so callers must check for None before subscripting.
str_tree.get_structures_by_id([0])

[None]