This creates a .csv file containing units from specified experiments. The .csv will be compatible with code to plot all units in brainrender.

In [1]:
from datetime import date
from glob import glob
import json
import os
import sys
import time
from pathlib import Path
import pickle

import gspread
import numpy as np
import pandas as pd

In [2]:
sys.path.append(r'C:\Users\lesliec\code')

In [3]:
from tbd_eeg.tbd_eeg.data_analysis.eegutils import EEGexp
from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache

## Functions

In [4]:
def find_closest_region(sunit_info, struct_tree, annot):
    ## Finds a grey matter region above/below an unknown region ##
    Vind = sunit_info.CCF_DV
    vent_sip = struct_tree.get_structures_by_id([annot[sunit_info.CCF_AP, Vind, sunit_info.CCF_ML]])[0]['structure_id_path']
    while not struct_tree.structure_descends_from(vent_sip[-1], 8):
        Vind += 1
        vent_sip = struct_tree.get_structures_by_id([annot[sunit_info.CCF_AP, Vind, sunit_info.CCF_ML]])[0]['structure_id_path']

    Dind = sunit_info.CCF_DV
    dors_sip = struct_tree.get_structures_by_id([annot[sunit_info.CCF_AP, Dind, sunit_info.CCF_ML]])[0]['structure_id_path']
    while not struct_tree.structure_descends_from(dors_sip[-1], 8):
        Dind -= 1
        dors_sip = struct_tree.get_structures_by_id([annot[sunit_info.CCF_AP, Dind, sunit_info.CCF_ML]])[0]['structure_id_path']

    if (Vind - sunit_info.CCF_DV) <= (sunit_info.CCF_DV - Dind):
        return struct_tree.get_structures_by_id([vent_sip[-1]])[0]['acronym']
    elif (Vind - sunit_info.CCF_DV) > (sunit_info.CCF_DV - Dind):
        return struct_tree.get_structures_by_id([dors_sip[-1]])[0]['acronym']

In [5]:
def get_region_from_children(test_id, parent_id, struct_tree):
    try:
        child_ind = np.nonzero([
            struct_tree.structure_descends_from(test_id, x) for x in struct_tree.child_ids([parent_id])[0]
        ])[0][0]
        return struct_tree.get_structures_by_id([struct_tree.child_ids([parent_id])[0][child_ind]])[0]['acronym']
    except:
        return struct_tree.get_structures_by_id([parent_id])[0]['acronym']

In [6]:
def get_parent_region(region_acronym, struct_tree):
    areas_of_interest = {
        'SM-TH': ['AV', 'CL', 'MD', 'PO', 'PF', 'VAL', 'VPL', 'VPM', 'VM'],
    }
    
    reg_id = struct_tree.get_structures_by_acronym([region_acronym])[0]['id']
    if struct_tree.structure_descends_from(reg_id, 567):
        if struct_tree.structure_descends_from(reg_id, 315):
            return get_region_from_children(reg_id, 315, struct_tree)
        elif struct_tree.structure_descends_from(reg_id, 698):
            return 'OLF'
        elif struct_tree.structure_descends_from(reg_id, 1089):
            return get_region_from_children(reg_id, 1089, struct_tree)
        elif struct_tree.structure_descends_from(reg_id, 703):
            return get_region_from_children(reg_id, 703, struct_tree)
        elif struct_tree.structure_descends_from(reg_id, 477):
            return 'STR'
        elif struct_tree.structure_descends_from(reg_id, 803):
            return 'PAL'
        else:
            return 'unassigned'
    elif struct_tree.structure_descends_from(reg_id, 343):
        if struct_tree.structure_descends_from(reg_id, 1129):
            if region_acronym == 'RT':
                return 'RT-TH'
            elif region_acronym in areas_of_interest['SM-TH']:
                return 'SM-TH'
            else:
                return 'other-TH'
        elif struct_tree.structure_descends_from(reg_id, 1097):
            return 'HY'
        else:
            return get_region_from_children(reg_id, 343, struct_tree)
    else:
        return 'unassigned'

In [7]:
def add_parent_region_to_df(unit_info_df, struct_tree, annot):
    ## First, make sure all names in region column correspond to a CCF region (removes nan values) ##
    adj_regions = unit_info_df['region'].values.copy()
    for indi, rowi in unit_info_df.iterrows():
        try:
            str_info = struct_tree.get_structures_by_acronym([rowi.region])[0]
        except KeyError:
            if rowi.depth <= 0: # unit was placed above brain
                new_region_id = annot[rowi.CCF_AP, np.nonzero(annot[rowi.CCF_AP, :, rowi.CCF_ML])[0][0], rowi.CCF_ML]
                adj_regions[indi] = struct_tree.get_structures_by_id([new_region_id])[0]['acronym']
            else:
                Lind = rowi.CCF_ML
                while annot[rowi.CCF_AP, rowi.CCF_DV, Lind] == 0:
                    Lind -= 1
                new_region_id = struct_tree.get_structures_by_id(
                    [annot[rowi.CCF_AP, rowi.CCF_DV, Lind]])[0]['structure_id_path'][-1]
                adj_regions[indi] = struct_tree.get_structures_by_id([new_region_id])[0]['acronym']
    unit_info_df['adj_region'] = adj_regions
    
    ## Second, re-assign any non-grey matter areas to the closest region ##
    adj_regions = unit_info_df['adj_region'].values.copy()
    for indi, rowi in unit_info_df.iterrows():
        reg_id = struct_tree.get_structures_by_acronym([rowi.adj_region])[0]['id']
        if not struct_tree.structure_descends_from(reg_id, 8):
            adj_regions[indi] = find_closest_region(rowi, struct_tree, annot)
    unit_info_df['adj_region'] = adj_regions
    
    ## Finally, assign a parent region to each adjusted CCF region ##
    parent_regions = unit_info_df['adj_region'].values.copy()
    for indi, rowi in unit_info_df.iterrows():
        parent_regions[indi] = get_parent_region(rowi.adj_region, struct_tree)
    unit_info_df['parent_region'] = parent_regions
    
    return unit_info_df.drop('adj_region', axis=1)

### Set some parameters

In [8]:
data_dir = Path("F:\psi_exp")
save_csv_dir = Path(r"C:\Users\lesliec\OneDrive - Allen Institute\data")

In [9]:
## Load the Templeton log sheet as pd.DataFrame ##
_gc = gspread.service_account() # need a key file to access the account
_sh = _gc.open('Templeton-log_exp') # open the spreadsheet
_df = pd.DataFrame(_sh.sheet1.get()) # load the first worksheet
metadata = _df.T.set_index(0).T # put it in a nicely formatted dataframe

### Test with one subject

In [24]:
subexpind = 84
expmeta = metadata.iloc[subexpind]
print('{}: {}'.format(expmeta['mouse_name'], expmeta['exp_name']))

mouse735049: aw_sal_2024-05-22_11-05-25


In [25]:
dataloc = os.path.join(data_dir, expmeta['mouse_name'], expmeta['exp_name'], 'experiment1', 'recording1')
exp = EEGexp(dataloc, preprocess=False, make_stim_csv=False)
probe_list = [x.replace('_sorted', '') for x in exp.experiment_data if 'probe' in x]
print(probe_list)

Experiment type: electrical and sensory stimulation
['probeB', 'probeC', 'probeD', 'probeF']


In [26]:
all_units_list = []
for probei in probe_list:
    print(' {}'.format(probei))
    select_units, peak_chs, unit_metrics = exp.get_probe_units(probei)
    unit_metrics['unit_name'] = [probei[-1] + str(x) for x in unit_metrics['cluster_id'].values]
    all_units_list.append(unit_metrics)
all_select_units = pd.concat(all_units_list)

 probeB
 probeC
 probeD
 probeF


Cut unnecessary info and add CCF coords

In [27]:
all_units_info = []
for ui, urow in all_select_units.iterrows():
    CCFcoords = [int(x) for x in urow['ccf_coord'].replace('[','').replace(']','').replace(' ','').split(',')]
    all_units_info.append([urow['unit_name'], urow['area'], urow['spike_count'], urow['duration'], CCFcoords[0], CCFcoords[1], CCFcoords[2]])

all_units_df = pd.DataFrame(all_units_info, columns=['unit_name', 'region', 'spike_count', 'spike_duration', 'CCF_AP', 'CCF_DV', 'CCF_ML'])

In [14]:
all_units_df.head()

Unnamed: 0,unit_name,region,spike_count,spike_duration,CCF_AP,CCF_DV,CCF_ML
0,B0,VAL,55327,0.494472,262,174,179
1,B1,VAL,16042,0.549414,262,174,179
2,B8,VAL,28023,0.467002,262,174,179
3,B10,VAL,66776,0.439531,262,174,179
4,B14,VAL,40878,0.453266,262,174,179


In [15]:
print(np.unique(all_units_df['region'].values))

['CA1' 'CA3' 'CP' 'DG-po' 'DG-sg' 'HY' 'LD' 'LGd-co' 'LGd-sh' 'LSr' 'MGd'
 'MGm' 'MGv' 'MOs2/3' 'MOs5' 'MOs6a' 'MOs6b' 'PO' 'SSp-tr5' 'SSp-tr6a'
 'SSp-tr6b' 'SSs5' 'SSs6a' 'SSs6b' 'TH' 'VAL' 'VISa2/3' 'VISa4' 'VISa5'
 'VISal4' 'VISal5' 'VISp4' 'VISp5' 'ZI' 'ccb' 'ccg' 'cing' 'fa' 'ml'
 'root']


Add parent brain regions

In [28]:
sub_CCF_res = 25
mcc = MouseConnectivityCache(resolution=sub_CCF_res)
str_tree = mcc.get_structure_tree()
annot, annot_info = mcc.get_annotation_volume()

In [29]:
all_units_info_df = add_parent_region_to_df(all_units_df, str_tree, annot)

In [30]:
all_units_info_df.head()

Unnamed: 0,unit_name,region,spike_count,spike_duration,CCF_AP,CCF_DV,CCF_ML,parent_region
0,B0,VAL,55327,0.494472,262,174,179,SM-TH
1,B1,VAL,16042,0.549414,262,174,179,SM-TH
2,B8,VAL,28023,0.467002,262,174,179,SM-TH
3,B10,VAL,66776,0.439531,262,174,179,SM-TH
4,B14,VAL,40878,0.453266,262,174,179,SM-TH


### Add colors for each brain region

In [35]:
all_units_info_df[all_units_info_df['parent_region'] == 'STR']

Unnamed: 0,unit_name,region,spike_count,spike_duration,CCF_AP,CCF_DV,CCF_ML,parent_region
427,F0,LSr,20295,0.535678,178,202,221,STR
428,F2,LSr,19184,0.302178,178,201,221,STR
429,F5,LSr,54679,0.604355,178,201,221,STR
430,F8,LSr,53999,0.906533,178,200,221,STR
431,F9,LSr,29854,0.590620,178,200,220,STR
...,...,...,...,...,...,...,...,...
669,F478,LSr,23094,0.494472,172,172,207,STR
670,F479,LSr,50173,0.590620,172,172,207,STR
697,F542,LSr,554,0.714238,176,191,216,STR
710,F573,LSr,4063,0.604355,174,182,212,STR


In [31]:
print(np.unique(all_units_info_df['parent_region'].values))

['HIP' 'MO' 'PTLp' 'SM-TH' 'SS' 'STR' 'VIS' 'other-TH']


In [20]:
region_colors = {
    'HIP': 'CadetBlue',
    'MO': 'Lime',
    'PTLp': 'CadetBlue',
    'SM-TH': 'Crimson',
    'SS': 'Olive',
    'STR': 'CadetBlue',
    'VIS': 'Green',
    'other-TH': 'LightSalmon',
    'RT-TH': 'LightCoral',
}

In [21]:
all_units_info_df['color'] = [region_colors[x] for x in all_units_info_df['parent_region'].values]

In [22]:
all_units_info_df.head()

Unnamed: 0,unit_name,region,spike_count,spike_duration,CCF_AP,CCF_DV,CCF_ML,parent_region,color
0,B0,VAL,55327,0.494472,262,174,179,SM-TH,Crimson
1,B1,VAL,16042,0.549414,262,174,179,SM-TH,Crimson
2,B8,VAL,28023,0.467002,262,174,179,SM-TH,Crimson
3,B10,VAL,66776,0.439531,262,174,179,SM-TH,Crimson
4,B14,VAL,40878,0.453266,262,174,179,SM-TH,Crimson


## Save as .csv to load into brainrender .py code

In [23]:
unit_info_file = os.path.join(save_csv_dir, 'brainrender_test_{}_units_info.csv'.format(expmeta['mouse_name']))
print(unit_info_file)

C:\Users\lesliec\OneDrive - Allen Institute\data\brainrender_test_mouse735049_units_info.csv


In [30]:
all_units_info_df.columns

Index(['unit_name', 'region', 'spike_count', 'spike_duration', 'CCF_AP',
       'CCF_DV', 'CCF_ML', 'parent_region', 'color'],
      dtype='object')

# Explore probe coordinates

In [10]:
subexpind = 84
expmeta = metadata.iloc[subexpind]
print('{}: {}'.format(expmeta['mouse_name'], expmeta['exp_name']))

mouse735049: aw_sal_2024-05-22_11-05-25


In [11]:
dataloc = os.path.join(data_dir, expmeta['mouse_name'], expmeta['exp_name'], 'experiment1', 'recording1')
exp = EEGexp(dataloc, preprocess=False, make_stim_csv=False)
probe_list = [x.replace('_sorted', '') for x in exp.experiment_data if 'probe' in x]
print(probe_list)

Experiment type: electrical and sensory stimulation
['probeB', 'probeC', 'probeD', 'probeF']


In [12]:
probei = 'probeB'

with open(exp.ephys_params[probei]['probe_info']) as data_file:
    data = json.load(data_file)

In [13]:
data.keys()

dict_keys(['air_channel', 'channel', 'offset', 'mask', 'horizontal_pos', 'scaling', 'vertical_pos', 'surface_channel', 'area_ch', 'ccf_coord_ch', 'is_ipsi_ch', 'ccf_resolution'])

In [14]:
if 'ccf_coord_ch' in data.keys():
    print('Yes, this probe has locations.')
    
else:
    print('No locations for this probe.')

Yes, this probe has locations.


In [15]:
channels = np.array(data['channel'])
ch_coords = np.array(data['ccf_coord_ch'])
ccf_res = data['ccf_resolution']

In [21]:
ch_coords.shape

(384, 3)

In [22]:
probe_coords = {}
for probei in probe_list:
    print(' {}'.format(probei))
    with open(exp.ephys_params[probei]['probe_info']) as data_file:
        data = json.load(data_file)

    if 'ccf_coord_ch' in data.keys():
        ch_coords = np.array(data['ccf_coord_ch'])
        ccf_res = data['ccf_resolution']
        probe_coords[probei] = [ch_coords[0], ch_coords[-1]]
    else:
        print('  No locations for this probe.')

 probeB
 probeC
 probeD
 probeF


In [23]:
probe_coords

{'probeB': [array([262, 174, 179]), array([279,   9, 158])],
 'probeC': [array([308, 181, 154]), array([357,  32,  95])],
 'probeD': [array([282, 196, 136]), array([323,  48,  68])],
 'probeF': [array([178, 202, 221]), array([148,  53, 149])]}