In [1]:
from datetime import date
from glob import glob
import json
import os
import sys
import time
from pathlib import Path
import pickle

import gspread
import numpy as np
import pandas as pd

In [2]:
sys.path.append(r'C:\Users\lesliec\code')

In [3]:
from tbd_eeg.tbd_eeg.data_analysis.eegutils import EEGexp
from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache

## Set some parameters

In [4]:
data_dir = Path(r"B:\\")
save_csv_dir = Path(r"C:\Users\lesliec\OneDrive - Allen Institute\data")
subject_csv = Path(r"C:\Users\lesliec\OneDrive - Allen Institute\data\BrainStimpaper_IR_allsubjects.csv")
probe_csv_filename = r"BSpaperIR_08152024_probescoords.csv"
unit_csv_filename = r"BSpaperIR_08152024_unitscoords.csv"

BRCCF = 25
ROI = None # if None, show all probes; ['AV', 'CL', 'MD', 'PO', 'RT', 'VAL', 'VPL', 'VPM', 'VM']
parentROI = None # ['TH']

makeunitscsv = False

# region_colors = {
#     'AV': 'HotPink',
#     'CL': 'Red',
#     'MD': 'Orange',
#     'PO': 'Gold',
#     'RT': 'Sienna',
#     'VAL': 'Purple',
#     'VPL': 'Blue',
#     'VPM': 'Cyan',
#     'VM': 'Green',
# }

In [5]:
all_subexp = pd.read_csv(subject_csv)

In [6]:
all_subexp.head()

Unnamed: 0,mouse,experiment,histology,CCFresolution,close_probe,notes
0,mouse569065,pilot1_tipref_2021-05-21_12-27-05,False,25,B,IR excluded from analysis
1,mouse569065,pilot1_screwref_2021-05-21_13-12-08,False,25,B,IR excluded from analysis
2,mouse577578,pilot2_screwref_2021-07-08_10-25-27,True,25,B,
3,mouse583240,pilot3_screwref_2021-09-09_12-51-47,False,25,C,IR excluded from analysis
4,mouse598180,pilot4_ext_ref_2021-10-21_11-43-48,True,25,B,


### Functions

In [7]:
def find_closest_region(sunit_info, struct_tree, annot):
    ## Finds a grey matter region above/below an unknown region ##
    Vind = sunit_info.CCF_DV
    vent_sip = struct_tree.get_structures_by_id([annot[sunit_info.CCF_AP, Vind, sunit_info.CCF_ML]])[0]['structure_id_path']
    while not struct_tree.structure_descends_from(vent_sip[-1], 8):
        Vind += 1
        vent_sip = struct_tree.get_structures_by_id([annot[sunit_info.CCF_AP, Vind, sunit_info.CCF_ML]])[0]['structure_id_path']

    Dind = sunit_info.CCF_DV
    dors_sip = struct_tree.get_structures_by_id([annot[sunit_info.CCF_AP, Dind, sunit_info.CCF_ML]])[0]['structure_id_path']
    while not struct_tree.structure_descends_from(dors_sip[-1], 8):
        Dind -= 1
        dors_sip = struct_tree.get_structures_by_id([annot[sunit_info.CCF_AP, Dind, sunit_info.CCF_ML]])[0]['structure_id_path']

    if (Vind - sunit_info.CCF_DV) <= (sunit_info.CCF_DV - Dind):
        return struct_tree.get_structures_by_id([vent_sip[-1]])[0]['acronym']
    elif (Vind - sunit_info.CCF_DV) > (sunit_info.CCF_DV - Dind):
        return struct_tree.get_structures_by_id([dors_sip[-1]])[0]['acronym']


def get_region_from_children(test_id, parent_id, struct_tree):
    try:
        child_ind = np.nonzero([
            struct_tree.structure_descends_from(test_id, x) for x in struct_tree.child_ids([parent_id])[0]
        ])[0][0]
        return struct_tree.get_structures_by_id([struct_tree.child_ids([parent_id])[0][child_ind]])[0]['acronym']
    except:
        return struct_tree.get_structures_by_id([parent_id])[0]['acronym']


def get_parent_region(region_acronym, struct_tree):
    areas_of_interest = {
        'SM-TH': ['AV', 'CL', 'MD', 'PO', 'PF', 'VAL', 'VPL', 'VPM', 'VM'],
    }
    
    reg_id = struct_tree.get_structures_by_acronym([region_acronym])[0]['id']
    if struct_tree.structure_descends_from(reg_id, 567):
        if struct_tree.structure_descends_from(reg_id, 315):
            return get_region_from_children(reg_id, 315, struct_tree)
        elif struct_tree.structure_descends_from(reg_id, 698):
            return 'OLF'
        elif struct_tree.structure_descends_from(reg_id, 1089):
            return get_region_from_children(reg_id, 1089, struct_tree)
        elif struct_tree.structure_descends_from(reg_id, 703):
            return get_region_from_children(reg_id, 703, struct_tree)
        elif struct_tree.structure_descends_from(reg_id, 477):
            return 'STR'
        elif struct_tree.structure_descends_from(reg_id, 803):
            return 'PAL'
        else:
            return 'unassigned'
    elif struct_tree.structure_descends_from(reg_id, 343):
        if struct_tree.structure_descends_from(reg_id, 1129):
            return 'TH'
            # if region_acronym == 'RT':
            #     return 'RT-TH'
            # elif region_acronym in areas_of_interest['SM-TH']:
            #     return 'SM-TH'
            # else:
            #     return 'other-TH'
        elif struct_tree.structure_descends_from(reg_id, 1097):
            return 'HY'
        else:
            return get_region_from_children(reg_id, 343, struct_tree)
    else:
        return 'unassigned'


def add_parent_region_to_df(unit_info_df, struct_tree, annot):
    ## First, make sure all names in region column correspond to a CCF region (removes nan values) ##
    adj_regions = unit_info_df['region'].values.copy()
    for indi, rowi in unit_info_df.iterrows():
        try:
            str_info = struct_tree.get_structures_by_acronym([rowi.region])[0]
        except KeyError:
            if rowi.depth <= 0: # unit was placed above brain
                new_region_id = annot[rowi.CCF_AP, np.nonzero(annot[rowi.CCF_AP, :, rowi.CCF_ML])[0][0], rowi.CCF_ML]
                adj_regions[indi] = struct_tree.get_structures_by_id([new_region_id])[0]['acronym']
            else:
                Lind = rowi.CCF_ML
                while annot[rowi.CCF_AP, rowi.CCF_DV, Lind] == 0:
                    Lind -= 1
                new_region_id = struct_tree.get_structures_by_id(
                    [annot[rowi.CCF_AP, rowi.CCF_DV, Lind]])[0]['structure_id_path'][-1]
                adj_regions[indi] = struct_tree.get_structures_by_id([new_region_id])[0]['acronym']
    unit_info_df['adj_region'] = adj_regions
    
    ## Second, re-assign any non-grey matter areas to the closest region ##
    adj_regions = unit_info_df['adj_region'].values.copy()
    for indi, rowi in unit_info_df.iterrows():
        reg_id = struct_tree.get_structures_by_acronym([rowi.adj_region])[0]['id']
        if not struct_tree.structure_descends_from(reg_id, 8):
            adj_regions[indi] = find_closest_region(rowi, struct_tree, annot)
    unit_info_df['adj_region'] = adj_regions
    
    ## Finally, assign a parent region to each adjusted CCF region ##
    parent_regions = unit_info_df['adj_region'].values.copy()
    for indi, rowi in unit_info_df.iterrows():
        parent_regions[indi] = get_parent_region(rowi.adj_region, struct_tree)
    unit_info_df['parent_region'] = parent_regions
    
    return unit_info_df.drop('adj_region', axis=1)

## Loop through all subjects

In [11]:
all_subexp_probe_info = []
all_subexp_units_list = []
for indi, exprow in all_subexp.iterrows():
    print('{}: {}'.format(exprow.mouse, exprow.experiment))
    if not exprow.histology:
        print(" This subject doesn't have histology, skipping.\n")
        continue

    # data_paths = os.path.join(data_dir, '*', exprow.mouse, exprow.experiment, 'experiment1', 'recording1')
    data_paths = os.path.join(data_dir, exprow.mouse, exprow.experiment, 'experiment1', 'recording1')
    if len(glob(data_paths)) == 0:
        print(' This data path does not exist: {}.\n'.format(data_paths))
        continue
        
    exp = EEGexp(glob(data_paths)[0], preprocess=False, make_stim_csv=False)
    mcc = MouseConnectivityCache(resolution=exprow.CCFresolution)
    str_tree = mcc.get_structure_tree()
    annot, annot_info = mcc.get_annotation_volume()
    
    probe_list = [x.replace('_sorted', '') for x in exp.experiment_data if 'probe' in x]
    units_list = []
    for probei in probe_list:
        print(' {}'.format(probei))
        with open(exp.ephys_params[probei]['probe_info']) as data_file:
            data = json.load(data_file)
        if 'ccf_coord_ch' not in data.keys():
            print('  No locations for {}. skipping.'.format(probei))
            continue
        if (ROI is not None) and (np.sum([True if x in ROI else False for x in data['area_ch']]) == 0):
            print('  Missed target regions.')
            continue
        if exprow.CCFresolution != BRCCF:
            CCF25coords = np.array(data['ccf_coord_ch']) * exprow.CCFresolution / BRCCF # for a Line
        else:
            CCF25coords = np.array(data['ccf_coord_ch']) # for a Line
        all_subexp_probe_info.append([
            exprow.mouse, exprow.experiment, probei, probei[-1]==exprow.close_probe,
            CCF25coords[0,0], CCF25coords[0,1], CCF25coords[0,2],
            CCF25coords[-1,0], CCF25coords[-1,1], CCF25coords[-1,2]
        ])
        if makeunitscsv:
            ## Get units for probes that hit ROI ##
            select_units, peak_chs, unit_metrics = exp.get_probe_units(probei)
            unit_metrics['unit_name'] = [probei[-1] + str(x) for x in unit_metrics['cluster_id'].values]
            unit_metrics['probe'] = [probei] * len(unit_metrics)
            units_list.append(unit_metrics)
        
    if len(units_list) == 0:
        print('')
        continue
    all_select_units = pd.concat(units_list)
    all_units_info = []
    for ui, urow in all_select_units.iterrows():
        CCFcoords = [int(x) for x in urow['ccf_coord'].replace('[','').replace(']','').replace(' ','').split(',')]
        all_units_info.append([
            exprow.mouse, exprow.experiment, urow['unit_name'], urow['probe'], urow['area'], urow['spike_count'], urow['duration'],
            CCFcoords[0], CCFcoords[1], CCFcoords[2]
        ])
    all_units_df = pd.DataFrame(
        all_units_info, columns=['mouse', 'experiment', 'unit_name', 'probe', 'region', 'spike_count', 'spike_duration', 'CCF_AP', 'CCF_DV', 'CCF_ML']
    )
    ## Add parent region ##
    all_units_info_df = add_parent_region_to_df(all_units_df, str_tree, annot)
    ## Convert CCF coords ##
    all_units_info_df['CCF_AP'] = all_units_info_df['CCF_AP'].values  * exprow.CCFresolution / BRCCF
    all_units_info_df['CCF_DV'] = all_units_info_df['CCF_DV'].values  * exprow.CCFresolution / BRCCF
    all_units_info_df['CCF_ML'] = all_units_info_df['CCF_ML'].values  * exprow.CCFresolution / BRCCF
    ## Save units df ##
    all_subexp_units_list.append(all_units_info_df)

    print('')
all_subexp_probe_info_df = pd.DataFrame(
    all_subexp_probe_info, columns=['mouse', 'experiment', 'probe', 'close_to_stim', 'tipAP', 'tipDV', 'tipML', 'surfAP', 'surfDV', 'surfML']
)
if makeunitscsv:
    all_subexp_units = pd.concat(all_subexp_units_list)

mouse569065: pilot1_tipref_2021-05-21_12-27-05
 This subject doesn't have histology, skipping.

mouse569065: pilot1_screwref_2021-05-21_13-12-08
 This subject doesn't have histology, skipping.

mouse577578: pilot2_screwref_2021-07-08_10-25-27
This data does not contain an EEG recording.
Experiment type: electrical stimulation
 probeB
 probeF
mouse583240: pilot3_screwref_2021-09-09_12-51-47
 This subject doesn't have histology, skipping.

mouse598180: pilot4_ext_ref_2021-10-21_11-43-48
This data does not contain an EEG recording.
Experiment type: electrical stimulation
 probeB
 probeE
mouse598181: estim_2021-12-15_11-01-52
This data does not contain an EEG recording.
Experiment type: electrical stimulation
 probeB
 probeE
mouse598183: estim_2022-02-04_10-53-34
This data does not contain an EEG recording.
Experiment type: electrical stimulation
 probeB
 probeE
mouse622179: estim_2022-04-14_10-20-42
This data does not contain an EEG recording.
Experiment type: electrical stimulation
 prob

In [12]:
all_subexp_probe_info_df.head()

Unnamed: 0,mouse,experiment,probe,close_to_stim,tipAP,tipDV,tipML,surfAP,surfDV,surfML
0,mouse577578,pilot2_screwref_2021-07-08_10-25-27,probeB,True,282,169,194,260,0,183
1,mouse577578,pilot2_screwref_2021-07-08_10-25-27,probeF,False,179,126,249,145,2,155
2,mouse598180,pilot4_ext_ref_2021-10-21_11-43-48,probeB,True,202,176,166,226,14,172
3,mouse598180,pilot4_ext_ref_2021-10-21_11-43-48,probeE,False,152,73,200,91,-46,98
4,mouse598181,estim_2021-12-15_11-01-52,probeB,False,224,167,189,254,2,178


In [13]:
all_subexp_probe_info_df.tail()

Unnamed: 0,mouse,experiment,probe,close_to_stim,tipAP,tipDV,tipML,surfAP,surfDV,surfML
23,mouse657902,estim_2023-01-18_11-25-28,probeE,True,333,157,197,317,20,117
24,mouse657904,estim_2023-01-20_10-14-38,probeB,False,185,187,156,212,25,191
25,mouse657904,estim_2023-01-20_10-14-38,probeE,True,287,160,212,286,25,111
26,mouse657905,estim_2023-01-31_10-56-45,probeB,False,219,176,189,253,12,198
27,mouse657905,estim_2023-01-31_10-56-45,probeE,True,293,152,209,290,24,113


In [14]:
all_subexp_probe_info_df.to_csv(os.path.join(save_csv_dir, probe_csv_filename), index=False)

#### Subselect ROI units and add colors

In [12]:
## Subselect ROI units ##
ROIunits = all_subexp_units[[True if preg in parentROI else False for preg in all_subexp_units.parent_region.values]].copy()
## Add region colors ##
ROIunits['color'] = [region_colors[regi] if regi in region_colors.keys() else 'Gray' for regi in ROIunits.region.values]

In [13]:
ROIunits.head()

Unnamed: 0,mouse,experiment,unit_name,probe,region,spike_count,spike_duration,CCF_AP,CCF_DV,CCF_ML,parent_region,color
0,mouse546655,estim_vis_2020-10-23_11-01-14,C1,probeC,int,8816,0.164824,280.4,180.8,124.4,TH,Gray
1,mouse546655,estim_vis_2020-10-23_11-01-14,C2,probeC,int,24528,0.233501,280.4,180.8,124.4,TH,Gray
2,mouse546655,estim_vis_2020-10-23_11-01-14,C7,probeC,int,20969,0.370854,281.2,178.8,124.0,TH,Gray
3,mouse546655,estim_vis_2020-10-23_11-01-14,C9,probeC,int,17709,0.357119,281.2,178.0,123.6,TH,Gray
4,mouse546655,estim_vis_2020-10-23_11-01-14,C11,probeC,int,48218,0.274707,281.2,178.0,123.6,TH,Gray


In [14]:
ROIunits.tail()

Unnamed: 0,mouse,experiment,unit_name,probe,region,spike_count,spike_duration,CCF_AP,CCF_DV,CCF_ML,parent_region,color
107,mouse575100,estim_vis_2021-06-10_11-39-06,B229,probeB,LD,31364,0.61809,252.4,123.6,186.0,TH,Gray
108,mouse575100,estim_vis_2021-06-10_11-39-06,B235,probeB,LD,16530,0.494472,252.4,122.0,185.6,TH,Gray
247,mouse575100,estim_vis_2021-06-10_11-39-06,B742,probeB,AV,24357,0.535678,251.2,152.4,190.8,TH,HotPink
248,mouse575100,estim_vis_2021-06-10_11-39-06,B743,probeB,AV,2746,0.480737,251.2,150.8,190.4,TH,HotPink
249,mouse575100,estim_vis_2021-06-10_11-39-06,B744,probeB,AV,5228,0.563149,251.6,144.0,189.2,TH,HotPink


In [15]:
ROIunits.to_csv(os.path.join(save_csv_dir, unit_csv_filename), index=False)

## Test with one subject/exp

In [8]:
subexpind = 4
exprow = all_subexp.iloc[subexpind]

In [9]:
mcc = MouseConnectivityCache(resolution=exprow.CCFresolution)
str_tree = mcc.get_structure_tree()
annot, annot_info = mcc.get_annotation_volume()

2024-08-15 14:46:08,328 allensdk.api.api.retrieve_file_over_http INFO     Downloading URL: http://download.alleninstitute.org/informatics-archive/current-release/mouse_ccf/annotation/ccf_2017/annotation_25.nrrd


In [10]:
if not exprow.histology:
    print("This subject doesn't have histology, skipping.")

In [12]:
data_paths = os.path.join(data_dir, exprow.mouse, exprow.experiment, 'experiment1', 'recording1')
if len(glob(data_paths)) == 0:
    print('This data path does not exist: {}'.format(data_paths))
else:
    dataloc = glob(data_paths)[0]
    print(dataloc)

B:\mouse598180\pilot4_ext_ref_2021-10-21_11-43-48\experiment1\recording1


In [13]:
exp = EEGexp(dataloc, preprocess=False, make_stim_csv=False)
probe_list = [x.replace('_sorted', '') for x in exp.experiment_data if 'probe' in x]
print(probe_list)

This data does not contain an EEG recording.
Experiment type: electrical stimulation
['probeB', 'probeE']


In [14]:
exprow.CCFresolution

25

In [18]:
all_subexp_probe_info = []
units_list = []
for probei in probe_list:
    print(' {}'.format(probei))
    with open(exp.ephys_params[probei]['probe_info']) as data_file:
        data = json.load(data_file)
    if 'ccf_coord_ch' not in data.keys():
        print('  No locations for this probe.')
        continue

    if (ROI is not None) and (np.sum([True if x in ROI else False for x in data['area_ch']]) == 0):
            print('  Missed target regions.')
            continue
    if exprow.CCFresolution != BRCCF:
        CCF25coords = np.array(data['ccf_coord_ch']) * exprow.CCFresolution / BRCCF # for a Line
    else:
        CCF25coords = np.array(data['ccf_coord_ch']) # for a Line
    all_subexp_probe_info.append([
        exprow.mouse, exprow.experiment, probei,
        CCF25coords[0,0], CCF25coords[0,1], CCF25coords[0,2],
        CCF25coords[-1,0], CCF25coords[-1,1], CCF25coords[-1,2]
    ])
    if makeunitscsv:
        ## Get units for probes that hit ROI ##
        select_units, peak_chs, unit_metrics = exp.get_probe_units(probei)
        unit_metrics['unit_name'] = [probei[-1] + str(x) for x in unit_metrics['cluster_id'].values]
        unit_metrics['probe'] = [probei] * len(unit_metrics)
        units_list.append(unit_metrics)

# all_select_units = pd.concat(units_list)

 probeB
 probeE


In [19]:
all_subexp_probe_info_df = pd.DataFrame(
    all_subexp_probe_info, columns=['mouse', 'experiment', 'probe', 'tipAP', 'tipDV', 'tipML', 'surfAP', 'surfDV', 'surfML']
)
all_subexp_probe_info_df.head()

Unnamed: 0,mouse,experiment,probe,tipAP,tipDV,tipML,surfAP,surfDV,surfML
0,mouse598180,pilot4_ext_ref_2021-10-21_11-43-48,probeB,202,176,166,226,14,172
1,mouse598180,pilot4_ext_ref_2021-10-21_11-43-48,probeE,152,73,200,91,-46,98


In [44]:
all_units_info = []
for ui, urow in all_select_units.iterrows():
    CCFcoords = [int(x) for x in urow['ccf_coord'].replace('[','').replace(']','').replace(' ','').split(',')]
    all_units_info.append([urow['unit_name'], urow['probe'], urow['area'], urow['spike_count'], urow['duration'], CCFcoords[0], CCFcoords[1], CCFcoords[2]])
all_units_df = pd.DataFrame(all_units_info, columns=['unit_name', 'probe', 'region', 'spike_count', 'spike_duration', 'CCF_AP', 'CCF_DV', 'CCF_ML'])

## Add parent region ##
all_units_info_df = add_parent_region_to_df(all_units_df, str_tree, annot)
## Convert CCF coords ##
all_units_info_df['CCF_AP'] = all_units_info_df['CCF_AP'].values  * exprow.CCFresolution / BRCCF
all_units_info_df['CCF_DV'] = all_units_info_df['CCF_DV'].values  * exprow.CCFresolution / BRCCF
all_units_info_df['CCF_ML'] = all_units_info_df['CCF_ML'].values  * exprow.CCFresolution / BRCCF

In [46]:
## Subselect ROI units ##
ROIunits = all_units_info_df[[True if preg in parentROI else False for preg in all_units_info_df.parent_region.values]].copy()
## Add region colors ##
ROIunits['color'] = [region_colors[regi] if regi in region_colors.keys() else 'Gray' for regi in ROIunits.region.values]

In [47]:
ROIunits.head()

Unnamed: 0,unit_name,probe,region,spike_count,spike_duration,CCF_AP,CCF_DV,CCF_ML,parent_region,color
0,B1,probeB,VAL,117676,0.480737,262.8,182.0,198.8,TH,Purple
1,B2,probeB,VAL,130246,0.467002,262.8,182.0,198.8,TH,Purple
2,B4,probeB,VAL,84230,0.535678,262.8,182.0,198.8,TH,Purple
3,B6,probeB,VAL,97127,0.508208,262.8,181.2,198.8,TH,Purple
4,B8,probeB,VAL,42135,0.494472,262.8,180.0,198.8,TH,Purple


#### Testing probe coords stuff

#### Loop through probes to get units

In [14]:
all_units_list = []
for probei in probe_list:
    print(' {}'.format(probei))
    select_units, peak_chs, unit_metrics = exp.get_probe_units(probei)
    unit_metrics['unit_name'] = [probei[-1] + str(x) for x in unit_metrics['cluster_id'].values]
    unit_metrics['probe'] = [probei] * len(unit_metrics)
    all_units_list.append(unit_metrics)
all_select_units = pd.concat(all_units_list)

 probeB
 probeC
 probeF


In [15]:
all_select_units.head()

Unnamed: 0.1,cluster_id,label,Unnamed: 0,firing_rate,presence_ratio,isi_viol,amplitude_cutoff,isolation_distance,l_ratio,d_prime,nn_hit_rate,nn_miss_rate,silhouette_score,max_drift,cumulative_drift,epoch_name_quality_metrics,epoch_name_waveform_metrics,peak_channel,snr,duration,halfwidth,PT_ratio,repolarization_slope,recovery_slope,amplitude,spread,velocity_above,velocity_below,area,ccf_coord,is_ipsi,spike_count,unit_name,probe
1,1,good,1,14.390373,0.99,0.000221,0.00015,83.069955,4.283891e-06,8.710092,1.0,0.0,0.255259,14.25,156.3,complete_session,complete_session,1,5.3981,0.480737,0.233501,0.540909,0.994367,-0.181715,333.148335,40.0,0.549414,,VAL,"[657, 455, 497]",True,117676,B1,probeB
2,2,good,2,15.927534,0.99,0.038845,0.020897,36.637732,0.0609863,2.689852,0.648667,0.059455,0.184881,18.44,247.64,complete_session,complete_session,1,3.781699,0.467002,0.247236,0.586993,0.621722,-0.13584,216.581625,30.0,0.343384,,VAL,"[657, 455, 497]",True,130246,B2,probeB
4,4,good,4,10.300325,0.99,0.089859,0.046249,24.777305,0.08301468,2.185968,0.454,0.04997,0.013958,19.09,350.86,complete_session,complete_session,1,2.947209,0.535678,0.274707,0.424358,0.474888,-0.034324,168.494235,30.0,0.343384,,VAL,"[657, 455, 497]",True,84230,B4,probeB
6,6,good,6,11.877475,0.99,0.003249,0.000799,136.541202,2.856958e-10,8.05964,1.0,0.0,0.24998,7.5,211.31,complete_session,complete_session,2,3.147361,0.508208,0.38459,0.744261,0.34052,-0.083489,130.317525,40.0,-1.167504,,VAL,"[657, 453, 497]",True,97127,B6,probeB
7,8,good,7,5.152608,0.99,0.0,0.000258,91.171232,0.0001302647,7.825974,0.99557,0.000103,0.389185,4.95,83.52,complete_session,complete_session,5,4.76744,0.494472,0.164824,0.48876,0.772734,-0.175879,244.253685,50.0,0.343384,0.343384,VAL,"[657, 450, 497]",True,42135,B8,probeB


In [16]:
all_units_info = []
for ui, urow in all_select_units.iterrows():
    CCFcoords = [int(x) for x in urow['ccf_coord'].replace('[','').replace(']','').replace(' ','').split(',')]
    all_units_info.append([urow['unit_name'], urow['probe'], urow['area'], urow['spike_count'], urow['duration'], CCFcoords[0], CCFcoords[1], CCFcoords[2]])

all_units_df = pd.DataFrame(all_units_info, columns=['unit_name', 'probe', 'region', 'spike_count', 'spike_duration', 'CCF_AP', 'CCF_DV', 'CCF_ML'])

In [17]:
# sub_CCF_res = 25
mcc = MouseConnectivityCache(resolution=exprow.CCFresolution)
str_tree = mcc.get_structure_tree()
annot, annot_info = mcc.get_annotation_volume()

2024-07-17 12:56:30,757 allensdk.api.api.retrieve_file_over_http INFO     Downloading URL: http://download.alleninstitute.org/informatics-archive/current-release/mouse_ccf/annotation/ccf_2017/annotation_10.nrrd


In [29]:
all_units_info_df = add_parent_region_to_df(all_units_df, str_tree, annot)

all_units_info_df['CCF_AP'] = all_units_info_df['CCF_AP'].values  * exprow.CCFresolution / BRCCF
all_units_info_df['CCF_DV'] = all_units_info_df['CCF_DV'].values  * exprow.CCFresolution / BRCCF
all_units_info_df['CCF_ML'] = all_units_info_df['CCF_ML'].values  * exprow.CCFresolution / BRCCF

In [30]:
all_units_info_df.head()

Unnamed: 0,unit_name,probe,region,spike_count,spike_duration,CCF_AP,CCF_DV,CCF_ML,parent_region
0,B1,probeB,VAL,117676,0.480737,262.8,182.0,198.8,TH
1,B2,probeB,VAL,130246,0.467002,262.8,182.0,198.8,TH
2,B4,probeB,VAL,84230,0.535678,262.8,182.0,198.8,TH
3,B6,probeB,VAL,97127,0.508208,262.8,181.2,198.8,TH
4,B8,probeB,VAL,42135,0.494472,262.8,180.0,198.8,TH


In [38]:
ROImask = [True if preg in parentROI else False for preg in all_units_info_df.parent_region.values]
ROIunits = all_units_info_df[ROImask].copy()

In [39]:
ROIunits

Unnamed: 0,unit_name,probe,region,spike_count,spike_duration,CCF_AP,CCF_DV,CCF_ML,parent_region
0,B1,probeB,VAL,117676,0.480737,262.8,182.0,198.8,TH
1,B2,probeB,VAL,130246,0.467002,262.8,182.0,198.8,TH
2,B4,probeB,VAL,84230,0.535678,262.8,182.0,198.8,TH
3,B6,probeB,VAL,97127,0.508208,262.8,181.2,198.8,TH
4,B8,probeB,VAL,42135,0.494472,262.8,180.0,198.8,TH
...,...,...,...,...,...,...,...,...,...
488,C587,probeC,PO,61035,0.590620,299.2,158.8,176.4,TH
489,C589,probeC,PO,98540,0.494472,301.2,148.0,173.6,TH
490,C590,probeC,Eth,121184,0.508208,302.8,140.4,172.0,TH
491,C593,probeC,LP,65281,0.590620,306.8,119.6,167.2,TH


In [40]:
ROIunits['color'] = [region_colors[regi] if regi in region_colors.keys() else 'Gray' for regi in ROIunits.region.values]
ROIunits.head()

Unnamed: 0,unit_name,probe,region,spike_count,spike_duration,CCF_AP,CCF_DV,CCF_ML,parent_region,color
0,B1,probeB,VAL,117676,0.480737,262.8,182.0,198.8,TH,Purple
1,B2,probeB,VAL,130246,0.467002,262.8,182.0,198.8,TH,Purple
2,B4,probeB,VAL,84230,0.535678,262.8,182.0,198.8,TH,Purple
3,B6,probeB,VAL,97127,0.508208,262.8,181.2,198.8,TH,Purple
4,B8,probeB,VAL,42135,0.494472,262.8,180.0,198.8,TH,Purple
