In [82]:
def load_hdf_group(data_dir, hdf_filename, group="/"):
    '''
    Loads any datasets from the given hdf group into a dictionary. Also will
    recursively load other groups if any exist under the given group

    Args:
        data_dir (str): folder where data is located
        hdf_filename (str): name of hdf file
        group (str): name of the group to load
    
    Returns:
        dict: all the datasets contained in the given group

    Raises:
        ValueError: if ``group`` is not present in the file
    '''
    full_file_name = os.path.join(data_dir, hdf_filename)

    # Recursively load groups until datasets are reached
    def _load_hdf_group(hdf_group):
        data = dict()
        for k in hdf_group.keys():
            item = hdf_group[k]
            if isinstance(item, h5py.Group):
                data[k] = _load_hdf_group(item)
            else:
                # The dataset loader may rename the key (e.g. strips '_json')
                k_, v = _load_hdf_dataset(item, k)
                data[k_] = v
        return data

    # The context manager closes the file even when the group is missing or
    # loading one of the datasets raises.
    with h5py.File(full_file_name, 'r') as hdf:
        if group not in hdf:
            raise ValueError('No such group in file {}'.format(hdf_filename))
        return _load_hdf_group(hdf[group])

def _load_hdf_dataset(dataset, name):
    '''
    Internal function for loading hdf datasets. Decodes json and unicode data automatically.

    Args:
        dataset (hdf object): dataset to load
        name (str): name of the dataset

    Returns:
        tuple: Tuple containing:
            | **name (str):** name of the dataset (might be modified)
            | **data (object):** loaded data
    '''
    data = dataset[()]
    if '_json' in name:
        import json
        name = name.replace('_json', '')
        data = json.loads(data)
    try:
        data = data.decode('utf-8')
    except:
        pass
    return name, data

In [83]:
import numpy as np
import matplotlib.pyplot as plt
import aopy
import os
import pandas as pds
from db import dbfunctions as db
from ipywidgets import interactive, widgets
import scipy
import h5py
from tqdm.auto import tqdm 
import seaborn as sn
import sklearn
from sklearn.decomposition import PCA, FactorAnalysis
from itertools import compress
import multiprocessing as mp
import time
import math
from scipy.fft import fft
import glob
from datetime import date

# Set parameters

In [84]:
# Subject and the behavioural events used for alignment
subject = "affi"
align_events = ["TARGET ONSET", "GO CUE", "MOVEMENT ONSET"]

# Result locations (folder and file names are joined onto base_save_dir by later cells)
base_save_dir = "/media/moor-data/results/Ryan/neuropixel_targeting/"
np_preproc_data_folder = "np_analysis_preproc_data"
ecog_dec_acc_file_name = "ecog_decoding_maps/npinsert_ecog_decoding"

# Figure-saving switch
save_figs = False

In [85]:
# Decoding calculation parameters
tbefore = 0.5
tafter = 1
nlda_lags = 1
niter_match = 50
min_trial_prop = .85
ntrial_bin_size = 96
nfolds = 4

# Visualization parameters
colors = sn.color_palette(n_colors=9)
recording_brain_areas={'M1': [30, 56, 47, 40, 121, 48, 120, 98], 'PM':[11, 9, 18, 22, 10, 45]}
day_colors = ['dodgerblue', 'indigo', 'violet', 'lightblue', 'mediumorchid',
              'purple', 'steelblue', 'dodgerblue', 'lightblue', 'red', 'black', 'green', 'purple', 'cyan', 'gray', 'yellow'] 

# Output folder for the pseudopopulation file. Built from the configured base
# directory and preprocessing folder (the same location the preprocessed data is
# read from) so the read and write locations cannot drift apart.
save_dir = os.path.join(base_save_dir, np_preproc_data_folder)

# Load and extract relevant data

In [86]:
# Load the preprocessed neuropixel data for this subject and report how long it took
start = time.time()
aopy.utils.release_memory_limit()
df, rasters, preproc_metadata = aopy.data.base.pkl_read(
    f"{subject}_np_preprocessed",
    os.path.join(base_save_dir, np_preproc_data_folder),
)
print(f"{np.round((time.time()-start)/60)} min to load preprocessed data")

# Per-recording metadata
nrecs = preproc_metadata['nrecs']
recording_site = preproc_metadata['recording_sites'] # will be the same for all align events

# Rename because name in bmi3d is slightly different (TODO):
# 'NP_Insert72' -> 'NPinsert72', anything else -> 'NPinsert137'
implants = [
    'NPinsert72' if preproc_metadata['implant'][irec] == 'NP_Insert72' else 'NPinsert137'
    for irec in range(len(preproc_metadata['implant']))
]

# Sorted unique recording dates
dates = np.unique(df['date'])

13.0 min to load preprocessed data


In [87]:
# Load the root group of the ECoG decoding-map file into a nested dict
ecog_dec_acc = load_hdf_group(data_dir=base_save_dir, hdf_filename=ecog_dec_acc_file_name)

In [90]:
qc_results, ksdrift = aopy.data.base.pkl_read(
    f"{subject}_QCunits",
    os.path.join(base_save_dir, np_preproc_data_folder),
)

# Use the 'manual_*' entries of the QC results for every recording.
# (The alternative entries are 'final_good_unit_labels', 'final_good_unit_idx'
# and 'position'; swap the keys below to use those instead.)
stable_unit_labels = []
stable_unit_idx = []
neuron_pos = []
for irec in range(nrecs):
    stable_unit_labels.append(qc_results['manual_good_unit_labels'][irec])
    stable_unit_idx.append(qc_results['manual_good_unit_idx'][irec])
    neuron_pos.append(qc_results['manual_position'][irec])

# Number of selected units in each recording
nstable_unit = np.array([len(rec_unit_idx) for rec_unit_idx in stable_unit_idx])

In [93]:
# Estimated brain-surface position for each recording (one entry per recording)
if subject == 'beignet':
    surface_pos = np.ones(nrecs)*3840
elif subject == 'affi':
    surface_pos = np.array([3840, 3840, 3400, 3250,3840,3800,3800,3900,3500,3700,2500,3250,2100,3840,3100,1600,3250,2500,3100,3300,3800,3100,3250,2750,2900, 2750])
else:
    # Without this branch an unknown subject would leave surface_pos undefined
    # (or silently reuse a stale value from a previous run of this cell).
    raise ValueError(f"No surface positions defined for subject '{subject}'")

# surface_pos is indexed by recording number below, so it must have one entry per recording
if surface_pos.shape != (nrecs,):
    raise ValueError(f"surface_pos has shape {surface_pos.shape} but there are {nrecs} recordings")
print(surface_pos.shape, nrecs, surface_pos)

(26,) 26 [3840 3840 3400 3250 3840 3800 3800 3900 3500 3700 2500 3250 2100 3840
 3100 1600 3250 2500 3100 3300 3800 3100 3250 2750 2900 2750]


In [94]:
# compile dataframe of unit information
# Need columns for: date, recording site, unit_label, absolute y-pos, relative y-pos, waveform
unit_info = {'rec_number': [], 'date': [], 'rec_site': [], 'rec_xpos': [], 'rec_ypos': [], 'rec_rcaxis': [], 'implant': [],
             'unit_label': [], 'unit_idx': [], 'abs_depth': [], 'rel_depth': [], 'waveform': [], 'surface_pos': []}
surface_buffer = 200
# Reference position used for the relative depth; presumably the top of the
# probe's position range in um (it matches the default surface position) - TODO confirm
rel_depth_reference = 3840

# Hoisted out of the loops: these do not change per unit
unique_dates = np.unique(df['date'])
rec_locations = ecog_dec_acc[subject]['rec_locations']
rc_axis = ecog_dec_acc[subject]['rc_axis']

for irec in tqdm(range(nrecs)):
    if nstable_unit[irec] == 0:
        continue  # nothing to add (also avoids taking the min of an empty array below)

    rec_neuron_pos = neuron_pos[irec]
    # Offset of the top-most detected unit in this recording (over all units of
    # the recording, including ones rejected by the surface check below)
    top_unit_offset = np.min(rel_depth_reference - rec_neuron_pos)

    for iunit in range(nstable_unit[irec]):
        # Only save units that are below the estimated surface of the brain
        if rec_neuron_pos[iunit] <= (surface_pos[irec]+surface_buffer):   
        
            unit_info['rec_number'].append(irec) # Recording_number
            unit_info['date'].append(unique_dates[irec]) # date (assumes each recording is done on a different day)
            unit_info['rec_site'].append(recording_site[irec]) # Recording site
            unit_info['rec_xpos'].append(rec_locations[irec,0]) # Recording site x pos in chamber
            unit_info['rec_ypos'].append(rec_locations[irec,1]) # Recording site y pos in chamber
            unit_info['rec_rcaxis'].append(rc_axis[irec,0]) # Recording site position along the 'rc_axis' entry
            unit_info['implant'].append(implants[irec]) # Implant
            unit_info['unit_label'].append(stable_unit_labels[irec][iunit]) # unit label output from kilosort
            unit_info['unit_idx'].append(stable_unit_idx[irec][iunit]) # index of the unit (from the QC results)
            unit_info['abs_depth'].append(surface_pos[irec] - rec_neuron_pos[iunit]) #Absolute depth [um]
            unit_info['rel_depth'].append(rel_depth_reference-rec_neuron_pos[iunit]-top_unit_offset) #Relative depth [um] (to top detected neuron)
            # NOTE(review): labels/idx/positions come from the 'manual_*' QC entries but the
            # waveform is read from 'mean_wfs' with the same iunit - confirm that 'mean_wfs'
            # rows line up with the manual unit list (a 'manual_wfs' entry may be the intended source).
            unit_info['waveform'].append(qc_results['mean_wfs'][irec][iunit,:]) #waveform
            unit_info['surface_pos'].append(surface_pos[irec])

  0%|          | 0/26 [00:00<?, ?it/s]

In [95]:
# One row per retained unit; columns are the keys of unit_info
unit_df = pds.DataFrame(data=unit_info)

# Create pseudopopulations

In [96]:
# Pseudopopulation configurations
pp_configs = ["random", "column", "depth"]

## Random

In [97]:
# Create random neuron populations
nrandom_units = 5 # Number of random units chosen in each group
nrandom_groups = 1000 # Number of random groups to make
random_seed = 0 # Fixed seed so the saved random groups are reproducible across runs
rng = np.random.default_rng(random_seed)
random_group_info = []
for igroup in range(nrandom_groups):
    # Draw row positions without replacement, so a unit appears at most once per group
    group_idx = rng.choice(len(unit_df), size=nrandom_units, replace=False)
    # group_idx holds row positions, so index positionally rather than by label
    random_group_info.append(unit_df.iloc[group_idx])

## Column

In [98]:
# Scratch check of the single-unit lookup used for column groups.
# The position is defined here explicitly (last unique recording location) so the
# cell does not depend on a loop variable left over from a cell further down.
example_pos = np.unique(ecog_dec_acc[subject]['rec_locations'], axis=0)[-1]
at_example_pos = (unit_df['rec_xpos']==example_pos[0]) & (unit_df['rec_ypos']==example_pos[1])
temp_unit_label = unit_df.loc[at_example_pos].iloc[0]['unit_label']
print(temp_unit_label)
unit_df.loc[at_example_pos & (unit_df['unit_label']==temp_unit_label)]

7


Unnamed: 0,rec_number,date,rec_site,rec_xpos,rec_ypos,rec_rcaxis,implant,unit_label,unit_idx,abs_depth,rel_depth,waveform,surface_pos
92,7,2024-06-05,74,3.799992,3.799992,4.654021,NPinsert137,7,5,2660.0,2340.0,"[0.5647804139801271, 0.42161704029400976, 0.32...",3900


In [99]:
# Create neuron populations from each recording location (one group per unique location)
min_probe_depth = 2000 #[um]
unique_rec_pos = np.unique(ecog_dec_acc[subject]['rec_locations'], axis=0)
column_group_info = []
for ipos in range(unique_rec_pos.shape[0]):
    pos = unique_rec_pos[ipos,:]

    # All units recorded at this location (mask computed once and reused)
    site_units = unit_df.loc[(unit_df['rec_xpos']==pos[0]) & (unit_df['rec_ypos']==pos[1])]
    if len(site_units) == 0:
        # Previously surfaced as an opaque IndexError from .iloc[0]
        raise ValueError(f"No units in unit_df at recording position {pos}")

    # Only use recordings whose surface position is greater than the threshold
    new_df = site_units.loc[site_units['surface_pos']>min_probe_depth].reset_index()

    # Fallback so every location keeps a (non-empty) group: add exactly one unit,
    # the first one at this location. Selecting the row directly guarantees a single
    # unit even when the same unit label occurs in several recordings at the location.
    if len(new_df)==0:
        new_df = site_units.iloc[[0]].reset_index()

    column_group_info.append(new_df)

In [100]:
# Summarize the column groups: unit count and recording site of each group
units_per_column = [len(column_group) for column_group in column_group_info]
site_per_column = [column_group['rec_site'][0] for column_group in column_group_info]
print('nUnits:  ', units_per_column)
print('Rec Site:', site_per_column)

# Compare against the unique recording locations and the per-recording site list
print(len(unique_rec_pos))
print(unique_rec_pos)
print(recording_site)

nUnits:   [4, 18, 5, 13, 5, 14, 8, 2, 14, 25, 3, 1, 21, 1, 4, 33, 22, 97, 11]
Rec Site: [29, 19, 10, 31, 21, 55, 44, 33, 69, 70, 58, 46, 95, 71, 107, 72, 73, 98, 74]
19
[[-3.79999184  0.75999837]
 [-3.79999184  2.2799951 ]
 [-3.79999184  3.79999184]
 [-2.2799951   2.2799951 ]
 [-2.2799951   3.79999184]
 [-1.51999674  0.        ]
 [-0.75999837  2.2799951 ]
 [-0.75999837  3.79999184]
 [ 0.          0.        ]
 [ 0.75999837  0.75999837]
 [ 0.75999837  2.2799951 ]
 [ 0.75999837  3.79999184]
 [ 1.51999674 -1.51999674]
 [ 1.51999674  1.51999674]
 [ 2.2799951  -2.2799951 ]
 [ 2.2799951   2.2799951 ]
 [ 3.03999347  3.03999347]
 [ 3.79999184  0.75999837]
 [ 3.79999184  3.79999184]]
[ 19  29  55  58  33  98  10  74  72 107  70  44  98  46  31  71  21  69
  95  31  31  70  31  72  72  73]


## Depth

In [101]:
# depth_ranges = [(0,1000), (1000, 2000), (2000, 100000)] #Boundary distinguishing shallow and deep neurons 

In [102]:
# Create neuron populations from neurons at different relative depths

# Overlapping depth windows: 1000 wide, starting every 250 from 0 to 2750,
# plus one final window covering the deepest 1000 of the recorded range.
max_rel_depth = np.max(unit_df['rel_depth'])
depth_ranges = [(window_start, window_start + 1000) for window_start in range(0, 3000, 250)]
depth_ranges.append((max_rel_depth-1000, max_rel_depth))

depth_group_info = []
for irange, depth_range in enumerate(depth_ranges):
    # Lower bound inclusive, upper bound exclusive
    in_depth_range = (unit_df['rel_depth'] >= depth_range[0]) & (unit_df['rel_depth'] < depth_range[1])
    depth_group_info.append(unit_df.loc[in_depth_range].reset_index())

In [103]:
#Create neuron populations for each recording site from neurons at different depths

depth_group_info_by_site = {}
unique_sites = np.unique(unit_df['rec_site'])

# The depth masks do not depend on the site, so build them once
# (lower bound inclusive, upper bound exclusive)
depth_masks = [(unit_df['rel_depth'] >= depth_range[0]) & (unit_df['rel_depth'] < depth_range[1])
               for depth_range in depth_ranges]

for site in unique_sites:
    site_mask = unit_df['rec_site'] == site
    # One DataFrame per depth range, in the same order as depth_ranges.
    # Masks are combined with the boolean operator & rather than arithmetic *.
    depth_group_info_by_site[site] = [unit_df.loc[depth_mask & site_mask].reset_index()
                                      for depth_mask in depth_masks]

In [104]:
# For every site, print the number of units falling in each depth range
for isite, site in enumerate(unique_sites):
    units_per_depth = [len(depth_group) for depth_group in depth_group_info_by_site[site]]
    print(site, units_per_depth)

10 [3, 1, 1, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0]
19 [7, 7, 5, 5, 8, 9, 9, 6, 3, 1, 0, 0, 0]
21 [0, 1, 2, 2, 4, 3, 2, 3, 1, 1, 1, 0, 1]
29 [3, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0]
31 [9, 7, 6, 5, 4, 2, 1, 0, 0, 0, 0, 0, 0]
33 [1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0]
44 [6, 5, 5, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0]
46 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
55 [6, 4, 2, 1, 0, 1, 1, 1, 5, 5, 7, 7, 7]
58 [2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]
69 [10, 8, 8, 5, 4, 3, 1, 0, 0, 0, 0, 0, 0]
70 [13, 10, 8, 5, 4, 5, 7, 9, 8, 5, 3, 0, 3]
71 [22, 26, 22, 15, 7, 1, 0, 0, 0, 0, 0, 0, 0]
72 [5, 3, 10, 16, 17, 21, 13, 10, 11, 7, 7, 4, 7]
73 [3, 7, 15, 20, 19, 14, 5, 0, 0, 0, 0, 0, 0]
74 [3, 3, 3, 1, 2, 5, 7, 7, 6, 2, 0, 0, 0]
95 [3, 8, 8, 8, 7, 3, 3, 5, 5, 9, 9, 7, 9]
98 [28, 30, 35, 42, 42, 38, 27, 19, 19, 20, 16, 13, 15]
107 [3, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0]


In [105]:
# Spot check: print the column group that is fourth from the end of column_group_info
print(column_group_info[-4])

    index  rec_number        date  rec_site  rec_xpos  rec_ypos  rec_rcaxis  \
0     103           8  2024-06-06        72  2.279995  2.279995    2.792412   
1     104           8  2024-06-06        72  2.279995  2.279995    2.792412   
2     105           8  2024-06-06        72  2.279995  2.279995    2.792412   
3     106           8  2024-06-06        72  2.279995  2.279995    2.792412   
4     107           8  2024-06-06        72  2.279995  2.279995    2.792412   
5     108           8  2024-06-06        72  2.279995  2.279995    2.792412   
6     109           8  2024-06-06        72  2.279995  2.279995    2.792412   
7     110           8  2024-06-06        72  2.279995  2.279995    2.792412   
8     282          23  2024-09-14        72  2.279995  2.279995    2.792412   
9     283          23  2024-09-14        72  2.279995  2.279995    2.792412   
10    284          23  2024-09-14        72  2.279995  2.279995    2.792412   
11    285          23  2024-09-14        72  2.27999

# Save

In [106]:
# Parameters used to build the pseudopopulations, saved alongside the groups
pseudopopulation_metadata = dict(
    nrandom_units=nrandom_units,
    nrandom_groups=nrandom_groups,
    depth_ranges=depth_ranges,
)
print(pseudopopulation_metadata)

{'nrandom_units': 5, 'nrandom_groups': 1000, 'depth_ranges': [(0, 1000), (250, 1250), (500, 1500), (750, 1750), (1000, 2000), (1250, 2250), (1500, 2500), (1750, 2750), (2000, 3000), (2250, 3250), (2500, 3500), (2750, 3750), (2520.0, 3520.0)]}


In [107]:
# Save data
# The tuple order below is the order readers of this file must unpack it in.
# The file name spelling ("psuedopopulations") is kept as-is so existing readers still find it.
pseudopopulation_data = (
    random_group_info,
    column_group_info,
    depth_group_info,
    depth_group_info_by_site,
    pseudopopulation_metadata,
    unit_df,
)
aopy.data.base.pkl_write(f"{subject}_np_psuedopopulations", pseudopopulation_data, save_dir)

In [None]:
# Number of units in each of the first three depth groups
for idepth in range(3):
    print(len(depth_group_info[idepth]))


In [None]:
# Scratch checks. A notebook cell only echoes the value of its final expression,
# so the return value of this first call is not displayed.
aopy.utils.get_memory_available_gb()
# Type of one 'unit_labels' entry among the rows of the first date.
# NOTE(review): the trailing [0] is presumably a label-based lookup on a pandas
# Series (works only if a row labelled 0 belongs to dates[0]) - confirm.
type(df['unit_labels'][df['date']==dates[0]][0])