# BrainPatch in-vivo electrophysiology code
All of the following code is for analysis of the in-vivo recordings 

## Import python packages
All of these should be working properly if you've used the provided conda environment file. However, if there are any versioning issues just reach out to me on github @kb0dkin and we'll get it sorted out!

In [1]:
import ephys_utils # loading and basic processing code.
from matplotlib import pyplot as plt
from matplotlib.patches import Polygon # this is a nice way to show error bars and standard deviations

# data analysis standards
import numpy as np
import pandas as pd
from scipy import signal
import os, glob, re
from pathlib import Path

# csv writing
import csv

# tqdm
from tqdm.notebook import tqdm

# open the plots with QT
%matplotlib qt

  from pkg_resources import get_distribution, DistributionNotFound


## Data pre-processing and conversion

This is a three-step process:

1. Load the raw data using the Open EPhys Python data loaders. 
    
    *Do not change the directory structure, since the functions look for specific directories (ie Raw_Data) and get current and distance information from the subdirectory names*

2. Remove stimulation artifacts using the ERAASR algorithm (see **Methods** for more information), then filter the signal with a 300–6000 Hz band-pass filter (BPF)

3. Use Kilosort4 to extract spike times


For each of these steps, the code will create a new numpy file for each recording in the "Processed_Data" directory. The next step will look for the appropriate numpy file

You will need to change the value of ``` base_dir ``` to point to where you downloaded the data. Other than that, you should not need to change any of the code


**NOTE**
This is a fairly long process! By default, this code checks whether the processed data already exists and, if it does, skips regenerating it. If you want it to reprocess the data, set the "reconvert" flag to True

In [2]:
# Root of the downloaded dataset -- edit this to point at your local copy.
base_dir = 'Z:/BrainPatch/Published_Data'
if not isinstance(base_dir, os.PathLike):
    # plain strings are promoted to Path so the `/` joins below work
    base_dir = Path(base_dir)

# Set to True to run through the whole process even if processed data exists.
# NOTE(review): `reconvert` is assigned here but is not passed to
# bulk_preprocess in this cell -- confirm how the flag is meant to be consumed.
reconvert = False

# Only continue when the structure check on base_dir returns 1.
structure_ok = ephys_utils.base_dir_structure_check(base_dir) == 1
if structure_ok:
    raw_data_dir = base_dir / 'Raw_Data'
    processed_data_dir = base_dir / 'Processed_Data'

    # Probe map and Kilosort4 settings.
    probe_path = base_dir / '64-4shank-poly-brainpatch-chanMap.mat'
    kilosort_settings = dict(
        probe_name=probe_path,
        n_chan_bin=64,           # 64 channel probe
        nearest_chans=0,         # electrodes are far enough apart that signal should not be shared
        drift_correction=False,
    )

    # Hand everything to the bulk preprocessing routine.
    ephys_utils.bulk_preprocess(
        raw_data_dir=raw_data_dir,
        processed_data_dir=processed_data_dir,
        probe_path=probe_path,
        kilosort_settings=kilosort_settings,
    )




[bulk_preprocess] processing files:   0%|          | 0/68 [00:00<?, ?it/s]

[bulk process] loading raw data            *
[bulk process] cleaning artifacts            *
[bulk process] filtering            *
[bulk process] loading raw data            *
[bulk process] cleaning artifacts            *
[bulk process] filtering            *
[bulk process] loading raw data            *
[bulk process] cleaning artifacts            *
[bulk process] filtering            *
[bulk process] kilosort                      

kilosort.run_kilosort: Kilosort version 4.0.16
kilosort.run_kilosort: Sorting Z:\BrainPatch\Published_Data\Processed_Data\Crimson__2024-08-21_13-44-07__10mA_2ms_400um\sig_filter.npy
kilosort.run_kilosort: ----------------------------------------
kilosort.run_kilosort: Using GPU for PyTorch computations. Specify `device` to change this.
kilosort.run_kilosort:  
kilosort.run_kilosort: Computing preprocessing variables.
kilosort.run_kilosort: ----------------------------------------
kilosort.run_kilosort: N samples: 2630400
kilosort.run_kilosort: N seconds: 87.68
kilosort.run_kilosort: N batches: 44
kilosort.run_kilosort: Preprocessing filters computed in  2.88s; total  2.88s
kilosort.run_kilosort:  
kilosort.run_kilosort: Computing drift correction.
kilosort.run_kilosort: ----------------------------------------
kilosort.spikedetect: Re-computing universal templates from data.
Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to b

IndexError: index 0 is out of bounds for axis 0 with size 0

In [None]:
import torch

In [None]:
# torch.__version__


Next, calculate firing rates and find the differences between pre-stimulation and post-stimulation firing rates

In [None]:
# Build the firing-rate dataframe via ephys_utils.firingRate_dataframe.
# NOTE(review): `directories=` has no value, so this line is a SyntaxError as
# written -- supply the recording directories before running this cell.
fr_df = ephys_utils.firingRate_dataframe(directories=)

## Figure 3

Example waveforms from the kilosort data. One from each channel. 

The example waveforms are from units that have a pre-stimulation mean firing rate of at least 0.5Hz

In [None]:
# Per-channel index of the example waveform in the FR dataframe.
# Keys are channel numbers; values are waveform indices, or None where no
# waveform has been assigned to that channel.
#
# The original cell began with `wf_mapping = pd.read_csv()`, which raises
# TypeError (no path given) and whose result was overwritten by the literal
# below anyway, so that call has been dropped.
# TODO: load this mapping from a csv once the file exists.
#
# NOTE(review): channels 13, 21, 55 and 59 have no entry -- confirm whether
# that is intentional.
wf_mapping = {
    41: 620,  # could also be 211, 481, or 340
    34: 208,  # or 745
    56: 113,  # or 487
    62: 751,
    0: 350,
    6: 220,
    23: 120,
    28: 358,
    32: 232,
    45: 20,
    58: 363,
    54: 236,
    8: 370,
    19: 243,
    36: 126,
    37: 33,
    63: 255,
    1: 387,
    2: 388,
    26: None,
    38: 524,
    # NOTE(review): channel 49 was listed twice (786 here, 556 further down).
    # Python keeps the last value, so 556 is what the original produced; the
    # duplicate is collapsed to that value. One of the two entries was
    # possibly meant for a different channel -- confirm.
    49: 556,
    52: 526,
    10: 149,
    3: 538,
    24: None,
    35: 286,
    39: 412,
    57: 159,
    5: 818,
    25: None,
    42: 569,
    51: 689,
    44: 570,
    50: 306,
    18: 76,  # only exists in one recording, might toss
    20: None,
    43: 312,
    33: 710,
    46: 83,
    9: None,
    31: 192,
    47: 457,
    53: 195,
    40: 464,
    48: 868,
    14: 739,
    11: 335,
    17: 338,
    60: 383,
    27: None,
    61: 400,
    29: None,
    12: 576,
    4: 239,
    30: 373,
    7: 422,
    16: None,
    15: None,
    22: 736,
}

# Waveforms, laid out according to the probe mapping from NeuroNexus:
# a 16 x 4 grid of small axes, one per channel.
probe_grid = plt.GridSpec(16, 4, wspace=.5, hspace=.7)

fig_probe = plt.figure()

# NOTE(review): `wf_dict` (channel -> waveform trace) and `probe` (with 'yc'
# and 'kcoords' entries) are not defined in any cell shown in this notebook;
# they must exist in the session before this cell is run.
ax_probe = dict()
for channel, waveform in wf_dict.items():
    # grid row from the channel's y coordinate (presumably 50 um site
    # spacing -- confirm), grid column from its kcoords value minus one
    row = int(probe['yc'][channel] / 50)
    col = int(probe['kcoords'][channel]) - 1

    ax = fig_probe.add_subplot(probe_grid[row, col])
    ax.plot(waveform)
    # the original title was the constant string 'channel' (an f-string with
    # no placeholder); include the channel number so panels are identifiable
    ax.set_title(f'channel {channel}')

    # bare traces only: no box, no ticks
    for spine in ax.spines:
        ax.spines[spine].set_visible(False)
    ax.set_xticks([])
    ax.set_yticks([])

    ax_probe[channel] = ax

# 3g
firing rates vs current at different depths

In [None]:

# Session directories that are pooled for this panel.
# The last entry was 'Z:BrainPatch//20240821': without a separator after the
# drive letter Windows resolves it relative to the current directory on Z:,
# so it is now spelled like the other two.
base_dirs = ['Z://BrainPatch//20241002//lateral//',
             'Z://BrainPatch//20240925//',
             'Z://BrainPatch//20240821']

# all 2ms stimulations at 400 um in the base_dirs
# (the loop variable is named session_dir so it does not read like the
# notebook-level `base_dir`)
directories = [
    os.path.join(session_dir, directory)
    for session_dir in base_dirs
    for directory in os.listdir(session_dir)
    if os.path.isdir(os.path.join(session_dir, directory))
    and '2ms' in directory
    and '400um' in directory
]

# probe map
probe_name = "Z:\\BrainPatch\\20241002\\64-4shank-poly-brainpatch-chanMap.mat"

# settings for kilosort
settings = {'probe_name': probe_name,
            'n_chan_bin': 64,
            'nearest_chans': 1}


# have to define this somewhere -- probably in the utils
# NOTE(review): `gimme_a_FR_df` is a placeholder that is not defined or
# imported anywhere in this notebook; this line raises NameError until it is.
FR_df = gimme_a_FR_df(directories, probe_name, settings)



# Firing rate vs. depth, one row of axes per stimulation current.
# Column 0 shows the pre-stimulation mean, column 1 the post-stimulation
# response. The grid has four rows, so FR_df is expected to contain at most
# four distinct currents.
fig_amp_scatter, ax_amp_scatter = plt.subplots(nrows=4, ncols=2, sharex=True, sharey=True)
fig_amp_line, ax_amp_line = plt.subplots(nrows=4, ncols=2, sharex=True, sharey=True)

fig_amp_line.set_size_inches(6, 10)

# (FR_df column, axes column, title suffix) for the two panels in each row
panels = (
    ('poststim_first', 1, 'stimulation responses'),
    ('prestim_mean', 0, 'pre-stimulation means'),
)

# `with` guarantees both CSVs are flushed and closed even if plotting raises;
# newline='' is what pandas asks for when to_csv is handed an open file
# object (otherwise Windows output gets blank lines between rows).
with open('Z:\\BrainPatch\\Current_vs_responses_stim_responses.csv', 'w', newline='') as fid_response, \
        open('Z:\\BrainPatch\\Current_vs_responses_prestim_means.csv', 'w', newline='') as fid_means:
    csv_handles = {'poststim_first': fid_response, 'prestim_mean': fid_means}

    for i_current, current in enumerate(FR_df['current'].unique()):
        # rows recorded at this stimulation current
        current_df = FR_df.loc[FR_df['current'].eq(current)]

        for column, i_col, label in panels:
            title = f'{current} mA {label}'

            # scatter of every row at its depth
            current_df.plot.scatter(ax=ax_amp_scatter[i_current, i_col], x=column, y='depth', s=2)
            ax_amp_scatter[i_current, i_col].set_title(title)

            # line plot of the per-depth mean with a +/- 1 std band; the
            # lower edge is clipped at zero
            summary = current_df.groupby('depth')[column].agg(['mean', 'std'])
            ax_line = ax_amp_line[i_current, i_col]
            ax_line.plot(summary['mean'], summary.index)
            ax_line.fill_betweenx(summary.index,
                                  np.maximum(summary['mean'] - summary['std'], 0),
                                  summary['mean'] + summary['std'],
                                  alpha=0.2)
            ax_line.set_title(title)

            # remove the spines from the axes
            for spine in ax_line.spines:
                ax_line.spines[spine].set_visible(False)

            # append this current's summary; the header is written only once
            summary['current'] = current
            summary.to_csv(csv_handles[column], header=(i_current == 0))

        # raw string: '\m' is not a valid Python escape sequence
        ax_amp_line[i_current, 0].set_ylabel(r'Depth $\mu$m')

fig_amp_line.savefig('Z://BrainPatch//current_vs_response.svg')


# Supplementary Figure 
This figure shows some of the preprocessing steps, and goes into the LFP responses at different distances.


First, let's look at the artifacts that are produced solely by the LED/current source. This dataset is recorded from a mouse without ChrimsonR

In [None]:
# Mean stimulation-aligned trace on channel 45 for every recording directory
# under Wildtype_Artifacts, before (raw) and after ERAASR, plus a CSV dump of
# the plotted lines.
base_dir = 'Z:\\BrainPatch\\Raw_Data\\Wildtype_Artifacts'

fig_raw, ax_raw = plt.subplots()
fig_eraasr, ax_eraasr = plt.subplots()

# NOTE(review): `openephys_utils` is not imported in the import cell of this
# notebook (only `ephys_utils` is) -- import it, or switch to the renamed
# module, before running this cell.
recording_dirs = [dd for dd in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, dd))]
for i_directory, directory in enumerate(recording_dirs):
    # distance information from the directory name; [0] is the whole match,
    # so the label keeps its 'um' suffix. Raw string: '\d' is not a valid
    # Python escape sequence.
    distance = re.search(r'(\d{4})um', directory)[0]

    # load the data - previously loaded if available
    sig, timestamps, stim, stim_ts = openephys_utils.open_sig_stims(os.path.join(base_dir, directory))
    sig_eraasr = openephys_utils.ERAASR(sig, stim, save=False)

    # plot the mean waveform for each stimulation distance
    # (show_stim is True only for the first directory)
    openephys_utils.plot_mean_LFP(sig, stim, channel=45, pre_stim=1, ax=ax_raw, show_stim=i_directory == 0, label=distance, len_ms=10, align_stim=False)
    openephys_utils.plot_mean_LFP(sig_eraasr, stim, channel=45, pre_stim=1, ax=ax_eraasr, show_stim=i_directory == 0, label=distance, len_ms=10, align_stim=False)


# Get the traces and put them into the csv files.
# The original opened both files and never closed them, so buffered rows could
# be lost; `with` closes them. newline='' is what the csv module requires for
# files handed to csv.writer.
# NOTE(review): `data` is the stacked (x, y) arrays of a line, so writerow
# emits two cells holding numpy's string form of each array rather than one
# number per cell, and the header names a 'current' column although the line
# labels are distances -- confirm the intended layout.
with open('Z:\\BrainPatch\\Figures\\Supplemental\\Artifacts_raw.csv', 'w', newline='') as csv_file_raw, \
        open('Z:\\BrainPatch\\Figures\\Supplemental\\Artifacts_eraasr.csv', 'w', newline='') as csv_file_eraasr:
    # for the artifact
    csv_writer_raw = csv.writer(csv_file_raw)
    csv_writer_raw.writerow(['current', '', 'trace'])
    for child in ax_raw.lines:
        data = np.stack(child.get_data())
        csv_writer_raw.writerow(data)

    # eraasr'd artifact
    csv_writer_eraasr = csv.writer(csv_file_eraasr)
    csv_writer_eraasr.writerow(['current', '', 'trace'])
    for child in ax_eraasr.lines:
        data = np.stack(child.get_data())
        csv_writer_eraasr.writerow(data)


# clean up the plot
ax_raw.legend()  # add a legend
ax_eraasr.legend()
for spine in ax_raw.spines:  # turn off the box around the axis
    ax_raw.spines[spine].set_visible(False)
    ax_eraasr.spines[spine].set_visible(False)

ax_raw.set_xlabel('Time (ms)')
ax_raw.set_ylabel('Voltage (mV)')
ax_eraasr.set_xlabel('Time (ms)')
ax_eraasr.set_ylabel('Voltage (mV)')

# same y-range on both figures so raw and cleaned traces compare directly
ax_eraasr.set_ylim(ax_raw.get_ylim())





Looking at the LFP from a recording

In [None]:
# Plot the LFP for the 20241002 recordings with openephys_utils.LFP_stim_bulk.
# The following cells read scatter data back from the current figure
# (plt.gcf()), so run them right after this one.
# NOTE(review): `openephys_utils` is not imported in the import cell of this
# notebook (only `ephys_utils` is) -- confirm the module name.
openephys_utils.LFP_stim_bulk('Z:/BrainPatch/Raw_Data/20241002')


In [None]:
# Dump the scatter points of the current figure to a CSV, one row per point:
# the two scatter coordinates followed by the number parsed from the title of
# the axes the point was drawn in.

# The PathCollection check below needs this import; it was commented out in
# the original, which made the cell fail with NameError.
from matplotlib import collections

fig = plt.gcf()

# `with` closes the file even if a title fails to parse; newline='' is what
# the csv module requires for files handed to csv.writer.
with open('Z:\\BrainPatch\\Figures\\Supplemental\\mean_response_min_time.csv', 'w', newline='') as csv_file_min:
    csv_writer_min = csv.writer(csv_file_min)
    csv_writer_min.writerow(['current', 'time', 'distance'])

    for ax in fig.get_axes():
        for child in ax.get_children():
            # only scatter collections carry the points we want
            if isinstance(child, collections.PathCollection):
                offs = child.get_offsets().data
                # third column: axes title with ' ' and 'm' characters
                # stripped from both ends (e.g. a title like '2 mm'), as int
                title_value = int(ax.get_title().strip(' mm'))
                offs = np.append(offs, np.ones((offs.shape[0], 1)) * title_value, axis=1)
                csv_writer_min.writerows(offs)

In [None]:

# Dump the scatter points of the current figure to a CSV, one row per point:
# the two scatter coordinates followed by the number parsed from the title of
# the axes the point was drawn in.

# The PathCollection check below needs this import; it was commented out in
# the original, which made the cell fail with NameError.
from matplotlib import collections

fig = plt.gcf()

# `with` closes the file even if a title fails to parse; newline='' is what
# the csv module requires for files handed to csv.writer.
with open('Z:\\BrainPatch\\Figures\\Supplemental\\mean_response_min_depth.csv', 'w', newline='') as csv_file_min:
    csv_writer_min = csv.writer(csv_file_min)
    csv_writer_min.writerow(['magnitude', 'distance', 'current'])

    for ax in fig.get_axes():
        for child in ax.get_children():
            # only scatter collections carry the points we want
            if isinstance(child, collections.PathCollection):
                offs = child.get_offsets().data
                # third column: axes title with ' ', 'm' and 'A' characters
                # stripped from both ends (e.g. a title like '5 mA'), as int
                title_value = int(ax.get_title().strip(' mA'))
                offs = np.append(offs, np.ones((offs.shape[0], 1)) * title_value, axis=1)
                csv_writer_min.writerows(offs)