In [7]:
import pyabf
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import glob

In [18]:
# these are a set of convenience functions for working with Homeira and Lihua's files 


# this function tries to guess the gain on the response (voltage) channel
# the logic is to scale the voltage value at time 0 by each candidate gain and compare the result to the listed RMP
# the best gain is the one that minimizes that difference
CHECK_RESPONSE_GAINS = np.array([.25, .33, .5, 1, 2, 20, 25, 50]) # these are the valid gains that Homeira /Lihua uses
def guess_response_gain(resp_vec, stated_rmp):
    """Guess the gain of the response (voltage) channel from the cell's stated RMP.

    The first sample of the response trace is multiplied by every candidate gain in
    CHECK_RESPONSE_GAINS; the candidate whose scaled value lies closest to
    stated_rmp is selected.

    Parameters
    ----------
    resp_vec : indexable sequence of numbers
        Response trace; only resp_vec[0] is read.
    stated_rmp : number
        RMP listed for the cell, compared against the scaled first sample.
        If it is NaN every difference is NaN and np.argmin returns index 0.

    Returns
    -------
    (best_gain, rmp_abs_error) : tuple
        The selected entry of CHECK_RESPONSE_GAINS and the absolute difference
        between the scaled first sample and stated_rmp at that gain.
    """
    # compare against the stated_rmp argument rather than a notebook-level variable,
    # so the function gives the same answer no matter where it is called from
    abs_diff_vec = np.abs(resp_vec[0] * CHECK_RESPONSE_GAINS - stated_rmp)
    best_gain_ind = np.argmin(abs_diff_vec)

    rmp_abs_error = abs_diff_vec[best_gain_ind]
    best_gain = CHECK_RESPONSE_GAINS[best_gain_ind]
    return (best_gain, rmp_abs_error)

# try to figure out the gain on the stimulus channel
def get_stim_gain(stim_vec):
    """Choose the multiplier to apply to a stimulus trace from its minimum value.

    Returns 1000 when the smallest value in stim_vec is greater than -1,
    and 1 otherwise (including when the minimum is exactly -1).
    NOTE(review): presumably this separates traces stored in a 1000x larger unit
    from traces already in the target unit -- confirm against the recordings.
    """
    smallest_value = np.min(stim_vec)
    return 1000 if smallest_value > -1 else 1

# parse relevant info related to stimulus, including duration, and amplitudes
def get_stim_info(abf, stim_channel_num = 1, stim_gain = 1, amp_sample_ind = 5000):
    """Summarise the stimulus of every sweep in an abf object.

    Parameters
    ----------
    abf : object exposing sweepCount, setSweep(i, channel=...), sweepC, sweepX and dataRate
        (a pyabf.ABF instance in this notebook).
    stim_channel_num : int
        Channel passed to abf.setSweep when reading the stimulus.
    stim_gain : number
        Multiplier applied to abf.sweepC before anything is measured.
    amp_sample_ind : int
        Sample index at which each sweep's amplitude is read. The default of 5000
        matches the previously hard-coded value; every sweep must be longer than
        this index or an IndexError is raised.

    Returns
    -------
    dict with keys
        'stim_amp_vec'       : array of rounded amplitudes, one per sweep
        'stim_duration'      : number of non-zero samples / abf.dataRate for the last
                               sweep that had any non-zero sample (0.0 if none did)
        'stim_start_time'    : abf.sweepX at the first non-zero sample of that sweep (None if none)
        'stim_end_time'      : abf.sweepX at the last non-zero sample of that sweep (None if none)
        'num_sweeps'         : abf.sweepCount
        'stim_sampling_rate' : integer rate derived from the spacing of abf.sweepX
    """
    num_sweeps = abf.sweepCount
    stim_amps = np.zeros(num_sweeps)
    # duration and start/end times are only updated together, so they always describe the
    # same sweep; a trailing zero-amplitude sweep no longer resets the duration to 0
    stim_duration = 0.0
    stim_start_time = None
    stim_end_time = None
    for i in range(num_sweeps):
        abf.setSweep(i, channel=stim_channel_num)
        stim_vec = abf.sweepC * stim_gain
        stim_amps[i] = round(stim_vec[amp_sample_ind])

        non_zero_inds = np.where(stim_vec != 0)[0]
        if non_zero_inds.size == 0:
            # nothing was injected on this sweep, so it carries no timing information
            continue

        stim_duration = non_zero_inds.size * 1/abf.dataRate
        stim_start_time = abf.sweepX[non_zero_inds[0]]
        stim_end_time = abf.sweepX[non_zero_inds[-1]]
    sampling_rate = int(round(1/(abf.sweepX[2] - abf.sweepX[1]))) # manually calculate the sampling rate

    ret_dict = {'stim_amp_vec' : stim_amps, 'stim_duration' : stim_duration,
                'stim_start_time' : stim_start_time, 'stim_end_time' : stim_end_time, 'num_sweeps' : num_sweeps,
               'stim_sampling_rate' : sampling_rate}
    return ret_dict

# gets all relevant info about stimulus, including channel, duration, etc. and returns as dictionary
def get_stim_dict(meta_row, cell_meta_df = None):
    """Collect everything needed to interpret the stimulus of one cell's abf file.

    Parameters
    ----------
    meta_row : single-row DataFrame slice with 'cell_id', 'full_path' and 'recorder_name' columns.
    cell_meta_df : DataFrame with 'cell_id', 'full_path' and 'recorder_name' columns, optional
        Metadata table used to look up the fallback Lihua recording. When omitted, the
        notebook-level variable named cell_meta_df is looked up at call time, so this
        function can be defined before that table has been built.

    Returns
    -------
    dict with keys 'stim_chan', 'stim_gain', 'stim_path', 'valid_stim' plus every key
    returned by get_stim_info. 'valid_stim' is False when the per-sweep amplitudes have
    zero standard deviation. When the Lihua fallback is taken, the whole dict (including
    'stim_path') describes the fallback file rather than the file in meta_row.

    Raises
    ------
    ValueError
        If the Lihua fallback is needed but no metadata table is available.
    """
    # recording whose stimulus stands in for Lihua files that have no usable stimulus of their own
    lihua_fallback_cell_id = '14617300.abf'

    if cell_meta_df is None:
        # resolved lazily: the table is created later in the notebook than this definition
        cell_meta_df = globals().get('cell_meta_df')

    cell_id = meta_row['cell_id'].values[0]
    fn = meta_row['full_path'].values[0]
    recorder_name = meta_row['recorder_name'].values[0]
    abf = pyabf.ABF(fn) # loads in the abf file

    # figure out stim channel
    stim_chan = len(abf.channelList)-1 # this seems to be generally true
    abf.setSweep(0, channel=stim_chan)
    stim_gain = get_stim_gain(abf.sweepC)

    # this infers some basic info about stim amplitudes, durations, etc.
    stim_info_dict = get_stim_info(abf, stim_chan, stim_gain = stim_gain)

    if np.std(stim_info_dict['stim_amp_vec']) == 0:
        # identical amplitudes on every sweep means the guessed channel held no usable stimulus
        if recorder_name == 'Homeira':
            # retry on channel 0
            stim_chan = 0
            abf.setSweep(0, channel=stim_chan)
            stim_gain = get_stim_gain(abf.sweepC)
            stim_info_dict = get_stim_info(abf, stim_chan, stim_gain = stim_gain)
        elif (recorder_name == 'Lihua' and stim_info_dict['num_sweeps'] == 30
              and cell_id != lihua_fallback_cell_id):
            # logic here is that if abf file meets these criteria, we should replace the stimulus with the one from
            # a specific abf file with available info; the cell_id check stops the fallback file from
            # recursing into itself forever
            if cell_meta_df is None:
                raise ValueError('cell_meta_df is required to look up the fallback stimulus file '
                                 + lihua_fallback_cell_id)
            fallback_row = cell_meta_df.loc[cell_meta_df['cell_id'] == lihua_fallback_cell_id]
            return get_stim_dict(fallback_row, cell_meta_df)

    valid_stim = bool(np.std(stim_info_dict['stim_amp_vec']) != 0)
    ret_dict = {'stim_chan' : stim_chan, 'stim_gain' : stim_gain, 'stim_path' : fn, 'valid_stim' : valid_stim}

    ret_dict.update(stim_info_dict)
    return ret_dict

In [19]:
# read in cell metadata and processed ephys property dictionary
# this is needed because we need to know what the RMP values are per cell

summary_table_csv = '/Users/stripathy/rstudio_projects/valiante_ih/summary_tables/all_cells.csv'
#excel_file = file_base_base_path + 'valiante_lab_ephys_mar_2020/L23/Homeira/Total2Homeira-Lastversion_March 10_2020.xlsx'
# hand pandas the path itself so it opens and closes the file; wrapping it in open(...) left the handle unclosed
cell_info = pd.read_csv(summary_table_csv)
len(cell_info)

208

In [20]:
# define directories containing abf files from Homeira and Lihua's experiments

### change the directory paths below to where the files are on your local directory
file_base_base_path = '/Users/stripathy/Downloads/'
file_base_path = 'valiante_lab_ephys_mar_2020/'

# both pieces already end in a slash, so joining them yields the root folder of the recordings
path = ''.join((file_base_base_path, file_base_path))

# list every .abf file at any depth under path
abf_glob_pattern = path + "**/*.abf"
files = glob.glob(abf_glob_pattern, recursive=True)


In [21]:
import os
print(os.path.basename(files[0]))

# one record per abf file found on disk: its bare file name (used as the cell id) and its full path
files_list = [{'cell_id' : os.path.basename(abf_path), 'full_path' : abf_path} for abf_path in files]

# build the table and keep only the first row seen for each cell_id
file_dict = pd.DataFrame(files_list).drop_duplicates(subset=['cell_id'])
len(file_dict)

19122022.abf


230

In [22]:
# attach each cell's on-disk path to its metadata row; the left join keeps every row of cell_info
cell_meta_df = pd.merge(cell_info, file_dict, on='cell_id', how='left')

In [23]:
# test the usage of the convenience functions defined above on a single abf file

abf_file_name = '14617300.abf'
# boolean mask selecting the metadata row(s) whose cell_id matches the chosen file
is_chosen_cell = cell_meta_df['cell_id'] == abf_file_name
curr_row = cell_meta_df.loc[is_chosen_cell]
get_stim_dict(curr_row, cell_meta_df)

{'stim_chan': 0,
 'stim_gain': 1,
 'stim_path': '/Users/stripathy/Downloads/valiante_lab_ephys_mar_2020/L5/Lihua/TotalL5-Lihua-Lastversion/14617300.abf',
 'valid_stim': True,
 'stim_amp_vec': array([-400., -375., -350., ...,  275.,  300.,  325.]),
 'stim_duration': 0.6,
 'stim_start_time': 0.0656,
 'stim_end_time': 0.66556,
 'num_sweeps': 30,
 'stim_sampling_rate': 25000}

In [24]:
# sweep through all listed abf files and extract all relevant metadata required to parse raw data

# channel read as the response (voltage) trace for every file processed below
resp_chan = 0
dict_list = list()

for abf_file in cell_meta_df.cell_id:
    print(abf_file)
    row = cell_meta_df.loc[cell_meta_df['cell_id'] == abf_file]
    fn = row['full_path'].values[0]

    # NOTE(review): cell_rmp and num_sweeps are deliberately kept as plain notebook-level names;
    # check the helper functions defined earlier in the notebook before renaming or removing them
    cell_rmp = row['rmp'].values[0]

    abf = pyabf.ABF(fn) # loads in the abf file
    num_sweeps = abf.sweepCount

    # files with more than two channels are not handled: report and move on to the next cell
    if len(abf.channelList) > 2:
        print('skipping file because too many abf channels')
        continue

    # figure out response channel
    abf.setSweep(0, channel=resp_chan)
    resp_vec = abf.sweepY
    resp_sampling_rate = int(round(1/(abf.sweepX[2] - abf.sweepX[1]))) # manually calculate the sampling rate

    # try to figure out gain on response channel by comparing to RMP
    (best_gain, rmp_abs_error) = guess_response_gain(resp_vec, cell_rmp)

    stim_dict = get_stim_dict(row, cell_meta_df)

    # one flat record per cell: response-channel info first, then the stimulus info merged in
    ret_dict = {'cell_id' : abf_file, 'resp_chan' : resp_chan,
                'resp_gain' : best_gain, 'rmp_error' : rmp_abs_error,
                'rmp_val' : cell_rmp,
                'abf_version' : abf.abfVersionString, 'resp_sampling_rate' : resp_sampling_rate}
    ret_dict.update(stim_dict)

    dict_list.append(ret_dict)


cell_raw_file_meta_df = pd.DataFrame(dict_list)



2019_11_28_0079.abf
2019_11_28_0093.abf
2020_01_06_0017.abf
2020_01_06_0048.abf
2020_01_06_0063.abf
2020_01_06_0082.abf
2020_01_06_0090.abf
2020_01_06_0095.abf
2020_01_27_0038.abf
19122022.abf
skipping file because too many abf channels
19122024.abf
skipping file because too many abf channels
19129040.abf
skipping file because too many abf channels
19129047.abf
skipping file because too many abf channels
19129043.abf
skipping file because too many abf channels
2015_11_09_0003.abf
2015_11_09_0017.abf
2015_11_09_0053.abf
2015_11_09_0085.abf
2015_11_09_0106.abf
2015_11_09_0107.abf
2016_02_25_0005.abf
2016_02_25_0007.abf
2016_02_25_0073.abf
2016_02_25_0082.abf
2016_02_25_0134.abf
2016_02_25_0255.abf
2016_03_01_0000.abf
2016_03_01_0047.abf
2016_03_03_0002.abf
2016_03_03_0054.abf
2016_03_03_0100.abf
2016_03_03_0103.abf
19122017.abf
skipping file because too many abf channels
19129057.abf
skipping file because too many abf channels
19129069.abf
skipping file because too many abf channels
1932

In [25]:
# merge together cell_raw_file_meta_df with some other cell metadata from the imported csv

# columns carried over from the imported cell metadata
meta_columns = ['cell_id', 'expt_date', 'layer_name', 'recorder_name', 'full_path']
cell_meta_df_small = cell_meta_df[meta_columns]
# left join: every cell keeps a row even when no raw-file metadata was extracted for it
cell_final_raw_meta_df = pd.merge(cell_meta_df_small, cell_raw_file_meta_df, on='cell_id', how='left')

In [27]:
# display the combined table ordered by experiment date (the stored frame itself is not reordered)
cell_final_raw_meta_df.sort_values('expt_date')

Unnamed: 0,cell_id,expt_date,layer_name,recorder_name,full_path,abf_version,num_sweeps,resp_chan,resp_gain,resp_sampling_rate,...,rmp_val,stim_amp_vec,stim_chan,stim_duration,stim_end_time,stim_gain,stim_path,stim_sampling_rate,stim_start_time,valid_stim
46,13n05011.abf,2013-11-05,L5,Homeira,/Users/stripathy/Downloads/valiante_lab_ephys_...,1.8.3.0,15.0,0.0,20.00,10000.0,...,-54.937742,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0.0,0.6,0.73210,1000.0,/Users/stripathy/Downloads/valiante_lab_ephys_...,10000.0,0.1322,True
47,13n21007.abf,2013-11-21,L5,Homeira,/Users/stripathy/Downloads/valiante_lab_ephys_...,1.8.3.0,15.0,0.0,50.00,10000.0,...,-59.158322,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0.0,0.6,0.73210,1000.0,/Users/stripathy/Downloads/valiante_lab_ephys_...,10000.0,0.1322,True
40,13d02049.abf,2013-12-02,L5,Homeira,/Users/stripathy/Downloads/valiante_lab_ephys_...,1.8.3.0,15.0,0.0,20.00,10000.0,...,-57.000000,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0.0,0.6,0.76470,1000.0,/Users/stripathy/Downloads/valiante_lab_ephys_...,10000.0,0.1648,True
39,13d02022.abf,2013-12-02,L5,Homeira,/Users/stripathy/Downloads/valiante_lab_ephys_...,1.8.3.0,15.0,0.0,20.00,10000.0,...,-64.428708,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0.0,0.6,0.76470,1000.0,/Users/stripathy/Downloads/valiante_lab_ephys_...,10000.0,0.1648,True
38,13d02004.abf,2013-12-02,L5,Homeira,/Users/stripathy/Downloads/valiante_lab_ephys_...,1.8.3.0,15.0,0.0,20.00,10000.0,...,-59.069821,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0.0,0.6,0.76470,1000.0,/Users/stripathy/Downloads/valiante_lab_ephys_...,10000.0,0.1648,True
45,13d03040.abf,2013-12-03,L5,Homeira,/Users/stripathy/Downloads/valiante_lab_ephys_...,1.8.3.0,15.0,0.0,20.00,10000.0,...,-60.000000,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0.0,0.6,0.76470,1000.0,/Users/stripathy/Downloads/valiante_lab_ephys_...,10000.0,0.1648,True
44,13d03029.abf,2013-12-03,L5,Homeira,/Users/stripathy/Downloads/valiante_lab_ephys_...,1.8.3.0,15.0,0.0,20.00,10000.0,...,-56.463620,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0.0,0.6,0.76470,1000.0,/Users/stripathy/Downloads/valiante_lab_ephys_...,10000.0,0.1648,True
43,13d03008.abf,2013-12-03,L5,Homeira,/Users/stripathy/Downloads/valiante_lab_ephys_...,1.8.3.0,15.0,0.0,20.00,10000.0,...,-59.320066,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0.0,0.6,0.76470,1000.0,/Users/stripathy/Downloads/valiante_lab_ephys_...,10000.0,0.1648,True
42,13d03007.abf,2013-12-03,L5,Homeira,/Users/stripathy/Downloads/valiante_lab_ephys_...,1.8.3.0,15.0,0.0,20.00,10000.0,...,-56.463620,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0.0,0.6,0.76470,1000.0,/Users/stripathy/Downloads/valiante_lab_ephys_...,10000.0,0.1648,True
41,13d03005.abf,2013-12-03,L5,Homeira,/Users/stripathy/Downloads/valiante_lab_ephys_...,1.8.3.0,15.0,0.0,20.00,10000.0,...,-63.714597,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0.0,0.6,0.76470,1000.0,/Users/stripathy/Downloads/valiante_lab_ephys_...,10000.0,0.1648,True
