In [5]:
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyabf

In [6]:
# these are a set of convenience functions for working with Homeira and Lihua's files 
from scripts.util_functions import guess_response_gain, get_stim_gain, get_stim_info, get_stim_dict

In [7]:
# load the sheet containing gains and offsets provided by Homeira
# this is needed because we need to know what the RMP values are per cell

summary_table_csv = '/Users/stripathy/rstudio_projects/valiante_ih/summary_tables/gain_offset_info_merged.csv'
#excel_file = file_base_base_path + 'valiante_lab_ephys_mar_2020/L23/Homeira/Total2Homeira-Lastversion_March 10_2020.xlsx'

# hand pandas the path rather than an open() handle: pandas then opens *and closes*
# the file itself, whereas the handle created inline was never closed
cell_info = pd.read_csv(summary_table_csv)
cell_info.shape

(212, 12)

In [8]:
# define directories containing abf files from Homeira and Lihua's experiments

### change the directory paths below to where the files are on your local directory
file_base_base_path = '/Users/stripathy/Downloads/'
#file_base_path = 'valiante_lab_ephys_mar_2020/'
file_base_path = ''

# os.path.join inserts separators where needed, so the search still works when one
# of the components above is written without its trailing slash
path = os.path.join(file_base_base_path, file_base_path)

# get all abf files in the directory tree below `path`.
# glob returns matches in arbitrary order; sorting makes the listing reproducible,
# which matters because only the first path found for a given file name is kept later on
files = sorted(glob.glob(os.path.join(path, '**', '*.abf'), recursive=True))



In [9]:
# fail early with a clear message when the search came back empty; otherwise
# files[0] raises a bare IndexError and the frame built below has no 'cell_id' column
if not files:
    raise FileNotFoundError('no .abf files found under ' + repr(path))
print(os.path.basename(files[0]))

# start keeping track of important metadata: the bare file name is the cell id
# that the summary table is keyed on, the full path says where to load it from
files_list = [{'cell_id' : os.path.basename(f), 'full_path' : f} for f in files]
file_dict = pd.DataFrame(files_list)

# drop file duplicates (the same file name found in several directories),
# keeping the first path found for each name
file_dict = file_dict.drop_duplicates(subset=['cell_id'])
len(file_dict)

c228.abf


1715

In [10]:
# join every summary-table row with the location of its abf file on disk
# (left join: all rows of cell_info are kept, with or without a matching file),
# then make sure each cell id appears only once
cell_meta_df = (
    cell_info
    .merge(file_dict, how='left', on='cell_id')
    .drop_duplicates(subset=['cell_id'])
)
# cell_meta_df = cell_meta_df.merge(gain_info_df, on='cell_id', how='left')

In [11]:
# sanity check on the merge: one row per cell id, and one extra column
# (full_path) compared with cell_info
cell_meta_df.shape

(212, 13)

In [12]:
# assign rmp values to those with missing rmps
def get_v_baseline_from_fn(file_path, use_sweep = 0, max_sweep = 5, v_threshold = -50):
    """Read a baseline voltage for a cell from the first sample of a sweep on channel 0.

    Sweeps are inspected in order, starting at `use_sweep`. A sweep whose first
    sample lies above `v_threshold` is skipped in favour of the next one
    (presumably because such a depolarised value is not a plausible resting
    potential -- TODO confirm with the data owners).

    Parameters
    ----------
    file_path : path of the abf file to read.
    use_sweep : index of the first sweep to inspect.
    max_sweep : highest sweep index the search may advance to. It is clamped to
        the last sweep in the file, so short recordings no longer make the
        search run past the end of the file.
    v_threshold : a first sample above this value makes the search move on to
        the next sweep.

    Returns
    -------
    The first sample of the first inspected sweep that is not above
    `v_threshold`, or of the last sweep inspected when no sweep qualifies.
    """
    # load the file once; re-reading it from disk for every sweep tried is wasted work
    abf_file = pyabf.ABF(file_path)
    last_sweep = min(max_sweep, abf_file.sweepCount - 1)

    sweep = use_sweep
    while True:
        abf_file.setSweep(sweep, channel=0)
        v_baseline = abf_file.sweepY[0]
        # `not (x > t)` instead of `x <= t` so that a NaN sample ends the search
        if not (v_baseline > v_threshold) or sweep >= last_sweep:
            return v_baseline
        sweep += 1

# cells whose summary-table row carries no rmp get the baseline read from their abf file
null_rmp_indices = cell_meta_df['rmp'].isna()
new_rmp_values = list(map(get_v_baseline_from_fn, cell_meta_df.loc[null_rmp_indices, 'full_path']))
cell_meta_df.loc[null_rmp_indices, 'rmp'] = new_rmp_values

In [13]:
# try out the imported convenience functions on a single abf file

abf_file_name = '2019_09_03_0023.abf'
curr_row = cell_meta_df[cell_meta_df['cell_id'].eq(abf_file_name)]
print(curr_row['full_path'])
get_stim_dict(curr_row, cell_meta_df, stim_name = 'sweepY')



186    /Users/stripathy/Downloads/homeira_new_intrins...
Name: full_path, dtype: object


{'stim_chan': 2,
 'stim_gain': 1,
 'stim_path': '/Users/stripathy/Downloads/homeira_new_intrinsic_files/OneDrive_1_5-27-2020 (2)/Homeira/Layer 2_Homeira_last version/2019_09_03_0023.abf',
 'valid_stim': True,
 'stim_amp_vec': array([-44., -38., -32., ...,  57.,  64.,  70.]),
 'stim_duration': 0.6118733105523537,
 'stim_start_time': 0.09216096292140562,
 'stim_end_time': 0.7040852756668537,
 'num_sweeps': 19,
 'stim_sampling_rate': 19607}

In [14]:
# try out guess_response_gain on the first sweep of a single abf file
abf_file_name = '2019_09_03_0036.abf'

# pull the metadata the function needs out of this cell's row
curr_row = cell_meta_df[cell_meta_df['cell_id'].eq(abf_file_name)]
fn = curr_row['full_path'].iloc[0]
rmp = curr_row['rmp'].iloc[0]
offset = curr_row['resp_offset'].iloc[0]
print(fn)
print(curr_row.columns)
print(offset)
print(rmp)

abf = pyabf.ABF(fn) # loads in the abf file
print(abf)
# NOTE(review): this reads channel 0, whereas the metadata loop further down reads the
# response of 3-channel files from channel 1 -- confirm which channel is intended here
abf.setSweep(0, channel=0)
resp_vec = abf.sweepY
print(resp_vec)
guess_response_gain(resp_vec, rmp, offset)

/Users/stripathy/Downloads/homeira_new_intrinsic_files/OneDrive_1_5-27-2020 (2)/Homeira/Layer 2_Homeira_last version/2019_09_03_0036.abf
Index(['Unnamed: 0', 'cell_id', 'clampfit_gain', 'resp_offset',
       'resp_channel_name', 'command_channel_name', 'expt_date', 'layer_name',
       'cell_type', 'recorder_name', 'subject_id', 'rmp', 'full_path'],
      dtype='object')
0.0
-65.8
ABF (version 2.6.0.0) with 3 channels (mV, mV, pA), sampled at 19.607 kHz, containing 14 sweeps, having 1 tag (C3, L2-RMP -65.8 mv), with a total length of 0.66 minutes, recorded with protocol "I-V curve ,-400 pA".
[-501.1902 -501.0986 -501.1902 ... -500.1526 -500.0915 -500.0915]


(0.25, 59.49753875732422)

In [15]:
# sweep through all listed abf files and extract all relevant metadata required to parse raw data

# Files with 3 or 4 channels are treated as having the stimulus on one of the
# recorded channels (read via 'sweepY'). For those, the number of channels in the
# file decides which channel holds the response, which holds the stimulus, and
# which stimulus gain is applied. Every other file uses response channel 0 and
# gets its stimulus info from get_stim_dict (read via 'sweepC').
RECORDED_STIM_LAYOUTS = {
    3 : {'resp_chan' : 1, 'stim_chan' : 2, 'stim_gain' : 10},
    4 : {'resp_chan' : 2, 'stim_chan' : 3, 'stim_gain' : .2},
}

dict_list = list()

for abf_file in cell_meta_df.cell_id:
    print(abf_file)
    row = cell_meta_df.loc[cell_meta_df['cell_id'] == abf_file]

    fn = row['full_path'].values[0]
    cell_rmp = row['rmp'].values[0]
    resp_offset = row['resp_offset'].values[0]

    abf = pyabf.ABF(fn) # loads in the abf file
    num_sweeps = abf.sweepCount
    # keep only the first tag comment, if the file has any
    abf_tag = abf.tagComments[0] if abf.tagComments else None

    layout = RECORDED_STIM_LAYOUTS.get(len(abf.channelList))
    resp_chan = 0 if layout is None else layout['resp_chan']

    # read the first sweep of the response channel
    abf.setSweep(0, channel=resp_chan)
    resp_vec = abf.sweepY
    # manually calculate the sampling rate from the spacing of the time axis.
    # This must be done for every file: it used to be computed only for files that
    # are neither 3- nor 4-channel, so the other files either raised a NameError or
    # silently recorded the sampling rate of the previously processed file.
    resp_sampling_rate = int(round(1/(abf.sweepX[2] - abf.sweepX[1])))

    # try to figure out gain on response channel by comparing to RMP
    (best_gain, rmp_abs_error) = guess_response_gain(resp_vec, cell_rmp, resp_offset)

    if layout is None:
        stim_name = 'sweepC'
        stim_dict = get_stim_dict(row, cell_meta_df, stim_name)
    else:
        stim_name = 'sweepY'
        stim_dict = get_stim_info(abf, stim_channel_num = layout['stim_chan'],
                                  stim_gain = layout['stim_gain'], stim_name = stim_name)
        stim_dict.update({'stim_chan' : layout['stim_chan'], 'stim_gain' : layout['stim_gain'],
                          'stim_path' : fn, 'valid_stim' : True})

    # if in the odd case the number of sweeps doesn't match between resp vs stim file, then make them match
    if num_sweeps != stim_dict['num_sweeps']:
        print('{}: response has {} sweeps but stim info has {}; using the response count'.format(
            abf_file, num_sweeps, stim_dict['num_sweeps']))
        stim_dict['num_sweeps'] = num_sweeps
        # NOTE(review): slicing can only shorten stim_amp_vec; if the stim info has
        # *fewer* sweeps than the response, the vector stays shorter than num_sweeps
        stim_dict['stim_amp_vec'] = stim_dict['stim_amp_vec'][0:num_sweeps]

    ret_dict = {'cell_id' : abf_file, 'resp_chan' : resp_chan,
                'resp_gain' : best_gain, 'rmp_error' : rmp_abs_error, 'abf_tag' : abf_tag,
                'rmp_val' : cell_rmp, 'resp_offset' : resp_offset,
                'abf_version' : abf.abfVersionString, 'resp_sampling_rate' : resp_sampling_rate,
                'stim_name' : stim_name, 'file_time' : abf.abfDateTime}
    # the stimulus fields are stored alongside the response fields in the same record
    ret_dict.update(stim_dict)

    dict_list.append(ret_dict)


cell_raw_file_meta_df = pd.DataFrame(dict_list)



13n05011.abf
13n21007.abf
13d02004.abf
13d02022.abf
13d02049.abf
13d03005.abf
13d03007.abf
13d03008.abf
13d03029.abf
13d03040.abf
20131211_600_1_0002.abf
20131211_600_1_0042.abf
20131211_600_1_0066.abf
20131211_600_1_0067.abf
20131211_600_1_0081.abf
20131211_600_1_0108.abf
20131211_600_1_0160.abf
20131211_600_1_0184.abf
20131211_600_1_0190.abf
20140127_600_1_0118.abf
20140127_600_1_0158.abf
14304300.abf
14306300.abf
14317300.abf
14317320.abf
14317348.abf
14317333.abf
14318308.abf
14408304.abf
14408304.abf
14424000.abf
14424329.abf
14424335.abf
14515300.abf
14515323.abf
14515323.abf
14515374.abf
14515399.abf
14520300.abf
14520332.abf
14605300.abf
14605325.abf
14605338.abf
14617300.abf
14617312.abf
14624300.abf
14624325.abf
14624359.abf
14624371.abf
14626300.abf
14715300.abf
14715334.abf
14911303.abf
14918300.abf
14918302.abf
14918344.abf
14918371.abf
14918305.abf
14n03328.abf
14n03301.abf
14n03358.abf
14n04322.abf
14n04362.abf
14n10459.abf
14n10301.abf
14d02000.abf
14d02055.abf
14d02065

In [16]:
# inspect the metadata gathered by the loop above (one record per processed abf file)
cell_raw_file_meta_df

Unnamed: 0,abf_tag,abf_version,cell_id,file_time,num_sweeps,resp_chan,resp_gain,resp_offset,resp_sampling_rate,rmp_error,...,stim_amp_vec,stim_chan,stim_duration,stim_end_time,stim_gain,stim_name,stim_path,stim_sampling_rate,stim_start_time,valid_stim
0,,1.8.3.0,13n05011.abf,2013-11-05 17:06:16.448,15,0,20.00,0.0,10000,0.158689,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.732100,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.132200,True
1,,1.8.3.0,13n21007.abf,2013-11-21 14:30:46.356,15,0,50.00,0.0,10000,3.677365,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.732100,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.132200,True
2,,1.8.3.0,13d02004.abf,2013-12-02 15:47:42.278,15,0,20.00,0.0,10000,2.288816,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.764700,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.164800,True
3,,1.8.3.0,13d02022.abf,2013-12-02 17:09:07.433,15,0,20.00,0.0,10000,1.049802,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.764700,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.164800,True
4,gain50 resting -57mv,1.8.3.0,13d02049.abf,2013-12-02 18:21:58.566,15,0,20.00,0.0,10000,1.789062,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.764700,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.164800,True
5,,1.8.3.0,13d03005.abf,2013-12-03 12:13:09.804,15,0,20.00,0.0,10000,2.984616,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.764700,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.164800,True
6,,1.8.3.0,13d03007.abf,2013-12-03 12:59:10.191,15,0,20.00,0.0,10000,3.710935,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.764700,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.164800,True
7,,1.8.3.0,13d03008.abf,2013-12-03 14:50:32.511,15,0,20.00,0.0,10000,3.802487,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.764700,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.164800,True
8,,1.8.3.0,13d03029.abf,2013-12-03 12:59:10.191,15,0,20.00,0.0,10000,3.710935,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.764700,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.164800,True
9,resting -60mv gain 50,1.8.3.0,13d03040.abf,2013-12-03 13:29:28.194,15,0,20.00,0.0,10000,1.230469,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.764700,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.164800,True


In [17]:
# attach the per-file metadata gathered from the abf files to a handful of
# cell-level annotation columns taken from the imported csv

cell_meta_df_small = cell_meta_df.loc[:, ['cell_id', 'expt_date', 'layer_name', 'cell_type', 'recorder_name', 'full_path']]
cell_final_raw_meta_df = pd.merge(cell_meta_df_small, cell_raw_file_meta_df, how='left', on='cell_id')

In [18]:
# order the rows by experiment date
cell_final_raw_meta_df = cell_final_raw_meta_df.sort_values('expt_date')

In [19]:
# inspect the merged table after sorting by expt_date
cell_final_raw_meta_df

Unnamed: 0,cell_id,expt_date,layer_name,cell_type,recorder_name,full_path,abf_tag,abf_version,file_time,num_sweeps,...,stim_amp_vec,stim_chan,stim_duration,stim_end_time,stim_gain,stim_name,stim_path,stim_sampling_rate,stim_start_time,valid_stim
0,13n05011.abf,2013-11-05,L5,Pyr,Homeira,/Users/stripathy/Downloads/homeira_new_intrins...,,1.8.3.0,2013-11-05 17:06:16.448,15,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.732100,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.132200,True
1,13n21007.abf,2013-11-21,L5,Pyr,Homeira,/Users/stripathy/Downloads/homeira_new_intrins...,,1.8.3.0,2013-11-21 14:30:46.356,15,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.732100,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.132200,True
2,13d02004.abf,2013-12-02,L5,Pyr,Homeira,/Users/stripathy/Downloads/homeira_new_intrins...,,1.8.3.0,2013-12-02 15:47:42.278,15,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.764700,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.164800,True
3,13d02022.abf,2013-12-02,L5,Pyr,Homeira,/Users/stripathy/Downloads/homeira_new_intrins...,,1.8.3.0,2013-12-02 17:09:07.433,15,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.764700,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.164800,True
4,13d02049.abf,2013-12-02,L5,Pyr,Homeira,/Users/stripathy/Downloads/homeira_new_intrins...,gain50 resting -57mv,1.8.3.0,2013-12-02 18:21:58.566,15,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.764700,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.164800,True
5,13d03005.abf,2013-12-03,L5,Pyr,Homeira,/Users/stripathy/Downloads/homeira_new_intrins...,,1.8.3.0,2013-12-03 12:13:09.804,15,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.764700,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.164800,True
6,13d03007.abf,2013-12-03,L5,Pyr,Homeira,/Users/stripathy/Downloads/homeira_new_intrins...,,1.8.3.0,2013-12-03 12:59:10.191,15,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.764700,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.164800,True
7,13d03008.abf,2013-12-03,L5,Pyr,Homeira,/Users/stripathy/Downloads/homeira_new_intrins...,,1.8.3.0,2013-12-03 14:50:32.511,15,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.764700,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.164800,True
8,13d03029.abf,2013-12-03,L5,Pyr,Homeira,/Users/stripathy/Downloads/homeira_new_intrins...,,1.8.3.0,2013-12-03 12:59:10.191,15,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.764700,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.164800,True
9,13d03040.abf,2013-12-03,L5,Pyr,Homeira,/Users/stripathy/Downloads/homeira_new_intrins...,resting -60mv gain 50,1.8.3.0,2013-12-03 13:29:28.194,15,...,"[-400.0, -350.0, -300.0, -250.0, -200.0, -150....",0,0.600000,0.764700,1000.0,sweepC,/Users/stripathy/Downloads/homeira_new_intrins...,10000,0.164800,True


In [20]:
# save a final csv that has the output of this metadata gathering process
csv_meta_save_path = 'output_files/cell_final_raw_meta_df.csv'
# to_csv does not create missing directories, so make sure the target folder exists first
os.makedirs(os.path.dirname(csv_meta_save_path), exist_ok=True)
# NOTE(review): stim_amp_vec holds arrays, which end up in the csv as their string
# form -- confirm that downstream readers can parse that back
cell_final_raw_meta_df.to_csv(csv_meta_save_path)

In [21]:
# keep only the cells whose stimulus was flagged as not valid
df = cell_final_raw_meta_df[cell_final_raw_meta_df['valid_stim'].eq(False)]

In [21]:
# for those cells, show the columns that bear on the response-gain guess
df.loc[:, ['cell_id', 'abf_tag', 'rmp_error', 'resp_gain']]

Unnamed: 0,cell_id,abf_tag,rmp_error,resp_gain


In [20]:
# show every column for the cells selected by the valid_stim == False filter
df

Unnamed: 0,cell_id,expt_date,layer_name,cell_type,recorder_name,full_path,abf_tag,abf_version,num_sweeps,resp_chan,...,stim_amp_vec,stim_chan,stim_duration,stim_end_time,stim_gain,stim_name,stim_path,stim_sampling_rate,stim_start_time,valid_stim
