## Generating summary tables

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from data_manager import DataManager
import transport_signal_processing as tsp

### Measurements and segments summary

In [2]:
# parameters
# Database selection pattern; the trailing '*' is a wildcard component.
path = "K238A_25_1MKCl10mMHEPESpH7p8/MP446_P3_2/51p28uM/*"

# define session directory
# The pattern is flattened into a single directory name: '*' becomes '+'
# and '/' becomes '-', so the name contains no path separators or wildcards.
session_name = path.replace('*','+').replace('/','-')
session_path = os.path.join("results", session_name)
# exist_ok=True creates the directory only when missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(session_path, exist_ok=True)
print(f"Results save path: {session_path}")

# setup database connector
sigman = DataManager('database', safe=False)

# load information
# 'm*' entries populate the measurements table, 's*' entries the segments table.
mdf = pd.DataFrame(sigman.load_info(path, 'm*'))
# Segments with any missing field are dropped, then ordered by decreasing N_cores.
sdf = pd.DataFrame(sigman.load_info(path, 's*')).dropna().sort_values("N_cores", ascending=False)

# save to tables
mdf.to_csv(os.path.join(session_path, "measurements.csv"))
sdf.to_csv(os.path.join(session_path, "segments.csv"))

# debug display
display(mdf)
display(sdf)

Results save path: results/K238A_25_1MKCl10mMHEPESpH7p8-MP446_P3_2-51p28uM-+


Unnamed: 0,pore,temperature,buffer,analyte,analyte_quantity,voltage,date,recording,channel,count,filepath,pore_quantity,buffer_quantity,MODIFIED,dt
0,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,100,221124,5,3,0,raw_data/Batch2/221124 R05 Pc_0p5uL Ac_51p28uM...,0p5uL,190uL,2022-11-25_13:01:56,5e-06
1,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,100,221124,5,3,1,raw_data/Batch2/221124 R05 Pc_0p5uL Ac_51p28uM...,0p5uL,190uL,2022-11-25_13:02:08,5e-06
2,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,100,221124,6,1,0,raw_data/Batch2/221124 R06 Pc_0p5uL Ac_51p28uM...,0p5uL,190uL,2022-11-25_13:02:20,5e-06
3,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,100,221124,11,4,0,raw_data/Batch2/221124 R11 Pc_0p5uL Ac_51p28uM...,0p5uL,190uL,2022-11-25_13:02:26,5e-06
4,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,100,221124,14,2,2,raw_data/Batch2/221124 R14 Pc_0p5uL Ac_51p28uM...,0p5uL,190uL,2022-11-25_13:02:34,5e-06
5,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,100,221124,14,2,3,raw_data/Batch2/221124 R14 Pc_0p5uL Ac_51p28uM...,0p5uL,190uL,2022-11-25_13:02:46,5e-06
6,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,100,221124,14,2,6,raw_data/Batch2/221124 R14 Pc_0p5uL Ac_51p28uM...,0p5uL,190uL,2022-11-25_13:02:58,5e-06
7,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,100,221124,14,2,9,raw_data/Batch2/221124 R14 Pc_0p5uL Ac_51p28uM...,0p5uL,190uL,2022-11-25_13:03:10,5e-06
8,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,100,221124,14,2,12,raw_data/Batch2/221124 R14 Pc_0p5uL Ac_51p28uM...,0p5uL,190uL,2022-11-25_13:03:21,5e-06
9,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,100,221124,14,2,17,raw_data/Batch2/221124 R14 Pc_0p5uL Ac_51p28uM...,0p5uL,190uL,2022-11-25_13:03:36,5e-06


Unnamed: 0,pore,temperature,buffer,analyte,analyte_quantity,voltage,date,recording,channel,count,...,segment_range,segment_duration,MODIFIED,mI_open,sI_open,N_events,N_cores,N_reduced,selected,ratio_sel
35,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,160,221124,14,2,15,...,"[0, 59999999]",299.999995,2022-11-25_13:15:18,111.130726,8.744505,12488,6509,6509,1,0.215855
23,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,130,221124,14,2,13,...,"[0, 60199999]",300.999995,2022-11-25_13:15:17,86.759225,8.55851,5256,2812,2812,1,0.420341
33,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,160,221124,14,2,11,...,"[0, 59999999]",299.999995,2022-11-25_13:15:18,103.324598,12.151452,6063,2616,2616,1,0.830657
32,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,160,221124,14,2,8,...,"[0, 59999999]",299.999995,2022-11-25_13:15:18,111.390529,8.369764,8245,2504,2504,1,0.793131
22,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,130,221124,14,2,10,...,"[0, 59999999]",299.999995,2022-11-25_13:15:17,81.513896,9.589626,3979,2373,2373,1,0.753898
34,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,160,221124,14,2,14,...,"[0, 59999999]",299.999995,2022-11-25_13:15:18,111.571166,7.58924,7967,1936,1936,1,0.646694
28,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,130,221124,15,2,1,...,"[0, 59999999]",299.999995,2022-11-25_13:15:17,94.405801,7.659753,6590,1828,1828,1,0.84628
31,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,160,221124,14,2,5,...,"[0, 60199999]",300.999995,2022-11-25_13:15:18,112.646192,7.64964,8803,1715,1715,1,0.814577
24,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,130,221124,14,2,16,...,"[0, 59999999]",299.999995,2022-11-25_13:15:17,88.525986,8.236801,4908,1609,1609,1,0.678682
29,K238A,25,1MKCl10mMHEPESpH7p8,MP446_P3_2,51p28uM,130,221124,15,2,3,...,"[0, 60199999]",300.999995,2022-11-25_13:15:17,93.293394,7.415502,7111,1587,1587,1,0.909263


### Event statistics summary

In [3]:
# One plain dict per segment, in the current row order of sdf
sinfo_l = [dict(row) for _, row in sdf.iterrows()]

# Load the per-event "selected" flags and "stats" rows for those segments.
# `ids` is used below as positional row indices into sdf, one per event
# (presumably the index of the segment each event came from; confirm in tsp.utils).
selected, _ = tsp.utils.load_segments_data(sigman, sinfo_l, "selected")
stats, ids = tsp.utils.load_segments_data(sigman, sinfo_l, "stats")
stats = np.array(stats)

# Segment-level values repeated per event, looked up once
event_segments = sdf.iloc[ids]
open_pore_mean = event_segments["mI_open"].values
open_pore_std = event_segments["sI_open"].values
event_std = stats[:, 2]

# Assemble the per-event table; key order fixes the CSV column order.
# NOTE(review): the "rel standard deviation ..." column is a ratio of two
# values both labelled [pA] here, so its "[pA]" suffix looks wrong. The key is
# left unchanged because it is the exported column name.
data = {
    "sid": event_segments["sid"].values,
    "dwell time [s]": stats[:, 0],
    "mean current [pA]": stats[:, 1],
    "standard deviation [pA]": event_std,
    "skewness": stats[:, 3],
    "kurtosis": stats[:, 4],
    "mean open pore current [pA]": open_pore_mean,
    "standard deviation open pore current [pA]": open_pore_std,
    "rel standard deviation event/open pore [pA]": event_std / open_pore_std,
    "selected": np.array(selected).astype(int),
}
df = pd.DataFrame(data)

# Write the per-event table next to the other session outputs
df.to_csv(os.path.join(session_path, "events_summary.csv"))

# Session totals computed from the segment table
total_events = (sdf.ratio_sel * sdf.N_cores * sdf.selected).sum()
open_pore_minutes = round((sdf.segment_duration * sdf.selected).sum() / 60, 2)
print(f"Total number of events is {total_events!s} events")
print(f"Total time of open pore is {open_pore_minutes!s} minutes")

# debug display
display(df)

Total number of events is 36898.0 events
Total time of open pore is 184.23 minutes


Unnamed: 0,sid,dwell time [s],mean current [pA],standard deviation [pA],skewness,kurtosis,mean open pore current [pA],standard deviation open pore current [pA],rel standard deviation event/open pore [pA],selected
0,0,0.000450,54.736763,24.516295,-0.057058,-1.348565,111.130726,8.744505,2.803623,1
1,0,0.000615,15.581785,5.870998,-0.630369,-1.120963,111.130726,8.744505,0.671393,1
2,0,0.000580,19.510910,1.870457,-0.415407,-1.571708,111.130726,8.744505,0.213901,1
3,0,0.001345,8.592671,4.706817,-0.237097,-1.430061,111.130726,8.744505,0.538260,1
4,0,0.000325,21.260223,0.522873,0.073686,-1.452942,111.130726,8.744505,0.059794,1
...,...,...,...,...,...,...,...,...,...,...
50071,0,0.406610,-1.588244,15.741625,-9.688005,100.989690,59.890898,7.807668,2.016175,0
50072,0,0.536690,-1.243718,13.749412,-11.063471,133.581005,59.890898,7.807668,1.761014,0
50073,0,0.354735,-1.640043,16.864634,-9.012293,87.439577,59.890898,7.807668,2.160009,0
50074,0,0.390755,-1.505497,16.086683,-9.462144,96.464877,59.890898,7.807668,2.060370,0


In [4]:
# Events per second for each segment: ratio_sel * N_cores divided by the
# segment duration. Both numerator and denominator are multiplied by
# `selected`, so a segment with selected == 0 evaluates to 0/0 = NaN and is
# skipped by the mean/std below (assumes `selected` is a 0/1 flag; confirm).
sdf['eps'] = (sdf.ratio_sel*sdf.N_cores*sdf.selected)/(sdf.segment_duration*sdf.selected)

# Condition label "<pore>-<analyte>-<voltage>", built column-wise so it also
# works when sdf has no rows (a row-wise apply returns a frame in that case).
sdf['condition'] = sdf['pore'].astype(str) + "-" + sdf['analyte'].astype(str) + "-" + sdf['voltage'].astype(str)

# Mean and standard deviation of eps per condition. Only the 'eps' column is
# aggregated: calling .mean()/.std() on the whole grouped frame raises
# TypeError on pandas >= 2.0 because sdf also holds string columns.
sdf.groupby('condition')['eps'].agg(mean_eps='mean', std_eps='std')

Unnamed: 0_level_0,mean_eps,std_eps
condition,Unnamed: 1_level_1,Unnamed: 2_level_1
K238A-MP446_P3_2-100,2.279119,0.670298
K238A-MP446_P3_2-130,3.791458,1.164129
K238A-MP446_P3_2-160,4.950224,1.267284
