In [89]:
import pandas as pd
import numpy as np
from math import floor

In [90]:
# Load the accuracy table; the relative path is resolved against the kernel's working directory.
df = pd.read_csv('new_accuracy_table.csv')

In [91]:
# Discard rows that have no gated_count value; every later cell works on the remainder.
df = df.dropna(subset=['gated_count'])

In [92]:
# Cast gated_count to integers (rows with a missing gated_count were dropped in the previous cell,
# so no NaN should reach the cast).
# Plain column assignment is used instead of `df.at[:, ...]`: `.at` is documented as a
# single-value accessor, so handing it a slice depends on behaviour outside its contract.
df['gated_count'] = df['gated_count'].astype(int)

In [93]:
def fix_input(num: float) -> str:
    """Render a numeric input code as a two-character label.

    Args:
        num: The value to render; may be missing (anything ``pd.isna`` reports as NA).

    Returns:
        ``"-"`` for a missing value, otherwise the floored value formatted as a
        zero-padded, width-two integer (e.g. ``1.0`` -> ``"01"``).
    """
    return "-" if pd.isna(num) else "{:02d}".format(floor(num))


# Element-wise wrapper so fix_input can be applied to a whole column or array at once.
fix_input_column = np.vectorize(fix_input)

In [94]:
# Replace the numeric input codes with their two-character labels ("-" where the value is missing).
# The column is assigned directly rather than through `df.at[:, ...]`: `.at` is a single-value
# accessor and is not meant to receive a slice.
# NOTE: this cell is not re-runnable on its own output, because fix_input expects numeric values.
df['input'] = fix_input_column(df['input'])

In [95]:
# Rows without a gate label fall back to their strain_name.
# `fillna` with a Series fills by index label, which matches the original element-wise
# np.where(isna(gate), strain_name, gate) because both columns come from the same frame.
# Direct column assignment replaces `df.at[:, ...]`, since `.at` is a single-value accessor.
df['gate'] = df['gate'].fillna(df['strain_name'])

In [96]:
# Preview the first rows of the frame after the cleaning steps above.
df.head()

Unnamed: 0,lab_id,strain_name,plate_id,well,count,count_live,experiment_id,filename,gate,gated_count,...,std_correct_low_threshold,std_correct_low_threshold_live,std_correct_threshold,std_correct_threshold_live,std_log_gfp,std_log_gfp_gated,std_log_gfp_gated_live,std_log_gfp_live,threshold,threshold_live
0,r1c5va879uaex_r1c639xp952g4,NOR-00-Control,0,C12,,,,,NOR-00-Control,25743,...,,,,,,0.286968,,,,
1,r1c5va879uaex_r1c639xp952g4,WT-Live-Control,0,A12,,,,,WT-Live-Control,16591,...,,,,,,0.469159,,,,
2,r1c5va879uaex_r1c639xp952g4,https://hub.sd2e.org/user/sd2e/design/UWBF_169...,0,B03,28297.0,24410.0,,/work/05202/dbryce/xplan-reactor/fcs/YeastGate...,XOR,24715,...,0.002825,0.002889,0.002825,0.002889,0.981708,0.39589,0.395018,0.93442,5.257244,5.284602
3,r1c5va879uaex_r1c639xp952g4,https://hub.sd2e.org/user/sd2e/design/UWBF_169...,0,D03,27568.0,23178.0,,/work/05202/dbryce/xplan-reactor/fcs/YeastGate...,XOR,23737,...,0.002714,0.002791,0.002714,0.002791,1.035838,0.422622,0.42167,0.980817,5.257244,5.284602
4,r1c5va879uaex_r1c639xp952g4,https://hub.sd2e.org/user/sd2e/design/UWBF_169...,0,A10,29862.0,15228.0,,/work/05202/dbryce/xplan-reactor/fcs/YeastGate...,XOR,25483,...,0.002156,0.003665,0.002156,0.003665,0.872558,0.327265,0.327281,0.928051,5.257244,5.284602


In [97]:
# Distinct input labels present after relabelling.
df['input'].unique()

array(['-', '11', '10', '01', '00'], dtype=object)

In [98]:
# List the column names available in the loaded table.
df.columns

Index(['lab_id', 'strain_name', 'plate_id', 'well', 'count', 'count_live',
       'experiment_id', 'filename', 'gate', 'gated_count', 'gated_live_count',
       'growth_media_1', 'growth_media_2', 'id', 'inc_temp', 'inc_time_1',
       'inc_time_2', 'index', 'input', 'lab', 'mean_correct_classifier',
       'mean_correct_classifier_live', 'mean_correct_high_classifier',
       'mean_correct_high_classifier_live', 'mean_correct_high_threshold',
       'mean_correct_high_threshold_live', 'mean_correct_low_classifier',
       'mean_correct_low_classifier_live', 'mean_correct_low_threshold',
       'mean_correct_low_threshold_live', 'mean_correct_threshold',
       'mean_correct_threshold_live', 'mean_log_gfp', 'mean_log_gfp_gated',
       'mean_log_gfp_gated_live', 'mean_log_gfp_live', 'media', 'od',
       'od_cutoff', 'output', 'plan', 'replicate', 'source_container',
       'std_correct_classifier', 'std_correct_classifier_live',
       'std_correct_high_classifier', 'std_correct_high_

In [99]:
# Narrow the frame to the nine columns listed here; everything else is discarded.
df = df.loc[:, [
    'lab_id', 'well', 'count', 'gate', 'input', 'output',
    'gated_count', 'growth_media_1', 'inc_temp',
]]

In [100]:
# Total number of rows in the frame.
len(df)

7351

In [101]:
# Number of rows whose gated_count is at least 10,000.
int((df['gated_count'] >= 10_000).sum())

7351

In [102]:
# Sum of gated_count over rows with at least 10,000 gated events, printed with thousands separators.
total_gated_events = df.loc[df['gated_count'] >= 10_000, 'gated_count'].sum()
print(f"{total_gated_events:,d}")

145,966,885


In [122]:
# Per (gate, input) summary of rows with at least 10,000 gated events:
# 'events' is the summed gated_count and 'replicates' is the number of non-null rows in the group.
# The result keeps two-level columns: ('Gated count', 'events') and ('Gated count', 'replicates').
all_counts = (
    df.loc[df['gated_count'] >= 10_000, ['gate', 'input', 'gated_count']]
    .groupby(['gate', 'input'])
    .agg(['sum', 'count'])
    .rename(columns={'gated_count': 'Gated count', 'sum': 'events', 'count': 'replicates'})
)

In [137]:
# Turn the event totals into display strings with thousands separators (e.g. 4259281 -> "4,259,281").
# Values that are already strings are left untouched so the cell can be re-run safely; the previous
# `.astype(int)` raised on comma-formatted text the second time the cell was executed.
# The column is replaced by plain assignment rather than written through `.loc[:, col]`, so the
# numeric column is swapped for a string column instead of having strings written into it.
events_col = ('Gated count', 'events')
all_counts[events_col] = all_counts[events_col].apply(
    lambda x: x if isinstance(x, str) else "{:,}".format(int(x))
)

In [123]:
# Distinct gate labels present in the summary, in order of first appearance in the index.
all_counts.index.get_level_values('gate').unique().to_numpy()

array(['AND', 'NAND', 'NOR', 'NOR-00-Control', 'OR', 'WT-Live-Control',
       'XNOR', 'XOR'], dtype=object)

In [138]:
# Show the summary without the two control entries.
is_control = all_counts.index.get_level_values('gate').isin(['NOR-00-Control', 'WT-Live-Control'])
all_counts.loc[~is_control]

Unnamed: 0_level_0,Unnamed: 1_level_0,Gated count,Gated count
Unnamed: 0_level_1,Unnamed: 1_level_1,events,replicates
gate,input,Unnamed: 2_level_2,Unnamed: 3_level_2
AND,0,4259281,257
AND,1,4459111,246
AND,10,4044177,234
AND,11,4791203,222
NAND,0,7402180,345
NAND,1,7412703,352
NAND,10,6778828,316
NAND,11,7308418,337
NOR,0,8573805,368
NOR,1,5913758,317


In [150]:
# Total gated events over the non-control rows.
# The 'events' values are comma-formatted strings at this point (the original code stripped the
# commas before calling int), so the separators are removed before converting back to integers.
# The pandas string accessor replaces np.vectorize + lambda and also copes with an empty selection.
(
    all_counts
    .query("gate not in ['NOR-00-Control', 'WT-Live-Control']")
    [('Gated count', 'events')]
    .str.replace(",", "", regex=False)
    .astype(int)
    .sum()
)

142852659

In [151]:
# Total number of replicate rows over the non-control entries.
gate_rows = all_counts.query("gate not in ['NOR-00-Control', 'WT-Live-Control']")
gate_rows.loc[:, ('Gated count', 'replicates')].sum()

7189

In [139]:
# Write the non-control rows of the summary as a LaTeX table to gate_counts_cp1.tex
# (relative path, so the file lands in the kernel's working directory).
all_counts.query("gate not in ['NOR-00-Control', 'WT-Live-Control']").to_latex('gate_counts_cp1.tex')

In [140]:
# Write the two control entries of the summary as a LaTeX table to control_counts_cp1.tex
# (relative path, so the file lands in the kernel's working directory).
all_counts.query("gate in ['NOR-00-Control', 'WT-Live-Control']").to_latex('control_counts_cp1.tex')

In [152]:
# Total gated events over the two control entries.
# The 'events' values are comma-formatted strings at this point (the original code stripped the
# commas before calling int), so the separators are removed before converting back to integers.
# The pandas string accessor replaces np.vectorize + lambda and also copes with an empty selection.
(
    all_counts
    .query("gate in ['NOR-00-Control', 'WT-Live-Control']")
    [('Gated count', 'events')]
    .str.replace(",", "", regex=False)
    .astype(int)
    .sum()
)

3114226