In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import warnings

from glob import glob
from copy import deepcopy
from numbers import Number

import numpy as np
import pandas as pd

import data_helpers

In [3]:
# Directory holding the per-run data CSVs (one "<lab_id>.csv" file per run).
DATA_DIR = os.path.expanduser("~/sd2e-projects/sd2e-project-14/xplan-reactor/data/transcriptic")
# Fail fast and name the missing path, rather than silently globbing nothing later on.
assert os.path.exists(DATA_DIR), f"Data directory not found: {DATA_DIR}"

In [4]:
# Load the existing accuracy table; the first CSV column becomes the row index.
accuracy_table = pd.read_csv('accuracy_set.csv', index_col=0)

# Gate All Data

To see whether the absence of gating caused any of the anomalies we see in the model results, apply the gates to all of the data and save the results for later analysis.

## Check "table of contents"

First, check that every data set listed in `accuracy_set.csv` has a corresponding data file in the filesystem, and see which data files on disk are not listed in the table.

In [5]:
# Every file in DATA_DIR whose name matches the "r*_r*.csv" pattern.
all_files = glob(os.path.join(DATA_DIR, "r*_r*.csv"))
# Displayed in sorted order; `all_files` itself keeps glob's ordering.
sorted(all_files)

['/work/05204/rpg/jupyter/sd2e-projects/sd2e-project-14/xplan-reactor/data/transcriptic/r1c5va879uaex_r1c639xp952g4.csv',
 '/work/05204/rpg/jupyter/sd2e-projects/sd2e-project-14/xplan-reactor/data/transcriptic/r1c5va9wyf3mx_r1c63bjzu5ruy.csv',
 '/work/05204/rpg/jupyter/sd2e-projects/sd2e-project-14/xplan-reactor/data/transcriptic/r1c5vab4q2yu9_r1c63c2e7z9hj.csv',
 '/work/05204/rpg/jupyter/sd2e-projects/sd2e-project-14/xplan-reactor/data/transcriptic/r1c5vac658fxn_r1c66qw595ydy.csv',
 '/work/05204/rpg/jupyter/sd2e-projects/sd2e-project-14/xplan-reactor/data/transcriptic/r1c5vad8u7ve2_r1c66q7cru28u.csv',
 '/work/05204/rpg/jupyter/sd2e-projects/sd2e-project-14/xplan-reactor/data/transcriptic/r1c5vaeb8vbt9_r1c66mfpj7guh.csv',
 '/work/05204/rpg/jupyter/sd2e-projects/sd2e-project-14/xplan-reactor/data/transcriptic/r1c7cppfr7yp6_r1c7jnv3pkbsj.csv',
 '/work/05204/rpg/jupyter/sd2e-projects/sd2e-project-14/xplan-reactor/data/transcriptic/r1c7cpqp2k6cw_r1c7jn9syrr8s.csv',
 '/work/05204/rpg/jupyte

In [6]:
# List the table's columns, then peek at the lab_id values (these are compared
# against the data file names in the following cells).
print(accuracy_table.columns)
accuracy_table.head()['lab_id']

Index(['count', 'count_live', 'experiment_id', 'filename', 'gate',
       'growth_media_1', 'growth_media_2', 'id', 'inc_temp', 'inc_time_1',
       'inc_time_2', 'index', 'input', 'lab', 'lab_id',
       'mean_correct_classifier', 'mean_correct_classifier_live',
       'mean_correct_high_classifier', 'mean_correct_high_classifier_live',
       'mean_correct_high_threshold', 'mean_correct_high_threshold_live',
       'mean_correct_low_classifier', 'mean_correct_low_classifier_live',
       'mean_correct_low_threshold', 'mean_correct_low_threshold_live',
       'mean_correct_threshold', 'mean_correct_threshold_live', 'mean_log_gfp',
       'mean_log_gfp_live', 'media', 'od', 'od_cutoff', 'output', 'plan',
       'plate_id', 'replicate', 'source_container', 'std_correct_classifier',
       'std_correct_classifier_live', 'std_correct_high_classifier',
       'std_correct_high_classifier_live', 'std_correct_high_threshold',
       'std_correct_high_threshold_live', 'std_correct_low_classif

0    r1c7cpt3djxuj_r1c7fex29q6t2
1    r1c7cpt3djxuj_r1c7fex29q6t2
2    r1c7cpt3djxuj_r1c7fex29q6t2
3    r1c7cpt3djxuj_r1c7fex29q6t2
4    r1c7cpt3djxuj_r1c7fex29q6t2
Name: lab_id, dtype: object

In [7]:
# Distinct lab_id values that appear in the accuracy table.
all_files_in_table = set(accuracy_table['lab_id'])

In [8]:
# Drop the directory and the ".csv" extension so the file names can be compared
# directly with lab_id values.
all_files_in_dir = {os.path.splitext(os.path.basename(x))[0] for x in all_files}
all_files_in_dir

{'r1c5va879uaex_r1c639xp952g4',
 'r1c5va9wyf3mx_r1c63bjzu5ruy',
 'r1c5vab4q2yu9_r1c63c2e7z9hj',
 'r1c5vac658fxn_r1c66qw595ydy',
 'r1c5vad8u7ve2_r1c66q7cru28u',
 'r1c5vaeb8vbt9_r1c66mfpj7guh',
 'r1c7cppfr7yp6_r1c7jnv3pkbsj',
 'r1c7cpqp2k6cw_r1c7jn9syrr8s',
 'r1c7cprv7fe49_r1c7jmje3ebhc',
 'r1c7cpt3djxuj_r1c7fex29q6t2',
 'r1c7cpu9nj2bu_r1c7fg23qkzph',
 'r1c7cpvfzqprk_r1c7fbvba55db',
 'r1c84xvquwxth_r1c8ek7vcmdxh',
 'r1c84xwx57y95_r1c8ejz8jfg9z',
 'r1c84xy5frkf3_r1c8ejr9kezva',
 'r1c84xzc5dbuv_r1c8axrywctwb',
 'r1c84y2j7n7bu_r1c8aw59wfxy8',
 'r1c84y3r9yqxb_r1c8auwvzbgt3',
 'r1c8xx6qe9eea_r1c93enbf48t7',
 'r1c8xx7we38vw_r1c93cukfcxqp',
 'r1c8xx94j94u7_r1c93ajvdzxz5',
 'r1c8yx25rrtag_r1c96v2w8x5pq',
 'r1c8yydkumrkr_r1c96xsxw79c9',
 'r1c8yyg9gxbme_r1c96wn43pq45',
 'r1c9tpk5zrbsk_r1c9wpbvjszxe',
 'r1c9tpmcezpk2_r1c9wjnedp92f',
 'r1cbajphx4ekf_r1cbeec6cxn2t',
 'r1cbajqru7kbe_r1cbe9p9zu6dh',
 'r1cbajrxthe7y_r1cbdzkebbd46',
 'r1cbuszfskzjt_r1cc6ws9c3dh4',
 'r1cbut2pdjtwv_r1cc6qzwcd4hx',
 'r1cbut

In [9]:
# lab_ids in the table with no matching data file; an empty set means every
# table entry is backed by a file.
all_files_in_table - all_files_in_dir

set()

In [10]:
# Number of distinct lab_ids in the accuracy table.
len(accuracy_table['lab_id'].unique())

98

In [11]:
# How many data files on disk are not referenced by the accuracy table.
print(len(all_files_in_dir - all_files_in_table))

11


In [12]:
# The data files on disk that the accuracy table does not reference.
all_files_in_dir - all_files_in_table

{'r1df5qqzq7k6ss_r1dfjjydmudcfs',
 'r1df5qs7jb4fwq_r1dff64wwmq4gt',
 'r1dfxex6e5ekjv_r1dg94zgbvxe7r',
 'r1dfxeycahns9y_r1dg8u3z9yzuuq',
 'r1dfxezj7aae4s_r1dg5dthycwjb7',
 'r1dgppdqbd2mmk_r1dgwy63p8x5uz',
 'r1dgppew9w7j7p_r1dgwudtwth732',
 'r1dgppg46aeknn_r1dgwpywx6rtc8',
 'r1dj8mwev6qgtn_r1djfuj9qywhea',
 'r1dj8mxn3pcuf5_r1djfnchgugknq',
 'r1dj8myu92t47q_r1djfefyrxqj9m'}

In [13]:
# Total number of rows in the accuracy table.
accuracy_table.shape[0]

8686

In [14]:
! sha256sum accuracy_set.csv

f91cbdb4bcf4eff72981484fc5b8d56b733311ee88e5f6c575252136550745f9  accuracy_set.csv


# Now run through all the files in the table, gate them, and compute the mean and SD of log GFP

In [15]:
def collect_a_datafile(filename: str) -> pd.DataFrame:
    """Load one data CSV and return it canonicalized and gated.

    The file is read with its first column as the index, then passed through
    ``data_helpers.canonical_data_frame`` followed by
    ``data_helpers.gate_dataframe``; the result of the latter is returned.
    """
    canonical = data_helpers.canonical_data_frame(pd.read_csv(filename, index_col=0))
    return data_helpers.gate_dataframe(canonical)

In [16]:
# A single example run, used in the following cells to try the pipeline step by step.
exname = 'r1cft9hhnhjdb_r1cfwgjmz247y'
exfile = os.path.join(DATA_DIR, exname + '.csv')

In [17]:
def get_inc_temp(lab_id: str, acc_table: pd.DataFrame) -> str:
    """Return the ``inc_temp`` label recorded for ``lab_id`` in ``acc_table``.

    Parameters
    ----------
    lab_id
        Value to look up in the ``lab_id`` column of ``acc_table``.
    acc_table
        Table with (at least) ``lab_id`` and ``inc_temp`` columns.

    Returns
    -------
    str
        The ``inc_temp`` value itself when it is already a string; for a
        numeric value ``v``, the label ``f"warm_{int(v)}"``.

    Raises
    ------
    AssertionError
        If no row matches ``lab_id``, or the matching rows carry more than one
        distinct ``inc_temp`` value.
    ValueError
        If the single ``inc_temp`` value is a numeric NaN.
    TypeError
        If the single ``inc_temp`` value is neither a string nor a number
        (previously this case silently returned ``None``).
    """
    sub = acc_table[acc_table['lab_id'] == lab_id]
    assert sub.shape[0] > 0, f"No rows in the accuracy table for lab_id {lab_id!r}"
    temps = set(sub['inc_temp'].unique())
    assert len(temps) == 1, (
        f"Expected exactly one inc_temp for lab_id {lab_id!r}, "
        f"found {sorted(map(str, temps))}"
    )
    it_val = temps.pop()
    if isinstance(it_val, str):
        return it_val
    if isinstance(it_val, Number):
        # int(nan) would raise an opaque "cannot convert float NaN to integer".
        if pd.isna(it_val):
            raise ValueError(f"inc_temp is missing (NaN) for lab_id {lab_id!r}")
        return f"warm_{int(it_val)}"
    # Do not fall off the end and hand the caller None in place of a str.
    raise TypeError(
        f"Unsupported inc_temp value {it_val!r} "
        f"(type {type(it_val).__name__}) for lab_id {lab_id!r}"
    )

In [18]:
# Read the example file directly (no canonicalization or gating yet) to inspect
# its raw columns and first rows.
exdf = pd.read_csv(exfile, index_col=0)
print(exdf.columns)
exdf.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  mask |= (ar1 == a)


Index(['inc_temp', 'inc_time_1', 'inc_time_2', 'plan', 'lab', 'growth_media_1',
       'growth_media_2', 'od_cutoff', 'source_container', 'lab_id', 'id',
       'strain_name', 'gate', 'input', 'od', 'filename', 'replicate', 'output',
       'media', 'plate_id', 'Time', 'FSC_A', 'SSC_A', 'BL1_A', 'RL1_A',
       'FSC_H', 'SSC_H', 'BL1_H', 'RL1_H', 'FSC_W', 'SSC_W', 'BL1_W', 'RL1_W',
       'index', 'live_no_stain', 'live'],
      dtype='object')


Unnamed: 0,inc_temp,inc_time_1,inc_time_2,plan,lab,growth_media_1,growth_media_2,od_cutoff,source_container,lab_id,...,SSC_H,BL1_H,RL1_H,FSC_W,SSC_W,BL1_W,RL1_W,index,live_no_stain,live
0,warm_30,18:hour,15:hour,2019_02_12_23_46_12,transcriptic,standard_media,standard_media,0.1,ct1cexrrstn83n,r1cft9hhnhjdb_r1cfwgjmz247y,...,1048575.0,3142.0,26075.0,1023.0,1023.0,0.0,29.0,0,0.0,0
1,warm_30,18:hour,15:hour,2019_02_12_23_46_12,transcriptic,standard_media,standard_media,0.1,ct1cexrrstn83n,r1cft9hhnhjdb_r1cfwgjmz247y,...,212221.0,489.0,101.0,50.0,88.0,0.0,0.0,1,1.0,0
2,warm_30,18:hour,15:hour,2019_02_12_23_46_12,transcriptic,standard_media,standard_media,0.1,ct1cexrrstn83n,r1cft9hhnhjdb_r1cfwgjmz247y,...,208320.0,508.0,99.0,45.0,56.0,0.0,0.0,2,1.0,1
3,warm_30,18:hour,15:hour,2019_02_12_23_46_12,transcriptic,standard_media,standard_media,0.1,ct1cexrrstn83n,r1cft9hhnhjdb_r1cfwgjmz247y,...,268477.0,649.0,85.0,38.0,71.0,0.0,0.0,3,1.0,0
4,warm_30,18:hour,15:hour,2019_02_12_23_46_12,transcriptic,standard_media,standard_media,0.1,ct1cexrrstn83n,r1cft9hhnhjdb_r1cfwgjmz247y,...,230036.0,640.0,108.0,45.0,69.0,0.0,0.0,4,0.0,0


In [19]:
# Convert the raw example frame with data_helpers.canonical_data_frame; the
# resulting columns and index levels are displayed in the next cell.
df = data_helpers.canonical_data_frame(exdf)

In [20]:
# Inspect the canonical frame: its data columns first, then the names of its index levels.
print(df.columns.values)
df.index.names

['inc_temp' 'inc_time_1' 'inc_time_2' 'growth_media_1' 'growth_media_2'
 'od_cutoff' 'source_container' 'id' 'gate' 'input' 'filename' 'output'
 'media' 'Time' 'FSC_A' 'SSC_A' 'BL1_A' 'RL1_A' 'FSC_H' 'SSC_H' 'BL1_H'
 'RL1_H' 'FSC_W' 'SSC_W' 'BL1_W' 'RL1_W' 'index' 'live_no_stain' 'live'
 'inc_time_1_hrs']


FrozenList(['strain_name', 'inc_temp_degrees', 'inc_time_2_hrs', 'od', 'lab_id', 'plate_id', 'well', 'replicate', 'event'])

In [21]:
# Which values the 'live' column takes in the example data.
df['live'].unique()

array([0, 1])

In [22]:
# Apply the gates. This rebinds `df`, so re-running the cell passes the
# already-gated frame through gate_dataframe again.
df = data_helpers.gate_dataframe(df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)


In [23]:
# Store log10 of BL1_A as 'logGFP'; values below 1 are clamped to 1 first so the
# logarithm is defined (those rows get logGFP == 0).
df.loc[:, 'logGFP'] = np.log10(np.maximum(df['BL1_A'], 1))

In [24]:
# Per-well mean / SD / count of logGFP over all rows of the gated frame.
# plate_id is part of the grouping key because there appear to be multiple plates.
(
    df.groupby(level=['strain_name', 'plate_id', 'well'])
    .agg(
        mean_log_gfp_gated=('logGFP', 'mean'),
        std_log_gfp_gated=('logGFP', 'std'),
        gated_count=('logGFP', 'count'),
    )
    .head()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean_log_gfp_gated,std_log_gfp_gated,gated_count
strain_name,plate_id,well,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
NOR-00-Control,2,C12,2.975619,0.244577,26028
WT-Live-Control,2,A12,1.654474,0.479079,18329
https://hub.sd2e.org/user/sd2e/design/UWBF_16968/1,2,A02,2.572431,0.33895,23662
https://hub.sd2e.org/user/sd2e/design/UWBF_16968/1,2,C11,2.697808,0.310978,23572
https://hub.sd2e.org/user/sd2e/design/UWBF_16968/1,2,D05,2.675767,0.299217,26500


In [25]:
# Same per-well statistics, restricted to rows flagged live == 1.
# plate_id is part of the grouping key because there appear to be multiple plates.
(
    df[df['live'] == 1]
    .groupby(level=['strain_name', 'plate_id', 'well'])
    .agg(
        mean_log_gfp_gated_live=('logGFP', 'mean'),
        std_log_gfp_gated_live=('logGFP', 'std'),
        gated_live_count=('logGFP', 'count'),
    )
    .head()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean_log_gfp_gated_live,std_log_gfp_gated_live,gated_live_count
strain_name,plate_id,well,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
NOR-00-Control,2,C12,2.97543,0.244508,25075
WT-Live-Control,2,A12,1.654746,0.47976,16650
https://hub.sd2e.org/user/sd2e/design/UWBF_16968/1,2,A02,2.571999,0.339017,14681
https://hub.sd2e.org/user/sd2e/design/UWBF_16968/1,2,C11,2.69765,0.310586,22717
https://hub.sd2e.org/user/sd2e/design/UWBF_16968/1,2,D05,2.673406,0.296941,9796


In [26]:
# The shared helper yields both sets of statistics (gated, and gated + live) in
# one frame, keyed by lab_id / strain_name / plate_id / well (see output below).
foo = data_helpers.compute_gated_scores(df)
foo.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,mean_log_gfp_gated,std_log_gfp_gated,gated_count,mean_log_gfp_gated_live,std_log_gfp_gated_live,gated_live_count
lab_id,strain_name,plate_id,well,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
r1cft9hhnhjdb_r1cfwgjmz247y,NOR-00-Control,2,C12,2.975619,0.244577,26028,2.97543,0.244508,25075
r1cft9hhnhjdb_r1cfwgjmz247y,WT-Live-Control,2,A12,1.654474,0.479079,18329,1.654746,0.47976,16650
r1cft9hhnhjdb_r1cfwgjmz247y,https://hub.sd2e.org/user/sd2e/design/UWBF_16968/1,2,A02,2.572431,0.33895,23662,2.571999,0.339017,14681
r1cft9hhnhjdb_r1cfwgjmz247y,https://hub.sd2e.org/user/sd2e/design/UWBF_16968/1,2,C11,2.697808,0.310978,23572,2.69765,0.310586,22717
r1cft9hhnhjdb_r1cfwgjmz247y,https://hub.sd2e.org/user/sd2e/design/UWBF_16968/1,2,D05,2.675767,0.299217,26500,2.673406,0.296941,9796


# Compute new scores for the gated and live-filtered wells

In [27]:
# Work on a copy so the table loaded from accuracy_set.csv stays untouched.
new_accuracy_table = deepcopy(accuracy_table)
# Both helpers are called for their effect on the frame (return values are unused).
# NOTE(review): presumably they add the 'well' column and empty gated-GFP
# columns, respectively -- confirm in data_helpers.
data_helpers.df_create_well_column(new_accuracy_table)
data_helpers.df_add_new_gfp_columns(new_accuracy_table)

# Index by the same four keys that compute_gated_scores output uses, so the
# per-file results can be aligned with this table.
new_accuracy_table.set_index(['lab_id', 'strain_name', 'plate_id', 'well'], inplace=True)

In [None]:
# Bookkeeping for the batch run below.
processed = set()   # lab_ids the loop has attempted (successfully or not)

newly_done = set()  # lab_ids whose gated scores were merged into new_accuracy_table
failed = set()      # lab_ids that raised while being processed
with warnings.catch_warnings():
    warnings.simplefilter("ignore", pd.errors.DtypeWarning)
    # SettingWithCopyWarning is exposed as pd.errors.SettingWithCopyWarning on
    # newer pandas and as pd.core.common.SettingWithCopyWarning on older
    # releases; silence whichever one this installation provides.
    setting_with_copy_warning = getattr(pd.errors, "SettingWithCopyWarning", None) or getattr(
        pd.core.common, "SettingWithCopyWarning", None
    )
    if setting_with_copy_warning is not None:
        warnings.simplefilter("ignore", setting_with_copy_warning)
    warnings.simplefilter("ignore", FutureWarning)

    for x in sorted(accuracy_table['lab_id'].unique()):
        if x in processed:
            print(f"{x} already processed.")
            continue

        print(x)
        try:
            filename: str = os.path.join(DATA_DIR, x + ".csv")
            # Raise with an explicit message: a bare assert would leave the
            # "Failed to process" line below without any stated reason.
            if not os.path.exists(filename):
                raise FileNotFoundError(f"data file not found: {filename}")
            df = collect_a_datafile(filename)
            new_df = data_helpers.compute_gated_scores(df)
            gated_rows = new_df['gated_count'].notna().sum()
            gated_live_rows = new_df['gated_live_count'].notna().sum()
            print(f'This file has {gated_rows} replicates with gated counts and {gated_live_rows} with gated live counts')
            # combine_first keeps values already present in new_accuracy_table
            # and fills the gaps (and any new index entries) from new_df.
            new_accuracy_table = new_accuracy_table.combine_first(new_df)
            print('After combining with the previous data sets:')
            gated_rows = new_accuracy_table['gated_count'].notna().sum()
            gated_live_rows = new_accuracy_table['gated_live_count'].notna().sum()
            print(f'We have a total of {gated_rows} replicates with gated counts and {gated_live_rows} with gated live counts.')
            # Written after every file, so an interrupted run keeps its progress.
            new_accuracy_table.to_csv("new_accuracy_table.csv")
            newly_done.add(x)
        except Exception as e:
            # Deliberately best-effort: record the failure and carry on with
            # the remaining data sets.
            print(f"Failed to process {x} because of error:\n\t{e}")
            failed.add(x)
        processed.add(x)
 

r1c5va879uaex_r1c639xp952g4
This file has 89 replicates with gated counts and 87 with gated live counts
After combining with the previous data sets:
We have a total of 89 replicates with gated counts and 87 with gated live counts.
r1c5va9wyf3mx_r1c63bjzu5ruy
This file has 89 replicates with gated counts and 89 with gated live counts
After combining with the previous data sets:
We have a total of 178 replicates with gated counts and 176 with gated live counts.
r1c5vab4q2yu9_r1c63c2e7z9hj
This file has 89 replicates with gated counts and 50 with gated live counts
After combining with the previous data sets:
We have a total of 267 replicates with gated counts and 226 with gated live counts.
r1c5vac658fxn_r1c66qw595ydy
This file has 87 replicates with gated counts and 87 with gated live counts
After combining with the previous data sets:
We have a total of 354 replicates with gated counts and 313 with gated live counts.
r1c5vad8u7ve2_r1c66q7cru28u
This file has 88 replicates with gated cou

In [80]:
# lab_ids from the table that the loop has not attempted; an empty set means
# each one was either merged or recorded in `failed`.
unprocessed =  set(accuracy_table['lab_id'].unique()) - processed
unprocessed

set()

In [77]:
# Compare the columns of `new_df` (left bound by the processing loop) with the
# columns that result from combining it into the full table.
print(new_df.columns)
print(new_accuracy_table.combine_first(new_df).columns)

Index(['mean_log_gfp_gated', 'std_log_gfp_gated', 'gated_count',
       'mean_log_gfp_gated_live', 'std_log_gfp_gated_live',
       'gated_live_count'],
      dtype='object')
Index(['count', 'count_live', 'experiment_id', 'filename', 'gate',
       'gated_count', 'gated_live_count', 'growth_media_1', 'growth_media_2',
       'id', 'inc_temp', 'inc_time_1', 'inc_time_2', 'index', 'input', 'lab',
       'mean_correct_classifier', 'mean_correct_classifier_live',
       'mean_correct_high_classifier', 'mean_correct_high_classifier_live',
       'mean_correct_high_threshold', 'mean_correct_high_threshold_live',
       'mean_correct_low_classifier', 'mean_correct_low_classifier_live',
       'mean_correct_low_threshold', 'mean_correct_low_threshold_live',
       'mean_correct_threshold', 'mean_correct_threshold_live', 'mean_log_gfp',
       'mean_log_gfp_gated', 'mean_log_gfp_gated_live', 'mean_log_gfp_live',
       'media', 'od', 'od_cutoff', 'output', 'plan', 'replicate',
       'source_co

In [78]:
# (rows with a non-missing gated_live_count after combining, total rows in
# new_accuracy_table). The combined frame is not stored.
np.sum(~(new_accuracy_table.combine_first(new_df)['gated_live_count'].isna().to_numpy())), new_accuracy_table.shape[0]

(5344, 8726)

In [79]:
# (rows with a non-missing gated_count after combining, total rows in
# new_accuracy_table). The combined frame is not stored.
np.sum(~(new_accuracy_table.combine_first(new_df)['gated_count'].isna().to_numpy())), new_accuracy_table.shape[0]

(7351, 8726)

In [39]:
# Rows of the current `df` whose 'live' value is missing, shown next to the
# total number of rows.
no_live_count = df[df['live'].isna()]
no_live_count.shape[0], df.shape[0]

(43919, 2015602)

In [47]:
# Strains that have at least one row with a missing 'live' value.
no_live_count.reset_index(level='strain_name')['strain_name'].unique()

array(['WT-Live-Control', 'NOR-00-Control',
       'https://hub.sd2e.org/user/sd2e/design/UWBF_7299/1'], dtype=object)

In [51]:
# For one strain, group the rows lacking a 'live' value by (plate_id, well).
# Note: .groups lists every member row label, so the output is very long.
no_live_count.xs('https://hub.sd2e.org/user/sd2e/design/UWBF_7299/1', level='strain_name').groupby(['plate_id', 'well']).groups

{(0, 'H11'): [('r1c5va879uaex_r1c639xp952g4', 'H11', 30, 16, 0.0003, 'r1c5va879uaex_r1c639xp952g4', 0, 'H11', 90, 27847), ('r1c5va879uaex_r1c639xp952g4', 'H11', 30, 16, 0.0003, 'r1c5va879uaex_r1c639xp952g4', 0, 'H11', 90, 27848), ('r1c5va879uaex_r1c639xp952g4', 'H11', 30, 16, 0.0003, 'r1c5va879uaex_r1c639xp952g4', 0, 'H11', 90, 27849), ('r1c5va879uaex_r1c639xp952g4', 'H11', 30, 16, 0.0003, 'r1c5va879uaex_r1c639xp952g4', 0, 'H11', 90, 27850), ('r1c5va879uaex_r1c639xp952g4', 'H11', 30, 16, 0.0003, 'r1c5va879uaex_r1c639xp952g4', 0, 'H11', 90, 27851), ('r1c5va879uaex_r1c639xp952g4', 'H11', 30, 16, 0.0003, 'r1c5va879uaex_r1c639xp952g4', 0, 'H11', 90, 27852), ('r1c5va879uaex_r1c639xp952g4', 'H11', 30, 16, 0.0003, 'r1c5va879uaex_r1c639xp952g4', 0, 'H11', 90, 27853), ('r1c5va879uaex_r1c639xp952g4', 'H11', 30, 16, 0.0003, 'r1c5va879uaex_r1c639xp952g4', 0, 'H11', 90, 27854), ('r1c5va879uaex_r1c639xp952g4', 'H11', 30, 16, 0.0003, 'r1c5va879uaex_r1c639xp952g4', 0, 'H11', 90, 27855), ('r1c5va879uae

In [65]:
# Distinct (plate_id, well) pairs among the rows of this strain that lack a 'live' value.
no_live_count.xs('https://hub.sd2e.org/user/sd2e/design/UWBF_7299/1', level='strain_name').reset_index(level='plate_id').reset_index(level=1)[['plate_id','well']].drop_duplicates()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,plate_id,well
lab_id,inc_temp_degrees,inc_time_2_hrs,od,lab_id,well,replicate,event,Unnamed: 8_level_1,Unnamed: 9_level_1
r1c5va879uaex_r1c639xp952g4,30,16,0.0003,r1c5va879uaex_r1c639xp952g4,H11,90,27847,0,H11


In [33]:
# Columns produced by compute_gated_scores for the file currently held in `new_df`.
new_df.columns

Index(['mean_log_gfp_gated', 'std_log_gfp_gated', 'gated_count',
       'mean_log_gfp_gated_live', 'std_log_gfp_gated_live',
       'gated_live_count'],
      dtype='object')

In [48]:
# Complement of `no_live_count`: rows that do have a 'live' value, their count
# next to the total, and the strains they cover.
live_count = df[~df['live'].isna()]
print(live_count.shape[0], df.shape[0])
live_count.reset_index(level='strain_name')['strain_name'].unique()

1971683 2015602


array(['https://hub.sd2e.org/user/sd2e/design/UWBF_7299/1',
       'https://hub.sd2e.org/user/sd2e/design/UWBF_6389/1',
       'https://hub.sd2e.org/user/sd2e/design/UWBF_8231/1',
       'https://hub.sd2e.org/user/sd2e/design/UWBF_7300/1',
       'https://hub.sd2e.org/user/sd2e/design/UWBF_7373/1',
       'https://hub.sd2e.org/user/sd2e/design/UWBF_6390/1',
       'https://hub.sd2e.org/user/sd2e/design/UWBF_8544/1',
       'https://hub.sd2e.org/user/sd2e/design/UWBF_6391/1',
       'https://hub.sd2e.org/user/sd2e/design/UWBF_16968/1',
       'https://hub.sd2e.org/user/sd2e/design/UWBF_7377/1',
       'https://hub.sd2e.org/user/sd2e/design/UWBF_16967/1',
       'https://hub.sd2e.org/user/sd2e/design/UWBF_8542/1',
       'https://hub.sd2e.org/user/sd2e/design/UWBF_6388/1',
       'https://hub.sd2e.org/user/sd2e/design/UWBF_7375/1',
       'https://hub.sd2e.org/user/sd2e/design/UWBF_7374/1',
       'https://hub.sd2e.org/user/sd2e/design/UWBF_8545/1',
       'https://hub.sd2e.org/user/sd2e

In [34]:
# First rows of the per-file scores currently held in `new_df`.
new_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,mean_log_gfp_gated,std_log_gfp_gated,gated_count,mean_log_gfp_gated_live,std_log_gfp_gated_live,gated_live_count
lab_id,strain_name,plate_id,well,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
r1c5va879uaex_r1c639xp952g4,NOR-00-Control,0,C12,2.872624,0.286968,25743,,,
r1c5va879uaex_r1c639xp952g4,WT-Live-Control,0,A12,1.584831,0.469159,16591,,,
r1c5va879uaex_r1c639xp952g4,https://hub.sd2e.org/user/sd2e/design/UWBF_16967/1,0,B03,2.054533,0.39589,24715,2.055798,0.395018,21777.0
r1c5va879uaex_r1c639xp952g4,https://hub.sd2e.org/user/sd2e/design/UWBF_16967/1,0,D03,1.979912,0.422622,23737,1.980302,0.42167,21954.0
r1c5va879uaex_r1c639xp952g4,https://hub.sd2e.org/user/sd2e/design/UWBF_16968/1,0,A10,2.561888,0.327265,25483,2.561358,0.327281,24836.0


In [33]:
               
# Print both result sets as brace-delimited, comma-separated quoted names
# (presumably so they can be pasted back into a cell as a literal).
# The braces are written plainly: "\{" is not a valid escape sequence, so it
# printed a stray backslash and triggers an invalid-escape warning.
print("Have processed the following additional data sets:\n{")
for x in sorted(newly_done):
    print(f'"{x}",')
print("}")

print("The following data sets failed to process:\n{")
for x in sorted(failed):
    print(f'"{x}",')
print("}")

r1c5va879uaex_r1c639xp952g4 already processed.
r1c5va9wyf3mx_r1c63bjzu5ruy already processed.
r1c5vab4q2yu9_r1c63c2e7z9hj already processed.
r1c5vac658fxn_r1c66qw595ydy already processed.
r1c5vad8u7ve2_r1c66q7cru28u already processed.
r1c5vaeb8vbt9_r1c66mfpj7guh already processed.
r1c7cppfr7yp6_r1c7jnv3pkbsj already processed.
r1c7cpqp2k6cw_r1c7jn9syrr8s already processed.
r1c7cprv7fe49_r1c7jmje3ebhc already processed.
r1c7cpt3djxuj_r1c7fex29q6t2 already processed.
r1c7cpu9nj2bu_r1c7fg23qkzph already processed.
r1c7cpvfzqprk_r1c7fbvba55db already processed.
r1c84xvquwxth_r1c8ek7vcmdxh already processed.
r1c84xwx57y95_r1c8ejz8jfg9z already processed.
r1c84xy5frkf3_r1c8ejr9kezva already processed.
r1c84xzc5dbuv_r1c8axrywctwb already processed.
r1c84y2j7n7bu_r1c8aw59wfxy8 already processed.
r1c84y3r9yqxb_r1c8auwvzbgt3 already processed.
r1c8xx6qe9eea_r1c93enbf48t7 already processed.
r1c8xx7we38vw_r1c93cukfcxqp already processed.
r1c8xx94j94u7_r1c93ajvdzxz5 already processed.
r1c8yx25rrtag

In [34]:
# Hard-coded list of data sets that failed to process.
# NOTE(review): presumably copied from the output of an earlier run -- confirm it is still current.
FAILED = ["r1d6r59sste9m2_r1d8gf8cczkrz9", "r1d9xx8rjarjuw_r1da5tgwjasvw3", "r1dag7xpmgar9k_r1dakfqtk7hpzb"]

In [35]:
# Display the hard-coded failure list.
FAILED

['r1d6r59sste9m2_r1d8gf8cczkrz9',
 'r1d9xx8rjarjuw_r1da5tgwjasvw3',
 'r1dag7xpmgar9k_r1dakfqtk7hpzb']