In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

#from datetime import timedelta

In [2]:
# Construct lookup table; later cells join on its `id` column and read `short_name`.
constructs = pd.read_csv('datasets/constructs.csv')
constructs

Unnamed: 0,id,short_name,full_name
0,A1,e11x32M86,3K3-P11*-gfp-B15-P(ara)-sECF11*-SP1-P(Cym)-32-...
1,A2,e11x30STNpuSspS2,3K3-P11*-gfp-B15-P(ara)-sECF11*-SP2-P(Cym)-30-...
2,A3,e11x32NpuSspS2,3K3-P11*-gfp-B15-P(ara)-sECF11*-SP2-P(Cym)-32-...
3,A4,e11x30NpuSspS1,3K3-P11*-gfp-B15-P(ara)-sECF11*-SP2-P(Cym)-30-...
4,A5,e11x32gp411,3K3-P11*-gfp-B15-P(ara)-sECF11*-SP3-P(Cym)-32-...
...,...,...,...
360,3K-E422,gfp_med,
361,E422,gfp_low,
362,C+,positive_control,
363,C-,negative_control,


In [3]:
def parse_minutes(x):
    """Convert a plate-reader time label into a whole number of minutes.

    The label is split on single spaces; token 0 is read as the hour count
    and token 2, when present and non-empty, as the minute count (presumably
    labels look like ``"1 h 20 min"`` or ``"1 h "`` -- confirm against the
    raw export).

    Parameters
    ----------
    x : str
        Time label.

    Returns
    -------
    int
        ``hours * 60 + minutes``.

    Raises
    ------
    ValueError
        If the hour or minute token is not an integer literal.
    """
    tokens = x.split(' ')
    hours = int(tokens[0]) * 60
    # An hour-only label may or may not carry a trailing space, so guard the
    # lookup instead of assuming a third token always exists.
    mins = int(tokens[2]) if len(tokens) > 2 and tokens[2] != '' else 0
    return hours + mins

def read_plate_data(df):
    """Promote the first row to the header and drop incomplete rows.

    The first row of ``df`` becomes the column labels, that row is removed,
    every row containing a missing value is dropped, and the result gets a
    fresh 0..n-1 index. The input frame is left untouched.

    Note: this name is defined again further down the notebook (without the
    ``dropna`` step); the later definition shadows this one once it has run.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw export whose first row holds the real column names.

    Returns
    -------
    pandas.DataFrame
        New frame with the promoted header.
    """
    header = df.iloc[0]
    # Work on a copy of the body rows so the caller's frame is not mutated.
    body = df.iloc[1:].copy()
    body.columns = header
    body = body.dropna()
    return body.reset_index(drop=True)

def transpose_data(df):
    """Turn a wells-by-time table into a time-by-sample table.

    ``df`` must contain a ``short_name`` column; every other column label is
    a time label understood by ``parse_minutes``. The result has one column
    per ``short_name`` entry and is indexed by ``time`` in minutes. The input
    frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per well, with a ``short_name`` column plus reading columns.

    Returns
    -------
    pandas.DataFrame
        Transposed readings indexed by ``time``.
    """
    # Non-inplace set_index keeps the caller's frame intact.
    wide = df.set_index('short_name').transpose().reset_index()
    # After the transpose the former column labels sit in the 'index' column.
    wide['time'] = wide['index'].apply(parse_minutes)
    return wide.set_index('time').drop('index', axis=1)

def generate_data(df, plate, name, h=20, m=0):
    """Slice a plate export into three measurement tables and transpose each.

    Columns 0-2 are skipped. The next ``h * 3 + 1 - m`` columns form the
    first table, the following ``h * 3 + 1 - m`` columns the second, and all
    remaining columns the third (presumably three reads per hour plus the
    time-zero read, with ``m`` missing reads -- confirm against the export).

    Parameters
    ----------
    df : pandas.DataFrame
        Plate export with the header already promoted.
    plate :
        Accepted for call compatibility; not used here.
    name : pandas.Series
        ``short_name`` labels, aligned to the rows of ``df`` by index.
    h : int
        Run length used to size the first two tables.
    m : int
        Number of columns to subtract from each of the first two tables.

    Returns
    -------
    tuple
        ``(fluo, od, fluo_half)``, each passed through ``transpose_data``.
    """
    first = 3
    width = h * 3 + 1 - m
    second = first + width
    third = second + width

    tables = []
    for lo, hi in ((first, second), (second, third), (third, None)):
        readings = df.iloc[:, lo:hi].astype(float)
        labelled = pd.concat([name, readings], axis=1)
        tables.append(transpose_data(labelled))

    fluo, od, fluo_half = tables
    return fluo, od, fluo_half

### Marionette Strain

In [4]:
def read_plate_map(data):
    """Split a plate-map table into its leading columns and a per-well map.

    Parameters
    ----------
    data : pandas.DataFrame
        Plate layout indexed by ``group``; the first two columns are kept
        as-is and the remaining column labels must be integer-like.

    Returns
    -------
    tuple
        ``(con, mar)``: ``con`` is ``group`` plus the first two columns;
        ``mar`` has columns ``id`` and ``well``, where ``well`` is the group
        label followed by the zero-padded two-digit column label.
    """
    con = data.iloc[:, :2].reset_index()

    long_form = data.iloc[:, 2:].reset_index().melt(id_vars=['group'])
    padded = long_form['variable'].apply(lambda x: "{:02d}".format(int(x)))
    mar = (
        long_form
        .assign(well=long_form['group'] + padded)
        .drop(['group', 'variable'], axis=1)
        .rename(columns={'value': 'id'})
    )
    return con, mar

# Plate layout for the marionette run, indexed by the `group` column; `mar` maps well -> construct id.
con, mar = read_plate_map(pd.read_csv('datasets/mario_map.csv', index_col=['group']))

In [5]:
def read_plate_data(df):
    """Promote the first row of ``df`` to the header and renumber the rows.

    Unlike the definition earlier in the notebook, rows with missing values
    are kept. The input frame is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw export whose first row holds the real column names.

    Returns
    -------
    pandas.DataFrame
        New frame with the promoted header and a 0..n-1 index.
    """
    header = df.iloc[0]
    # Copy the body rows so the caller's frame is not mutated.
    body = df.iloc[1:].copy()
    body.columns = header
    return body.reset_index(drop=True)

# Raw reader export for the marionette run, with its first row promoted to the header.
data = read_plate_data(pd.read_csv('datasets/marrionette.csv'))

In [6]:
# Left-join each reading row's well to the plate map (well -> id), then to the
# construct table (id -> short_name).
merged = pd.merge(data['well'], mar, on='well', how='left')
merged = pd.merge(merged, constructs, on='id', how='left')
# NOTE(review): dropna() followed by reset_index() renumbers the surviving names; if an
# unmatched well is not at the very end, the names would shift relative to the rows of
# `data` when they are concatenated by index later -- verify.
name = merged['short_name'].dropna().reset_index(drop=True) #just to make sure there is no null and indexing is correct

In [7]:
# Split the export into its three measurement tables, using h=24 to size the blocks.
mar_fluo, mar_od, mar_fluo_half = generate_data(data, mar, name, 24)
# Distinct short names in order of first appearance; read as a global by reformat_df.
gates = name.unique().tolist()

In [9]:
def reformat_df(data, gate_names=None):
    """Split each gate's replicate columns into two wide frames.

    For every gate, ``data[gate]`` must select exactly eight columns (i.e.
    the gate label is repeated eight times in ``data.columns``). The first
    four columns go to the first result and the last four to the second;
    both are relabelled ``<gate>_00``, ``<gate>_10``, ``<gate>_01``,
    ``<gate>_11`` (presumably the inducer states -- confirm).

    Parameters
    ----------
    data : pandas.DataFrame
        Time-indexed readings with repeated gate labels as columns.
    gate_names : iterable of str, optional
        Gates to process, in output order. Defaults to the notebook-level
        ``gates`` list so existing calls behave as before.

    Returns
    -------
    tuple
        ``(top10_wrapper, mario_wrapper)``.

    Raises
    ------
    ValueError
        If a gate does not select exactly eight columns (label-length mismatch).
    """
    if gate_names is None:
        gate_names = gates

    suffixes = ('_00', '_10', '_01', '_11')

    # The accumulators are deliberately seeded with empty frames and grown one
    # gate at a time: the index metadata of the result can depend on that
    # first concat, so the accumulation pattern is left unchanged.
    top10_wrapper = pd.DataFrame()
    mario_wrapper = pd.DataFrame()

    for gate in gate_names:
        labels = [gate + suffix for suffix in suffixes]
        per_gate = data[gate]

        top10 = per_gate.iloc[:, :4]
        top10.columns = labels
        top10_wrapper = pd.concat([top10_wrapper, top10], axis=1)

        mario = per_gate.iloc[:, 4:]
        mario.columns = labels
        mario_wrapper = pd.concat([mario_wrapper, mario], axis=1)

    return top10_wrapper, mario_wrapper
    
# Only the third table from generate_data (fluo_half) is reshaped and exported here;
# the OD equivalents are left disabled below.
top10_fluo, mario_fluo = reformat_df(mar_fluo_half)
#top10_od, mario_od = reformat_df(mar_od)

# reset_index() turns the time index into a regular column before writing.
mario_fluo.reset_index().to_csv('datasets/marionette_fluo_half.csv', index=False)
#mario_od.reset_index().to_csv('datasets/marionette_od.csv', index=False)

### ALL GATES

### Data from plate reader 1

In [None]:
def cleanse_plate(plate):
    """Flatten a plate-layout grid into one row per well.

    Parameters
    ----------
    plate : pandas.DataFrame
        Layout whose ``'Unnamed: 0'`` column holds the row label and whose
        other column labels are integer-like. Cell values must be strings.

    Returns
    -------
    pandas.DataFrame
        Columns ``Well`` (row label + zero-padded two-digit column label)
        and ``code_name`` (the cell text up to its first ``'.'``).
    """
    long_form = plate.melt(id_vars=['Unnamed: 0'])
    code = long_form['value'].apply(lambda x: x.split('.')[0])
    padded = long_form['variable'].apply(lambda x: "{:02d}".format(int(x)))
    tidy = (
        long_form
        .assign(value=code, variable=long_form['Unnamed: 0'] + padded)
        .drop('Unnamed: 0', axis=1)
        .reset_index(drop=True)
    )
    return tidy.rename(columns={'variable': 'Well', 'value': 'code_name'})

# Well -> code_name lookup for plate 1.
plate1 = cleanse_plate(pd.read_csv('datasets/plate1_map.csv'))
plate1

In [None]:
# NOTE(review): every read_plate_data definition in this notebook expects a DataFrame
# (it uses df.iloc / df.columns), but these calls pass a bare name string. They presumably
# rely on an earlier variant that loaded the file itself -- confirm before re-running.
df11 = read_plate_data('and_gate_11') # up to 20h
print(df11.shape)
df12 = read_plate_data('and_gate_12') # up to 16h
print(df12.shape)
df13 = read_plate_data('and_gate_13') # up to 16h
print(df13.shape)

In [None]:
# Left-join wells -> code_name -> short_name for plate 1.
# NOTE(review): `naming_map` is not defined in any cell shown in this notebook -- confirm
# where it comes from before running on a fresh kernel.
merged = pd.merge(df11['Well'], plate1, on='Well', how='left')
merged = pd.merge(merged, naming_map, on='code_name', how='left')
name1 = merged['short_name']

In [None]:
# NOTE(review): these calls unpack five results, but the generate_data defined earlier in
# this notebook returns three (fluo, od, fluo_half); they presumably target a different
# version of that function -- confirm before re-running.
fluo11, od11, bulk_fluo11, fluo_half11, bulk_fluo_half11 = generate_data(df11, plate1, name1, 20)
fluo12, od12, bulk_fluo12, fluo_half12, bulk_fluo_half12 = generate_data(df12, plate1, name1, 16)
fluo13, od13, bulk_fluo13, fluo_half13, bulk_fluo_half13 = generate_data(df13, plate1, name1, 16)

In [None]:
def plot_all(data, num_row, num_col):
    """Draw one small panel per column, overlaying the same column from each frame.

    Parameters
    ----------
    data : list of pandas.DataFrame
        Frames indexed by time in minutes. Panel ``i`` plots column ``i`` of
        every frame; the column count and titles come from ``data[0]``.
    num_row, num_col : int
        Grid shape. Panels beyond the number of columns are hidden.

    Returns
    -------
    None
        The figure is drawn on the current matplotlib backend.
    """
    # squeeze=False always yields a 2-D array of axes, so ravel() also works
    # for a single row, a single column, or a 1x1 grid.
    _, axs = plt.subplots(num_row, num_col, sharex=True, sharey=False,
                          figsize=(14, num_row*2), squeeze=False)
    axr = axs.ravel()
    reference = data[0]
    n_series = reference.shape[1]
    # Wrap the axes (not the enumerate object) so tqdm knows the total.
    for i, ax in enumerate(tqdm(axr)):
        if i < n_series:
            for d in data:
                # The index is in minutes; the x axis is shown in hours.
                ax.plot(d.index/60, d.iloc[:, i])
            ax.set_title(reference.columns[i])
            ax.set_xlabel('Time (h)')
        else:
            ax.set_visible(False)
    plt.tight_layout()
    sns.despine()
    
# Plot the bulk_fluo_half tables of the three plate-1 runs on a 20 x 5 grid.
plot_all([bulk_fluo_half11, bulk_fluo_half12, bulk_fluo_half13], 20, 5)

In [None]:
# Same grid for the OD tables of the three plate-1 runs.
plot_all([od11, od12, od13], 20, 5)

In [None]:
# Element-wise mean of the three runs. pandas aligns on index and column labels when
# adding frames, so any time point missing from one run becomes NaN in the mean.
bulk_fluo1 = (bulk_fluo11 + bulk_fluo12 + bulk_fluo13) / 3
fluo1 = (fluo11 + fluo12 + fluo13) / 3
od1 = (od11 + od12 + od13) / 3
fluo_half1 = (fluo_half11 + fluo_half12 + fluo_half13) / 3
bulk_fluo_half1 = (bulk_fluo_half11 + bulk_fluo_half12 + bulk_fluo_half13) / 3

In [None]:
# Write the plate-1 averages; dropna() removes every row that contains a NaN first.
bulk_fluo1.dropna().to_csv('datasets/bulk_fluo_plate_1_triplicate.csv')
fluo1.dropna().to_csv('datasets/fluo_plate_1_triplicate.csv')
od1.dropna().to_csv('datasets/od_plate_1_triplicate.csv')
bulk_fluo_half1.dropna().to_csv('datasets/bulk_fluo_half_plate_1_triplicate.csv')
fluo_half1.dropna().to_csv('datasets/fluo_half_plate_1_triplicate.csv')

In [None]:
# Write the first plate-1 run on its own (rows containing NaN removed).
bulk_fluo11.dropna().to_csv('datasets/bulk_fluo_plate_1_single.csv')
fluo11.dropna().to_csv('datasets/fluo_plate_1_single.csv')
od11.dropna().to_csv('datasets/od_plate_1_single.csv')
bulk_fluo_half11.dropna().to_csv('datasets/bulk_fluo_half_plate_1_single.csv')
fluo_half11.dropna().to_csv('datasets/fluo_half_plate_1_single.csv')

### Data from plate reader 2

In [None]:
# Well -> code_name lookup for plate 2, followed by its three runs.
# NOTE(review): as for plate 1, read_plate_data is called with a name string although the
# definitions in this notebook expect a DataFrame -- confirm.
plate2 = cleanse_plate(pd.read_csv('datasets/plate2_map.csv'))
df21 = read_plate_data('and_gate_21') # up to 20h
print(df21.shape)
df22 = read_plate_data('and_gate_22') # up to 16h
print(df22.shape)
df23 = read_plate_data('and_gate_23') # up to 16h
print(df23.shape)

In [None]:
# Left-join wells -> code_name -> short_name for plate 2.
# NOTE(review): `naming_map` is not defined in any cell shown in this notebook -- confirm.
merged = pd.merge(df21['Well'], plate2, on='Well', how='left')
merged = pd.merge(merged, naming_map, on='code_name', how='left')
name2 = merged['short_name']

In [None]:
# The last two arguments are h and m of generate_data (block sizing).
# NOTE(review): five results are unpacked, but the generate_data defined earlier in this
# notebook returns three -- confirm which version these calls target.
fluo21, od21, bulk_fluo21, fluo_half21, bulk_fluo_half21 = generate_data(df21, plate2, name2, 21, 2)
fluo22, od22, bulk_fluo22, fluo_half22, bulk_fluo_half22 = generate_data(df22, plate2, name2, 22, 0)
fluo23, od23, bulk_fluo23, fluo_half23, bulk_fluo_half23 = generate_data(df23, plate2, name2, 22, 1)

In [None]:
# bulk_fluo_half tables of the three plate-2 runs on a 20 x 5 grid.
plot_all([bulk_fluo_half21, bulk_fluo_half22, bulk_fluo_half23], 20, 5)

In [None]:
# OD tables of the three plate-2 runs on the same grid.
plot_all([od21, od22, od23], 20, 5)

In [None]:
# Element-wise mean of the three plate-2 runs; labels missing from any run yield NaN
# because pandas aligns the frames before adding.
bulk_fluo2 = (bulk_fluo21 + bulk_fluo22 + bulk_fluo23) / 3
fluo2 = (fluo21 + fluo22 + fluo23) / 3
od2 = (od21 + od22 + od23) / 3
fluo_half2 = (fluo_half21 + fluo_half22 + fluo_half23) / 3
bulk_fluo_half2 = (bulk_fluo_half21 + bulk_fluo_half22 + bulk_fluo_half23) / 3

In [None]:
# Write the plate-2 averages. Unlike the plate-1 export, rows containing NaN are not dropped here.
bulk_fluo2.to_csv('datasets/bulk_fluo_plate_2_triplicate.csv')
fluo2.to_csv('datasets/fluo_plate_2_triplicate.csv')
fluo_half2.to_csv('datasets/fluo_half_plate_2_triplicate.csv')
bulk_fluo_half2.to_csv('datasets/bulk_fluo_half_plate_2_triplicate.csv')
od2.to_csv('datasets/od_plate_2_triplicate.csv')

In [None]:
# Write the first plate-2 run on its own (no dropna, unlike the plate-1 export).
bulk_fluo21.to_csv('datasets/bulk_fluo_plate_2_single.csv')
fluo21.to_csv('datasets/fluo_plate_2_single.csv')
fluo_half21.to_csv('datasets/fluo_half_plate_2_single.csv')
bulk_fluo_half21.to_csv('datasets/bulk_fluo_half_plate_2_single.csv')
od21.to_csv('datasets/od_plate_2_single.csv')

### Induction Matrix

In [None]:
# NOTE(review): read_plate_data is called with a name string although the definitions in
# this notebook expect a DataFrame -- confirm which loader this cell relies on.
induction = read_plate_data('induction') # up to 20h
print(induction.shape)

In [None]:
# Well -> construct code for the induction-matrix plate. Each construct occupies a
# 3 x 3 block of wells; wells not listed here are labelled 'control' when the index
# frame is built.
induction_plate = {
    'A01': 'A18', 'A02': 'A18', 'A03': 'A18', 
    'B01': 'A18', 'B02': 'A18', 'B03': 'A18', 
    'C01': 'A18', 'C02': 'A18', 'C03': 'A18', 
    'D01': 'A195', 'D02': 'A195', 'D03': 'A195', 
    'E01': 'A195', 'E02': 'A195', 'E03': 'A195', 
    'F01': 'A195', 'F02': 'A195', 'F03': 'A195', 
    'A04': 'A29', 'A05': 'A29', 'A06': 'A29', 
    'B04': 'A29', 'B05': 'A29', 'B06': 'A29', 
    'C04': 'A29', 'C05': 'A29', 'C06': 'A29', 
    'D04': 'A259', 'D05': 'A259', 'D06': 'A259', 
    'E04': 'A259', 'E05': 'A259', 'E06': 'A259', 
    'F04': 'A259', 'F05': 'A259', 'F06': 'A259', 
    'A07': 'A76', 'A08': 'A76', 'A09': 'A76', 
    'B07': 'A76', 'B08': 'A76', 'B09': 'A76', 
    'C07': 'A76', 'C08': 'A76', 'C09': 'A76', 
    'D07': 'A267', 'D08': 'A267', 'D09': 'A267', 
    'E07': 'A267', 'E08': 'A267', 'E09': 'A267', 
    'F07': 'A267', 'F08': 'A267', 'F09': 'A267', 
    'A10': 'A109', 'A11': 'A109', 'A12': 'A109', 
    'B10': 'A109', 'B11': 'A109', 'B12': 'A109', 
    'C10': 'A109', 'C11': 'A109', 'C12': 'A109', 
    'D10': 'A294', 'D11': 'A294', 'D12': 'A294', 
    'E10': 'A294', 'E11': 'A294', 'E12': 'A294', 
    'F10': 'A294', 'F11': 'A294', 'F12': 'A294', 
    'G01': 'A323', 'G02': 'A323', 'G03': 'A323', 
    'G04': 'A323', 'G05': 'A323', 'G06': 'A323', 
    'H01': 'A323', 'H02': 'A323', 'H03': 'A323', 
}
# Plate row letter -> level index (0-2) stored as `ind1_lvl`.
induction_row = {
    'A': 0, 'B': 1, 'C': 2, 'D': 0, 'E': 1, 'F': 2,
    'G': 0, 'H': 1 # row G: wells G04-G06 are overridden to level 2 when the index frame is built
}
# Plate column number -> level index (0-2) stored as `ind2_lvl`.
induction_col = {
    '01': 0, '02': 1, '03': 2, '04': 0, '05': 1, '06': 2, 
    '07': 0, '08': 1, '09': 2, '10': 0, '11': 1, '12': 2 
}

In [None]:
# Build the per-well annotation table: construct code plus the two level indices.
# .copy() makes `index` an independent frame, so the column assignments below do not
# write into a slice of `induction` (which triggers SettingWithCopyWarning).
index = induction.iloc[:,:3].copy()
index['code_name'] = index['Well'].map(induction_plate)
col_idx = index['Well'].str[1:]
row_idx = index['Well'].str[:1]
index['ind1_lvl'] = row_idx.map(induction_row)
index['ind2_lvl'] = col_idx.map(induction_col)
# G04-G06 hold the third level of the last construct, which the row map alone cannot express.
index.loc[index['Well'].isin(['G04', 'G05', 'G06']), 'ind1_lvl'] = 2
# Wells absent from the plate map are kept and labelled 'control' rather than dropped.
index.loc[index['code_name'].isnull(), 'code_name'] = 'control'
# NOTE(review): `naming_map` is not defined in any cell shown in this notebook -- confirm.
name = pd.merge(index[['code_name', 'ind1_lvl', 'ind2_lvl']], naming_map[['code_name', 'short_name']], on='code_name', how='left')
name

In [None]:
# Spot-check the wells assigned to A323 (the construct that spans rows G and H).
index[index['code_name']=='A323']

In [None]:
def parse_minutes(x):
    """Convert a plate-reader time label into a whole number of minutes.

    The label is split on single spaces; token 0 is read as the hour count
    and token 2, when present and non-empty, as the minute count (presumably
    labels look like ``"1 h 20 min"`` or ``"1 h "`` -- confirm against the
    raw export).

    Returns ``hours * 60 + minutes`` as an int; raises ``ValueError`` if a
    token is not an integer literal.
    """
    tokens = x.split(' ')
    hours = int(tokens[0]) * 60
    # An hour-only label may or may not carry a trailing space, so guard the
    # lookup instead of assuming a third token always exists.
    mins = int(tokens[2]) if len(tokens) > 2 and tokens[2] != '' else 0
    return hours + mins

def transpose_data(df_raw):
    """Return a new frame equal to ``df_raw`` indexed by its ``short_name`` column.

    Despite the name, this variant neither transposes nor parses time labels
    (those steps are disabled for the induction data). ``df_raw`` itself is
    left unmodified.
    """
    return df_raw.set_index('short_name')

def generate_data_induce(df, h=24):
    """Split the induction export into fluorescence, OD and their product.

    Columns 0-2 are skipped; the next ``h * 3`` columns are taken as
    fluorescence and everything after that as OD. ``bulk_fluo`` is the
    label-aligned product ``fluo * od``. Each table is prefixed with the
    columns of the notebook-level ``name`` frame (joined by row index); no
    transposition is applied.

    Parameters
    ----------
    df : pandas.DataFrame
        Induction export with the header already promoted.
    h : int
        Run length used to size the fluorescence block.

    Returns
    -------
    tuple
        ``(fluo, od, bulk_fluo)``.
    """
    first = 3
    split = h * 3 + first

    fluo_raw = df.iloc[:, first:split].astype(float)
    od_raw = df.iloc[:, split:].astype(float)
    product = fluo_raw * od_raw

    fluo, od, bulk_fluo = [
        pd.concat([name, table], axis=1)
        for table in (fluo_raw, od_raw, product)
    ]
    return fluo, od, bulk_fluo

# Uses the default h=24; `name` is read from the notebook-level variable built above.
fluo, od, bulk_fluo = generate_data_induce(induction)
bulk_fluo

In [None]:
# Spot-check the rows of a single construct.
bulk_fluo[bulk_fluo['short_name']=='e11x32STPhoRadA']

In [None]:
# Write the induction tables without the row index.
bulk_fluo.to_csv('datasets/bulk_fluo_induction.csv', index=False)
fluo.to_csv('datasets/fluo_induction.csv', index=False)
od.to_csv('datasets/od_induction.csv', index=False)

### XOR Gate Top 10

In [None]:
def read_plate_map(df):
    """Split a plate-map table into its leading columns and a per-well map.

    The map is taken from ``df`` itself, so the function does no file I/O
    and works for any plate map with this layout.

    Parameters
    ----------
    df : pandas.DataFrame
        Plate layout with a ``Group`` column (or already indexed by
        ``Group``). The first two remaining columns are kept as-is; the
        other column labels must be integer-like.

    Returns
    -------
    tuple
        ``(con, xor)``: ``con`` is ``Group`` plus the first two columns;
        ``xor`` has columns ``code_name`` and ``Well``, where ``Well`` is the
        group label followed by the zero-padded two-digit column label.
    """
    # Accept both a freshly read CSV (Group is a column) and a frame that was
    # loaded with index_col=['Group'].
    data = df.set_index('Group') if 'Group' in df.columns else df
    con = data.iloc[:, :2].reset_index()
    xor = data.iloc[:, 2:].reset_index().melt(id_vars=['Group'])
    xor['variable'] = xor['variable'].apply(lambda x: "{:02d}".format(int(x)))
    xor['Well'] = xor['Group'] + xor['variable']
    xor = xor.drop(['Group', 'variable'], axis=1).rename(columns={'value': 'code_name'})
    return con, xor

# Plate layout for the XOR run; `xor` maps Well -> code_name.
con, xor = read_plate_map(pd.read_csv('datasets/xor_map.csv'))

In [None]:
def read_plate_data(df):
    """Promote the first row of ``df`` to the header and renumber the rows.

    Rows with missing values are deliberately kept (no ``dropna``). The
    input frame is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw export whose first row holds the real column names.

    Returns
    -------
    pandas.DataFrame
        New frame with the promoted header and a 0..n-1 index.
    """
    header = df.iloc[0]
    # Copy the body rows so the caller's frame is not mutated.
    body = df.iloc[1:].copy()
    body.columns = header
    return body.reset_index(drop=True)

# Raw reader export for the XOR run, with its first row promoted to the header.
data = read_plate_data(pd.read_csv('datasets/xor_gate.csv'))
data.head()

In [None]:
# For the XOR plate the map's code_name is used directly as the short_name
# (the naming_map join is disabled).
merged = pd.merge(data['Well'], xor, on='Well', how='left')
#merged = pd.merge(merged, naming_map, on='code_name', how='left')
merged.rename(columns={'code_name': 'short_name'}, inplace=True)
# NOTE(review): dropna() + reset_index() renumbers the surviving names; if an unmatched
# well is not at the end, names could shift relative to the rows of `data` -- verify.
name = merged['short_name'].dropna().reset_index(drop=True)

In [None]:
# Split the XOR export into its three measurement tables, using h=24 to size the blocks.
fluox, odx, fluo_halfx = generate_data(data, xor, name, 24)

In [None]:
# For each XOR design, keep every 4th of its replicate columns (positions 0, 4, 8, 12)
# and label them with the 2-bit strings '00', '01', '10', '11'.
# NOTE(review): this order differs from the _00/_10/_01/_11 labelling used for the
# marionette data -- confirm that the column order really is 00, 01, 10, 11 here.
cols = ['ECF20/33', 'ECF11/15']
fluo_xor = pd.DataFrame()
od_xor = pd.DataFrame()
for c in cols:
    # Fluorescence: fluox[c] selects all columns labelled c.
    temp = pd.concat([fluox[c].iloc[:,i] for i in range(0, 16, 4)], axis=1)
    temp.columns = [c + '_' + "{:02b}".format(int(i)) for i in range(4)]
    fluo_xor = pd.concat([fluo_xor, temp], axis=1)
    
    # OD: same selection and labels.
    temp = pd.concat([odx[c].iloc[:,i] for i in range(0, 16, 4)], axis=1)
    temp.columns = [c + '_' + "{:02b}".format(int(i)) for i in range(4)]
    od_xor = pd.concat([od_xor, temp], axis=1)

In [None]:
# NOTE(review): the file is named bulk_fluo_xor.csv but `fluo_xor` is built from `fluox`
# with no multiplication by OD in this notebook -- confirm the intended content.
fluo_xor.to_csv('datasets/bulk_fluo_xor.csv')
od_xor.to_csv('datasets/od_xor.csv')

### First-round Data

In [None]:
# First-round export: promote the first row to the header, then drop every row with a
# missing value.
df = pd.read_csv('datasets/raw.csv')
df.columns = df.iloc[0]
df.drop(df.index[0], inplace=True)
df.dropna(inplace=True)
print(df.shape)
df.head()

In [None]:
# Sample label (matched against the `Content` column) -> ECF identifier.
map_ecf = {'Sample X1': 'e15',
'Sample X2': 'e22',
'Sample X3': 'e32',
'Sample X4': 'e33',
'Sample X5': 'e34',
'Sample X6': 'e41',
'Sample X7': 'e42',
'Sample X8': 'e15',
'Sample X9': 'e22',
'Sample X10': 'e38',
'Sample X11': 'e16',
'Sample X12': 'e33',
'Sample X13': 'e15',
'Sample X14': 'e16',
'Sample X15': 'e17',
'Sample X16': 'e20',
'Sample X17': 'e22',
'Sample X18': 'e26',
'Sample X19': 'e32',
'Sample X20': 'e33',
'Sample X21': 'e34'}
# Sample label (matched against the `Content` column) -> intein identifier.
map_int = {'Sample X1': 'SspGyrB',
'Sample X2': 'SspGyrB',
'Sample X3': 'SspGyrB',
'Sample X4': 'SspGyrB',
'Sample X5': 'SspGyrB',
'Sample X6': 'SspGyrB',
'Sample X7': 'SspGyrB',
'Sample X8': 'TerThyXS2',
'Sample X9': 'TerThyXS2',
'Sample X10': 'TerThyXS2',
'Sample X11': 'TerThyXS1',
'Sample X12': 'TerThyXS1',
'Sample X13': 'STPhoRadA',
'Sample X14': 'STPhoRadA',
'Sample X15': 'STPhoRadA',
'Sample X16': 'STPhoRadA',
'Sample X17': 'STPhoRadA',
'Sample X18': 'STPhoRadA',
'Sample X19': 'STPhoRadA',
'Sample X20': 'STPhoRadA',
'Sample X21': 'STPhoRadA'}

In [None]:
# Annotate each row with its ECF / intein identifiers (looked up from `Content`).
df['ECF'] = df['Content'].map(map_ecf)
df['Intein'] = df['Content'].map(map_int)
# Inducer flags by plate group: A/E -> (Ara 0, Cuma 0), B/F -> (1, 0), C/G -> (0, 1), D/H -> (1, 1).
df.loc[df['Group'].isin(['A', 'E']), 'Ara'] = 0
df.loc[df['Group'].isin(['A', 'E']), 'Cuma'] = 0
df.loc[df['Group'].isin(['B', 'F']), 'Ara'] = 1
df.loc[df['Group'].isin(['B', 'F']), 'Cuma'] = 0
df.loc[df['Group'].isin(['C', 'G']), 'Ara'] = 0
df.loc[df['Group'].isin(['C', 'G']), 'Cuma'] = 1
df.loc[df['Group'].isin(['D', 'H']), 'Ara'] = 1
df.loc[df['Group'].isin(['D', 'H']), 'Cuma'] = 1
# Move the four annotation columns to the front and drop the original identifier columns.
df = pd.concat([df[['ECF', 'Intein', 'Ara', 'Cuma']],
                 df.drop(['ECF', 'Intein', 'Ara', 'Cuma', 'Well', 'Content', 'Group'], axis=1)],
                 axis=1)
df.head()

In [None]:
# Columns 0-3 are the annotation columns (ECF, Intein, Ara, Cuma); columns 4-76 are
# taken as the fluorescence block.
cols = list(range(0,77))
fluo = df.iloc[:, cols]
#fluo.to_csv('datasets/fluoOD-all.csv', index=False)
fluo.head()

In [None]:
# The four annotation columns plus columns 77-149, taken as the OD block.
cols = list(range(0,4)) + list(range(77,150))
od = df.iloc[:, cols]
#od.to_csv('datasets/OD-all.csv', index=False)
od.head()

In [None]:
def parse_minutes(x):
    """Convert a plate-reader time label into a whole number of minutes.

    The label is split on single spaces; token 0 is read as the hour count
    and token 2, when present and non-empty, as the minute count (presumably
    labels look like ``"1 h 20 min"`` or ``"1 h "`` -- confirm against the
    raw export).

    Returns ``hours * 60 + minutes`` as an int; raises ``ValueError`` if a
    token is not an integer literal.
    """
    tokens = x.split(' ')
    hours = int(tokens[0]) * 60
    # An hour-only label may or may not carry a trailing space, so guard the
    # lookup instead of assuming a third token always exists.
    mins = int(tokens[2]) if len(tokens) > 2 and tokens[2] != '' else 0
    return hours + mins

# Keep the rows with Ara == 1 and Cuma == 0. .copy() gives an independent frame so the
# column assignment below does not act on a filtered slice of `fluo`
# (which triggers SettingWithCopyWarning).
fluo11 = fluo[(fluo['Ara']==1) & (fluo['Cuma']==0)].copy()
# Row label = ECF identifier followed by the intein identifier.
fluo11['index'] = fluo11['ECF'] + fluo11['Intein']
fluo11 = fluo11.set_index('index').drop(['ECF', 'Intein', 'Ara', 'Cuma'], axis=1)
fluo11 = fluo11.transpose().reset_index()

# After reset_index the former column labels (the time labels) sit in column 0,
# because the columns axis carries the name 0 from the header promotion.
fluo11['time'] = fluo11[0].apply(parse_minutes)
fluo11 = fluo11.set_index('time')
fluo11 = fluo11.drop(0, axis=1)
fluo11

In [None]:
# Same reshaping for OD: keep the rows with Ara == 1 and Cuma == 0. .copy() gives an
# independent frame so the column assignment below does not act on a filtered slice of
# `od` (which triggers SettingWithCopyWarning).
od11 = od[(od['Ara']==1) & (od['Cuma']==0)].copy()
# Row label = ECF identifier followed by the intein identifier.
od11['index'] = od11['ECF'] + od11['Intein']
od11 = od11.set_index('index').drop(['ECF', 'Intein', 'Ara', 'Cuma'], axis=1)
od11 = od11.transpose().reset_index()

# After reset_index the former column labels (the time labels) sit in column 0.
od11['time'] = od11[0].apply(parse_minutes)
od11 = od11.set_index('time')
od11 = od11.drop(0, axis=1)
od11

In [None]:
# Convert every fluorescence column to float. A column that cannot be converted is
# reported by name and left unchanged; only conversion errors are caught, so interrupts
# and unrelated failures still surface.
for col in fluo11.columns:
    try:
        fluo11[col] = fluo11[col].astype(float)
    except (ValueError, TypeError):
        print(col)

In [None]:
# Convert every OD column to float; unlike the fluorescence loop, a failure here is not caught.
for col in od11.columns:
    od11[col] = od11[col].astype(float)

In [None]:
# Export the Ara == 1 / Cuma == 0 subset; the third file is the label-aligned product fluo * OD.
fluo11.to_csv('datasets/fluo-10.csv')
od11.to_csv('datasets/od-10.csv')
(fluo11 * od11).to_csv('datasets/bulk-fluo-10.csv')