In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [2]:
def parse_minutes(x):
    
    spl = x.split('.')[0].split(' ')
    hours = int(spl[0]) * 60
    mins = int(spl[2]) if spl[2] != '' else 0
    return hours + mins

def transpose_data(df, col):
    
    df.set_index(col, inplace=True)
    df = df.transpose().reset_index()
    df['time'] = df['index'].apply(parse_minutes)
    df = df.set_index('time').drop('index', axis=1)
    return df

def generate_data(df, name, h=24, m=0, num_data=2, start_idx=3, col='short_name'):
    
    datas = []
    idxs = [start_idx]
    for i in range(1, num_data+1):
        idxs.append(h * 3 + idxs[i-1] + 1 + int(m/20))
        data = (df.iloc[:, idxs[i-1]:idxs[i]]).astype(float)
        data = pd.concat([name, data], axis=1)
        data = transpose_data(data, col)
        datas.append(data)
    return datas

### AND Gates Screening

In [5]:
def read_map(plate_map):
    
    plate_map = plate_map.melt(id_vars=['group'])
    plate_map['variable'] = plate_map['variable'].astype(int)
    plate_map['Well'] = plate_map['group'] + plate_map['variable'].apply(lambda x: '{:02d}'.format(x))
    plate_map.loc[~plate_map['group'].isin(['H']), 'inducer'] = ((plate_map['variable'] - 1) / 6).astype(int)
    plate_map.loc[plate_map['group'].isin(['H']), 'inducer'] = 0
    return plate_map[['Well', 'value', 'inducer']]

def generate_metadata(well, plate_map):
    
    dictionary = pd.read_csv('datasets/dictionary.csv')
    sample_map = dict(zip(dictionary['id'], dictionary['short_name']))
    control_map = {
        'BK': 'blank-kan',
        'BA': 'blank-amp',
        '3K3-N': 'negative-control-kan',
        '4A3-N': 'negative-control-amp',
        '4A3-P': 'positive-control-amp'
    }
    sample_map.update(control_map)
    df = pd.merge(well, plate_map, on='Well', how='left').dropna(subset=['value']).reset_index(drop=True)
    df['short_name'] = df['value'].map(sample_map) + '_' + df['inducer'].astype(int).astype(str)
    return df.dropna()

In [9]:
folder = '024-pCympBAD-E16-41'
plate_map = read_map(pd.read_csv('datasets/experiment/{}/plate_map.csv'.format(folder)))
raw_data = pd.read_csv('datasets/experiment/{}/raw.csv'.format(folder), skiprows=[0])
metadata = generate_metadata(raw_data['Well'], plate_map)
data = raw_data[raw_data['Well'].isin(metadata['Well'])].reset_index(drop=True)
datas = generate_data(data, metadata['short_name'], h=23, m=20, num_data=4)

In [10]:
datas[0]

short_name,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,blank-kan_0,blank-kan_0,negative-control-kan_0,negative-control-kan_0,blank-amp_0,negative-control-amp_0,negative-control-amp_0,positive-control-amp_0,positive-control-amp_0
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,,,0.041,0.013,,0.034,0.041,0.036,0.019,,,,,,,,,
20,,,0.023,0.021,,0.022,0.034,0.029,0.029,,,,,,,,,
40,,,0.030,0.023,,0.020,0.040,0.030,0.028,,,,,,,,,
60,,,0.022,0.010,,0.038,0.054,0.047,0.044,,,,,,,,,
80,,,0.060,0.048,,0.058,0.072,0.067,0.062,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1320,,,1.031,1.332,,1.452,1.456,1.451,1.219,,,,,,,,,
1340,,,1.030,1.299,,1.420,1.447,1.460,1.205,,,,,,,,,
1360,,,1.025,1.310,,1.425,1.453,1.443,1.192,,,,,,,,,
1380,,,1.009,1.309,,1.424,1.448,1.430,1.188,,,,,,,,,


In [None]:
datas[0].to_csv('datasets/experiment/{}/ods.csv'.format(folder))
datas[1].to_csv('datasets/experiment/{}/fluos.csv'.format(folder))
datas[2].to_csv('datasets/experiment/{}/fluos-lower-1.csv'.format(folder))
datas[3].to_csv('datasets/experiment/{}/fluos-lower-2.csv'.format(folder))

### Self Experiment

#### Promoters characterization

In [11]:
def read_map(plate_map):
    
    plate_map = plate_map.melt(id_vars=['group'])
    plate_map['variable'] = plate_map['variable'].astype(int)
    plate_map['Well'] = plate_map['group'] + plate_map['variable'].apply(lambda x: '{:02d}'.format(x))
    #plate_map.loc[~plate_map['group'].isin(['H']), 'inducer'] = plate_map['variable'] - 1
    plate_map.loc[plate_map['group'].isin(['A', 'C', 'E']), 'inducer'] = (plate_map['variable'] - 1) + 12
    plate_map.loc[plate_map['group'].isin(['B', 'D', 'F']), 'inducer'] = plate_map['variable'] - 1
    plate_map.loc[plate_map['group'].isin(['H']), 'inducer'] = 0
    return plate_map[['Well', 'value', 'inducer']]

def read_map_13(plate_map):
    
    plate_map = plate_map.melt(id_vars=['group'])
    plate_map['variable'] = plate_map['variable'].astype(int)
    plate_map['Well'] = plate_map['group'] + plate_map['variable'].apply(lambda x: '{:02d}'.format(x))
    plate_map.loc[~plate_map['group'].isin(['G', 'H']), 'inducer'] = (plate_map['variable'] - 1) % 6
    plate_map.loc[plate_map['group'].isin(['G', 'H']), 'inducer'] = (plate_map['variable'] - 1) % 2
    return plate_map[['Well', 'value', 'inducer']]

def generate_metadata(well, plate_map):
    
    sample_map = {
        'P62': 'pBAD/Ara',
        'P64': 'pCin/OHC14',
        'P66': 'pCymRC/Cuma',
        'P68': 'pLuxB/AHL',
        'P69': 'pPhlF/DAPG',
        'P70': 'pSalTTC/Sal',
        'P72': 'pRhaB/Rha',
        'G720': 'e11x32STPhoRadA',
        'G721': 'e15x32NpuSspS2',
        'G722': 'e16x33NrdA2',
        'G723': 'e20x32gp411',
        'G724': 'e32x30SspGyrB',
        'G725': 'e34x30MjaKlbA',
        'G726': 'e38x32gp418',
        'G727': 'e41x32NrdJ1',
        'G728': 'e42x32STIMPDH1',
        'BK': 'Blank_Kan',
        'BA': 'Blank_Amp',
        '3K3-N': 'negative-control-kan',
        '4A3-N': 'negative-control-amp',
        '4A3-P': 'positive-control-amp'
    }
    df = pd.merge(well, plate_map, on='Well', how='left').dropna(subset=['value']).reset_index(drop=True)
    df['short_name'] = df['value'].map(sample_map) + '_' + df['inducer'].astype(int).astype(str)
    return df.dropna()

In [14]:
folder = '024-pCympBAD-E16-41'
plate_map = read_map_13(pd.read_csv('datasets/experiment/{}/plate_map.csv'.format(folder)))
raw_data = pd.read_csv('datasets/experiment/{}/raw.csv'.format(folder), skiprows=[0])
metadata = generate_metadata(raw_data['Well'], plate_map)
data = raw_data[raw_data['Well'].isin(metadata['Well'])].reset_index(drop=True)
datas = generate_data(data, metadata['short_name'], h=23, m=20, num_data=4)

In [15]:
datas[1]

short_name,e16x33NrdA2_0,e16x33NrdA2_1,e16x33NrdA2_2,e16x33NrdA2_3,e16x33NrdA2_4,e16x33NrdA2_5,e41x32NrdJ1_0,e41x32NrdJ1_1,e41x32NrdJ1_2,e41x32NrdJ1_3,e41x32NrdJ1_4,e41x32NrdJ1_5,e16x33NrdA2_0,e16x33NrdA2_1,e16x33NrdA2_2,e16x33NrdA2_3,e16x33NrdA2_4,e16x33NrdA2_5,e41x32NrdJ1_0,e41x32NrdJ1_1,e41x32NrdJ1_2,e41x32NrdJ1_3,e41x32NrdJ1_4,e41x32NrdJ1_5,e16x33NrdA2_0,e16x33NrdA2_1,e16x33NrdA2_2,e16x33NrdA2_3,e16x33NrdA2_4,e16x33NrdA2_5,e41x32NrdJ1_0,e41x32NrdJ1_1,e41x32NrdJ1_2,e41x32NrdJ1_3,e41x32NrdJ1_4,e41x32NrdJ1_5,e16x33NrdA2_0,e16x33NrdA2_1,e16x33NrdA2_2,e16x33NrdA2_3,e16x33NrdA2_4,e16x33NrdA2_5,e41x32NrdJ1_0,e41x32NrdJ1_1,e41x32NrdJ1_2,e41x32NrdJ1_3,e41x32NrdJ1_4,e41x32NrdJ1_5,e16x33NrdA2_0,e16x33NrdA2_1,e16x33NrdA2_2,e16x33NrdA2_3,e16x33NrdA2_4,e16x33NrdA2_5,e41x32NrdJ1_0,e41x32NrdJ1_1,e41x32NrdJ1_2,e41x32NrdJ1_3,e41x32NrdJ1_4,e41x32NrdJ1_5,e16x33NrdA2_0,e16x33NrdA2_1,e16x33NrdA2_2,e16x33NrdA2_3,e16x33NrdA2_4,e16x33NrdA2_5,e41x32NrdJ1_0,e41x32NrdJ1_1,e41x32NrdJ1_2,e41x32NrdJ1_3,e41x32NrdJ1_4,e41x32NrdJ1_5,Blank_Kan_0,Blank_Kan_1,negative-control-kan_0,negative-control-kan_1,Blank_Amp_1,negative-control-amp_0,negative-control-amp_1,positive-control-amp_0,positive-control-amp_1
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
0,100.5,72.5,101.5,101.5,59.5,74.5,76.5,53.5,50.5,42.5,51.5,38.5,125.5,165.5,149.5,156.5,104.5,132.5,156.5,133.5,108.5,108.5,132.5,113.5,104.5,145.5,141.5,141.5,70.5,129.5,143.5,106.5,56.5,74.5,93.5,81.5,113.5,111.5,120.5,118.5,119.5,108.5,113.5,152.5,83.5,79.5,84.5,106.5,87.5,93.5,114.5,94.5,74.5,87.5,108.5,103.5,58.5,82.5,46.5,54.5,92.5,103.5,288.5,106.5,85.5,83.5,75.5,108.5,75.5,59.5,45.5,42.5,,,39.5,8.5,,45.5,31.5,1399.5,1058.5
20,88.5,58.5,78.5,78.5,60.5,68.5,47.5,56.5,42.5,27.5,65.5,39.5,114.5,147.5,112.5,121.5,112.5,113.5,111.5,114.5,95.5,89.5,86.5,117.5,114.5,111.5,102.5,106.5,102.5,113.5,57.5,79.5,32.5,71.5,68.5,39.5,125.5,114.5,114.5,97.5,110.5,121.5,108.5,114.5,69.5,88.5,84.5,98.5,83.5,78.5,109.5,91.5,73.5,86.5,92.5,79.5,48.5,62.5,33.5,64.5,108.5,97.5,327.5,123.5,77.5,109.5,73.5,59.5,75.5,71.5,60.5,33.5,,,36.5,37.5,,46.5,69.5,1048.5,1050.5
40,96.5,55.5,71.5,63.5,41.5,56.5,50.5,35.5,54.5,22.5,48.5,21.5,109.5,131.5,100.5,96.5,98.5,76.5,77.5,81.5,78.5,89.5,89.5,94.5,127.5,99.5,105.5,99.5,66.5,75.5,55.5,54.5,46.5,42.5,47.5,30.5,108.5,89.5,90.5,93.5,87.5,60.5,68.5,83.5,82.5,66.5,86.5,78.5,92.5,77.5,76.5,70.5,63.5,76.5,80.5,49.5,35.5,27.5,38.5,35.5,71.5,114.5,344.5,87.5,80.5,114.5,51.5,52.5,65.5,36.5,40.5,66.5,,,31.5,36.5,,40.5,59.5,1342.5,1346.5
60,89.5,62.5,58.5,58.5,61.5,62.5,46.5,38.5,57.5,53.5,42.5,41.5,99.5,150.5,104.5,83.5,84.5,77.5,93.5,94.5,75.5,80.5,88.5,101.5,101.5,102.5,82.5,70.5,70.5,74.5,74.5,46.5,34.5,50.5,37.5,36.5,116.5,93.5,85.5,77.5,60.5,77.5,78.5,68.5,70.5,74.5,74.5,93.5,109.5,87.5,95.5,54.5,53.5,67.5,68.5,63.5,75.5,73.5,62.5,47.5,103.5,112.5,329.5,101.5,84.5,95.5,53.5,33.5,35.5,44.5,29.5,43.5,,,37.5,36.5,,46.0,48.0,1770.0,1746.0
80,100.5,61.5,73.5,68.5,48.5,43.5,45.5,45.5,36.5,39.5,54.5,39.5,119.5,135.5,99.5,107.5,97.5,88.5,75.5,76.5,69.5,79.5,102.5,114.5,87.5,124.5,93.5,89.5,89.5,77.5,52.5,45.5,50.5,51.5,66.5,43.5,93.5,106.5,83.5,93.5,78.5,55.5,72.5,81.5,80.5,50.5,68.5,97.5,76.5,79.5,107.5,70.5,73.5,58.5,70.5,55.5,58.5,51.5,41.5,46.5,118.5,109.5,342.5,89.5,89.5,101.5,47.5,56.5,36.5,50.5,46.5,68.5,,,46.5,36.5,,77.5,82.5,2199.5,2235.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1320,4645.5,4233.5,4155.5,4274.5,4390.5,4403.5,4042.5,4092.5,4132.5,4397.5,4429.5,4637.5,4776.5,4412.5,4805.5,5649.5,7284.5,7157.5,4088.5,4112.5,4177.5,4303.5,4383.5,4586.5,4762.5,5837.5,6396.5,11895.5,19962.5,23241.5,4071.5,4072.5,4106.5,4237.5,4258.5,4495.5,4838.5,9044.5,16521.5,35611.5,45709.5,50036.5,4109.5,4112.5,4202.5,4382.5,4288.5,4617.5,4959.5,12134.5,27393.5,51913.5,58737.5,41296.5,4128.5,4052.5,4263.5,4416.5,4309.5,4647.5,5489.5,27918.5,71195.5,102023.5,119611.5,123824.5,4113.5,4051.5,4260.5,4405.5,4483.5,4878.5,,,3642.5,3798.5,,4030.5,4125.5,62000.5,66089.5
1340,4693.5,4299.5,4236.5,4329.5,4426.5,4451.5,4107.5,4108.5,4215.5,4432.5,4473.5,4646.5,4783.5,4453.5,4845.5,5708.5,7368.5,7228.5,4117.5,4095.5,4214.5,4369.5,4372.5,4643.5,4839.5,5921.5,6375.5,12030.5,19865.5,23307.5,4063.5,4126.5,4131.5,4287.5,4334.5,4459.5,4907.5,9059.5,16588.5,35680.5,45946.5,49876.5,4161.5,4180.5,4268.5,4390.5,4373.5,4761.5,5089.5,12179.5,27461.5,52013.5,58641.5,41315.5,4217.5,4104.5,4299.5,4450.5,4362.5,4733.5,5568.5,28094.5,72198.5,102625.5,118903.5,124233.5,4189.5,4160.5,4318.5,4468.5,4571.5,4967.5,,,3708.5,3855.5,,4060.0,4171.0,62756.0,70844.0
1360,4753.0,4346.0,4249.0,4370.0,4485.0,4503.0,4140.0,4153.0,4264.0,4467.0,4540.0,4715.0,4854.0,4537.0,4895.0,5744.0,7424.0,7286.0,4227.0,4190.0,4288.0,4411.0,4425.0,4621.0,4932.0,5936.0,6462.0,11953.0,19804.0,23313.0,4136.0,4117.0,4207.0,4351.0,4382.0,4508.0,5000.0,9176.0,16479.0,35446.0,45953.0,49747.0,4225.0,4209.0,4304.0,4457.0,4408.0,4707.0,5159.0,12255.0,27444.0,52167.0,58742.0,40982.0,4242.0,4217.0,4344.0,4490.0,4440.0,4843.0,5585.0,27838.0,70804.0,101962.0,119103.0,123820.0,4222.0,4241.0,4329.0,4555.0,4674.0,5074.0,,,3758.0,3860.0,,4155.5,4240.5,62412.5,71505.5
1380,4809.0,4389.0,4301.0,4420.0,4478.0,4534.0,4157.0,4197.0,4306.0,4525.0,4574.0,4748.0,4916.0,4573.0,4933.0,5752.0,7446.0,7317.0,4231.0,4191.0,4356.0,4436.0,4514.0,4729.0,4919.0,6011.0,6541.0,12026.0,19928.0,23239.0,4141.0,4160.0,4199.0,4411.0,4411.0,4572.0,5007.0,9163.0,16532.0,35366.0,45799.0,49823.0,4257.0,4267.0,4318.0,4544.0,4525.0,4820.0,5185.0,12204.0,27333.0,51794.0,58703.0,40950.0,4287.0,4181.0,4366.0,4555.0,4487.0,4820.0,5613.0,27867.0,70703.0,102607.0,118185.0,122749.0,4269.0,4275.0,4338.0,4610.0,4738.0,5110.0,,,3819.0,3901.0,,4155.0,4285.0,62493.0,68429.0


In [None]:
datas[0].to_csv('datasets/experiment/{}/ods.csv'.format(folder))
datas[1].to_csv('datasets/experiment/{}/fluos.csv'.format(folder))
datas[2].to_csv('datasets/experiment/{}/fluos-lower-1.csv'.format(folder))
datas[3].to_csv('datasets/experiment/{}/fluos-lower-2.csv'.format(folder))

#### ECFs Characterization

In [29]:
def read_map(plate_map, plate_idx):
    
    plate_map = plate_map.melt(id_vars=['group'])
    plate_map['variable'] = plate_map['variable'].astype(int)
    plate_map['Well'] = plate_map['group'] + plate_map['variable'].apply(lambda x: '{:02d}'.format(x))
    plate_map.loc[~plate_map['group'].isin(['G', 'H']), 'cuma'] = plate_map['group'].apply(lambda x: ord(x)) - 65
    plate_map.loc[~plate_map['group'].isin(['G', 'H']), 'ara'] = (plate_map['variable'] - 1) % 6
    plate_map.loc[plate_map['group'].isin(['H']), 'cuma'] = plate_idx
    plate_map.loc[plate_map['group'].isin(['H']), 'ara'] = (plate_map['variable'] - 1) % 3
    return plate_map[['Well', 'value', 'cuma', 'ara']]

def generate_metadata(well, plate_map):
    
    sample_map = {
        'G720': 'e11x32STPhoRadA',
        'G721': 'e15x32NpuSspS2',
        'G722': 'e16x33NrdA2',
        'G723': 'e20x32gp411',
        'G724': 'e32x30SspGyrB',
        'G725': 'e34x30MjaKlbA',
        'G726': 'e38x32gp418',
        'G727': 'e41x32NrdJ1',
        'G728': 'e42x32STIMPDH1',
        'BK': 'Blank_Kan',
        'BA': 'Blank_Amp',
        '3K3-N': 'negative-control-kan',
        '4A3-N': 'negative-control-amp',
        '4A3-P': 'positive-control-amp'
    }
    df = pd.merge(well, plate_map, on='Well', how='left').dropna(subset=['value']).reset_index(drop=True)
    df['short_name'] = df['value'].map(sample_map) + '_' + df['cuma'].astype(int).astype(str) + df['ara'].astype(int).astype(str)
    return df.dropna()

folder = '026-pLuxpSal-E41-42'
plate_map = read_map(pd.read_csv('datasets/experiment/{}/plate_map.csv'.format(folder)), 0)
raw_data = pd.read_csv('datasets/experiment/{}/raw.csv'.format(folder), skiprows=[0])
metadata = generate_metadata(raw_data['Well'], plate_map)
data = raw_data[raw_data['Well'].isin(metadata['Well'])].reset_index(drop=True)
datas = generate_data(data, metadata['short_name'], h=24, m=0, num_data=4)

In [30]:
datas[0]

short_name,e41x32NrdJ1_00,e41x32NrdJ1_01,e41x32NrdJ1_02,e41x32NrdJ1_03,e41x32NrdJ1_04,e41x32NrdJ1_05,e42x32STIMPDH1_00,e42x32STIMPDH1_01,e42x32STIMPDH1_02,e42x32STIMPDH1_03,e42x32STIMPDH1_04,e42x32STIMPDH1_05,e41x32NrdJ1_10,e41x32NrdJ1_11,e41x32NrdJ1_12,e41x32NrdJ1_13,e41x32NrdJ1_14,e41x32NrdJ1_15,e42x32STIMPDH1_10,e42x32STIMPDH1_11,e42x32STIMPDH1_12,e42x32STIMPDH1_13,e42x32STIMPDH1_14,e42x32STIMPDH1_15,e41x32NrdJ1_20,e41x32NrdJ1_21,e41x32NrdJ1_22,e41x32NrdJ1_23,e41x32NrdJ1_24,e41x32NrdJ1_25,e42x32STIMPDH1_20,e42x32STIMPDH1_21,e42x32STIMPDH1_22,e42x32STIMPDH1_23,e42x32STIMPDH1_24,e42x32STIMPDH1_25,e41x32NrdJ1_30,e41x32NrdJ1_31,e41x32NrdJ1_32,e41x32NrdJ1_33,e41x32NrdJ1_34,e41x32NrdJ1_35,e42x32STIMPDH1_30,e42x32STIMPDH1_31,e42x32STIMPDH1_32,e42x32STIMPDH1_33,e42x32STIMPDH1_34,e42x32STIMPDH1_35,e41x32NrdJ1_40,e41x32NrdJ1_41,e41x32NrdJ1_42,e41x32NrdJ1_43,e41x32NrdJ1_44,e41x32NrdJ1_45,e42x32STIMPDH1_40,e42x32STIMPDH1_41,e42x32STIMPDH1_42,e42x32STIMPDH1_43,e42x32STIMPDH1_44,e42x32STIMPDH1_45,e41x32NrdJ1_50,e41x32NrdJ1_51,e41x32NrdJ1_52,e41x32NrdJ1_53,e41x32NrdJ1_54,e41x32NrdJ1_55,e42x32STIMPDH1_50,e42x32STIMPDH1_51,e42x32STIMPDH1_52,e42x32STIMPDH1_53,e42x32STIMPDH1_54,e42x32STIMPDH1_55,Blank_Kan_02,Blank_Kan_00,negative-control-kan_01,negative-control-kan_02,Blank_Amp_01,negative-control-amp_02,negative-control-amp_00,positive-control-amp_01,positive-control-amp_02
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
0,0.055,0.029,0.019,0.002,0.020,0.032,0.018,0.013,0.043,0.010,0.008,0.004,0.030,0.035,0.025,0.020,0.029,0.033,0.012,0.013,0.033,0.017,0.015,0.038,0.021,-0.002,0.009,0.003,0.00005,-0.002,0.025,-0.005,-0.002,-0.003,0.003,0.015,0.059,0.041,0.003,-0.001,-0.003,-0.002,0.003,0.011,0.032,0.019,-0.002,0.009,0.068,-0.00035,0.010,-0.002,0.006,0.012,0.006,0.050,0.045,0.039,0.036,0.056,0.028,0.015,0.008,-0.003,0.026,-0.00025,-0.004,0.050,0.005,0.043,0.024,0.036,,,0.029,0.038,,0.009,0.007,0.019,0.015
20,0.019,0.014,0.011,0.010,0.011,0.013,0.014,0.012,0.015,0.015,0.021,0.023,0.014,0.010,0.010,0.012,0.012,0.012,0.012,0.013,0.013,0.012,0.017,0.022,0.012,0.006,0.008,0.008,0.00700,0.010,0.009,0.010,0.011,0.012,0.012,0.020,0.013,0.010,0.012,0.013,0.012,0.013,0.015,0.013,0.013,0.016,0.012,0.017,0.014,0.00900,0.008,0.009,0.012,0.013,0.013,0.013,0.012,0.018,0.034,0.019,0.015,0.011,0.014,0.012,0.014,0.01600,0.016,0.016,0.014,0.017,0.017,0.019,,,0.027,0.021,,0.019,0.022,0.037,0.025
40,0.022,0.018,0.015,0.013,0.014,0.018,0.015,0.013,0.017,0.016,0.023,0.025,0.018,0.014,0.012,0.013,0.016,0.014,0.014,0.015,0.015,0.015,0.020,0.023,0.015,0.008,0.010,0.012,0.01100,0.012,0.013,0.012,0.012,0.016,0.015,0.023,0.014,0.011,0.013,0.014,0.013,0.014,0.017,0.016,0.015,0.017,0.014,0.020,0.014,0.01100,0.011,0.010,0.014,0.017,0.016,0.016,0.014,0.017,0.048,0.020,0.015,0.012,0.016,0.014,0.016,0.01700,0.018,0.017,0.016,0.019,0.020,0.022,,,0.029,0.026,,0.023,0.026,0.030,0.030
60,0.027,0.024,0.019,0.018,0.020,0.022,0.021,0.020,0.022,0.022,0.029,0.029,0.024,0.020,0.018,0.020,0.022,0.020,0.019,0.020,0.020,0.021,0.026,0.030,0.020,0.014,0.014,0.016,0.01500,0.017,0.018,0.017,0.019,0.019,0.020,0.027,0.019,0.016,0.018,0.020,0.017,0.018,0.021,0.020,0.019,0.022,0.018,0.024,0.020,0.01600,0.016,0.014,0.019,0.022,0.022,0.021,0.019,0.021,0.043,0.027,0.022,0.020,0.023,0.020,0.022,0.02400,0.025,0.025,0.023,0.025,0.025,0.028,,,0.045,0.037,,0.034,0.035,0.039,0.036
80,0.039,0.036,0.032,0.030,0.033,0.033,0.032,0.031,0.035,0.032,0.038,0.042,0.038,0.032,0.031,0.031,0.038,0.032,0.033,0.032,0.031,0.033,0.039,0.042,0.030,0.024,0.024,0.028,0.02700,0.029,0.030,0.029,0.029,0.030,0.030,0.038,0.029,0.025,0.029,0.031,0.030,0.030,0.032,0.031,0.030,0.033,0.029,0.035,0.031,0.02700,0.026,0.024,0.031,0.050,0.088,0.075,0.030,0.033,0.053,0.039,0.033,0.032,0.034,0.032,0.033,0.03500,0.096,0.071,0.034,0.038,0.037,0.043,,,0.066,0.054,,0.050,0.053,0.053,0.048
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1360,1.215,1.193,1.189,1.085,1.067,1.022,1.211,1.218,1.216,1.230,1.260,1.217,1.277,1.070,1.014,0.970,0.854,0.914,1.107,1.030,0.966,1.057,1.235,1.148,1.276,1.043,0.965,0.892,0.91400,0.834,1.119,1.085,0.998,0.858,0.801,1.090,1.279,1.032,0.948,0.827,0.848,0.771,1.008,1.010,0.918,0.873,0.863,1.103,1.270,1.05300,0.975,0.874,0.857,0.888,0.961,0.945,0.925,0.839,0.842,0.941,1.236,1.138,1.502,0.906,0.811,0.77700,1.783,0.866,0.838,0.745,0.755,1.932,,,1.202,1.039,,1.326,1.412,1.363,1.243
0,65.500,72.500,62.500,42.500,28.500,42.500,22.500,32.500,41.500,29.500,25.500,20.500,113.500,130.500,105.500,124.500,102.500,93.500,85.500,80.500,87.500,68.500,72.500,85.500,127.500,115.500,117.500,125.500,104.50000,125.500,96.500,55.500,76.500,72.500,120.500,85.500,103.500,102.500,52.500,87.500,72.500,83.500,76.500,44.500,76.500,84.500,97.500,88.500,98.500,101.50000,124.500,94.500,95.500,88.500,76.500,105.500,68.500,57.500,55.500,41.500,97.500,82.500,87.500,80.500,78.500,87.50000,69.500,84.500,76.500,86.500,88.500,75.500,,,64.500,29.500,,75.000,47.000,1040.000,908.000
20,42.000,60.000,41.000,47.000,46.000,20.000,36.000,18.000,5.000,-2.000,3.000,12.000,106.000,110.000,99.000,82.000,59.000,89.000,83.000,65.000,74.000,57.000,73.000,39.000,87.000,95.000,76.000,84.000,95.00000,75.000,64.000,44.000,67.000,58.000,71.000,46.000,86.000,96.000,77.000,71.000,79.000,67.000,80.000,35.000,50.000,58.000,64.000,43.000,62.000,73.00000,100.000,51.000,67.000,68.000,57.000,37.000,58.000,41.000,44.000,55.000,66.000,87.000,81.000,60.000,76.000,64.00000,74.000,57.000,42.000,32.000,67.000,53.000,,,67.000,29.000,,68.000,62.000,1041.000,955.000
40,67.500,58.500,35.500,43.500,43.500,46.500,37.500,42.500,34.500,20.500,21.500,13.500,99.500,110.500,105.500,81.500,84.500,71.500,66.500,79.500,50.500,57.500,68.500,63.500,89.500,102.500,92.500,70.500,85.50000,92.500,95.500,52.500,58.500,69.500,57.500,75.500,116.500,82.500,70.500,55.500,70.500,68.500,77.500,49.500,53.500,71.500,66.500,82.500,77.500,79.50000,90.500,74.500,72.500,72.500,65.500,61.500,44.500,34.500,48.500,41.500,101.500,101.500,103.500,70.500,74.500,67.50000,66.500,55.500,46.500,45.500,63.500,42.500,,,62.500,51.500,,68.000,65.000,1351.000,1202.000


In [31]:
datas[0].to_csv('datasets/experiment/{}/ods.csv'.format(folder))
datas[1].to_csv('datasets/experiment/{}/fluos.csv'.format(folder))
datas[2].to_csv('datasets/experiment/{}/fluos-lower-1.csv'.format(folder))
datas[3].to_csv('datasets/experiment/{}/fluos-lower-2.csv'.format(folder))

#### Initial experiments

In [None]:
def read_map(plate_map, plate_idx):
    
    plate_map = plate_map.melt(id_vars=['group'])
    plate_map['variable'] = plate_map['variable'].astype(int)
    plate_map['Well'] = plate_map['group'] + plate_map['variable'].apply(lambda x: '{:02d}'.format(x))
    #plate_map.loc[~plate_map['group'].isin(['G', 'H']), 'cuma'] = plate_map['group'].apply(lambda x: ord(x)) - 65
    plate_map.loc[~plate_map['group'].isin(['H']), 'inducer'] = (plate_map['variable'] - 1) % 6
    plate_map.loc[plate_map['group'].isin(['H']), 'inducer'] = plate_idx
    #plate_map.loc[plate_map['group'].isin(['G', 'H']), 'ara'] = (plate_map['variable'] - 1) % 3
    #return plate_map[['Well', 'value', 'cuma', 'ara']]
    return plate_map[['Well', 'value', 'inducer']]

def generate_metadata(well, plate_map):
    
    sample_map = {
        'P62': 'pBAD/Ara',
        'P64': 'pCin/OHC14',
        'P66': 'pCymRC/Cuma',
        'P68': 'pLuxB/AHL',
        'P69': 'pPhlF/DAPG',
        'P70': 'pSalTTC/Sal',
        'P72': 'pRhaB/Rha',
        'E720': 'e11x33',
        'E721': 'e15x33',
        'E722': 'e16x33',
        'E723': 'e20x33',
        'E724': 'e32x33',
        'E725': 'e34x33',
        'E726': 'e38x33',
        'E727': 'e41x33',
        'E728': 'e42x33',
        'G726': 'e11x32STPhoRadA',
        'G721': 'e15x32NpuSspS2',
        'G722': 'e16x33NrdA2',
        'G723': 'e20x32gp411',
        'G724': 'e32x30SspGyrB',
        'G725': 'e34x30MjaKlbA',
        'G726': 'e38x32gp418',
        'G727': 'e41x32NrdJ1',
        'G728': 'e42x32STIMPDH1',
        'BK': 'Blank_Kan',
        'BA': 'Blank_Amp',
        '3K3-N': 'negative-control-kan',
        '4A3-N': 'negative-control-amp',
        '4A3-P': 'positive-control-amp'
    }
    df = pd.merge(well, plate_map, on='Well', how='left').dropna(subset=['value']).reset_index(drop=True)
    df['short_name'] = df['value'].map(sample_map) + '_' + df['inducer'].astype(int).astype(str)
    return df.dropna()

In [None]:
plate_map = read_map(pd.read_csv('datasets/experiment/plate-3/plate_map.csv'), 1)
raw_data = pd.read_csv('datasets/experiment/plate-3/raw.csv', skiprows=[0])
metadata = generate_metadata(raw_data['Well'], plate_map)
data = raw_data[raw_data['Well'].isin(metadata['Well'])].reset_index(drop=True)
datas = generate_data(data, metadata['short_name'], h=24, m=0, num_data=4)

In [None]:
temp = '''
ods1, fluos1, fluos2, fluos3 = [], [], [], []
for i in range(0, 140, 20):
    if i==20:
        continue
    ods1.append(pd.read_csv('datasets/experiment/plate-1/od-{}.csv'.format(i)).iloc[:,3])   
    fluos1.append(pd.read_csv('datasets/experiment/plate-1/fluo-{}.csv'.format(i)).iloc[:,3])
    fluos2.append(pd.read_csv('datasets/experiment/plate-1/fluo-{}.csv'.format(i)).iloc[:,4])
    fluos3.append(pd.read_csv('datasets/experiment/plate-1/fluo-{}.csv'.format(i)).iloc[:,5])
pd.concat(fluos3, axis=1).T.to_csv('datasets/experiment/plate-1/plate1-fluos3-add.csv', index=False)
'''

In [None]:
datas[0].to_csv('datasets/experiment/plate-3/plate3-ods.csv')
datas[1].to_csv('datasets/experiment/plate-3/plate3-fluos.csv')
datas[2].to_csv('datasets/experiment/plate-3/plate3-fluos-gain-lower-1.csv')
datas[3].to_csv('datasets/experiment/plate-3/plate3-fluos-gain-lower-2.csv')

### AND Induction Matrix

In [3]:
gates = ['e11x32STPhoRadA', 'e15x32NpuSspS2', 'e16x33NrdA2', 'e20x32gp411', 'e32x30SspGyrB', 'e34x30MjaKlbA',
         'e38x32gp418', 'e41x32NrdJ1', 'e42x32STIMPDH1', 'invalid']

In [4]:
def read_map(plate_map, plate_idx):
    
    plate_map = plate_map.melt(id_vars=['group'])
    plate_map['variable'] = plate_map['variable'].astype(int)
    plate_map['well'] = plate_map['group'] + plate_map['variable'].apply(lambda x: '{:02d}'.format(x))
    plate_map.loc[~plate_map['group'].isin(['G', 'H']), 'cuma'] = plate_map['group'].apply(lambda x: ord(x)) - 65
    plate_map.loc[~plate_map['group'].isin(['G', 'H']), 'ara'] = (plate_map['variable'] - 1) % 6
    plate_map.loc[plate_map['group'].isin(['G', 'H']), 'cuma'] = plate_idx
    plate_map.loc[plate_map['group'].isin(['G', 'H']), 'ara'] = (plate_map['variable'] - 1) % 3
    return plate_map[['well', 'value', 'cuma', 'ara']]

def generate_metadata(well, plate_map, gates):
    
    sample_map = {
        'E1': gates[0],
        'E2': gates[1],
        'P_3K3': 'positive_control_3K3',
        'P_4A3': 'positive_control_4AE',
        'N_3K3': 'negative_control_3K3',
        'N_4A3': 'negative_control_4AE',
        'B_K': 'blank_K',
        'B_A': 'blank_A'
    }
    df = pd.merge(well, plate_map, on='well', how='left').dropna(subset=['value']).reset_index(drop=True)
    df['short_name'] = df['value'].map(sample_map) + '_' + df['cuma'].astype(int).astype(str) + df['ara'].astype(int).astype(str)
    return df.dropna()

In [None]:
fluos, ods = [], []
for i in tqdm(range(1, 6)):
    plate_map = read_map(pd.read_csv('datasets/induction_matrix/plate_map.csv'), i)
    raw_data = pd.read_csv('datasets/induction_matrix/plate{}-fluo-od.csv'.format(i), skiprows=[0])
    metadata = generate_metadata(raw_data['well'], plate_map, gates[(i*2)-2:(i*2)])
    data = raw_data[raw_data['well'].isin(metadata['well'])].reset_index(drop=True)
    datas = generate_data(data, metadata['short_name'], h=24, num_data=1)
    fluos.append(datas[0])
    #ods.append(datas[1])
pd.concat(fluos, axis=1).to_csv('datasets/induction_matrix/avg_fluos.csv')
#pd.concat(ods, axis=1).to_csv('datasets/induction_matrix/avg_ods.csv')

### XOR Induction Matrix

In [None]:
gates = ['e20-33', 'e11-15']

In [None]:
def read_map(plate_map, plate_idx):
    
    plate_map = pd.read_csv('datasets/induction_matrix_xor/plate_map.csv')
    plate_map = plate_map.melt(id_vars=['group'])
    plate_map['variable'] = plate_map['variable'].astype(int)
    plate_map['well'] = plate_map['group'] + plate_map['variable'].apply(lambda x: '{:02d}'.format(x))
    plate_map.loc[~plate_map['group'].isin(['G', 'H']), 'ara'] = plate_map['group'].apply(lambda x: ord(x)) - 65
    plate_map.loc[~plate_map['group'].isin(['G', 'H']), 'rha'] = (plate_map['variable'] - 1) % 6
    plate_map.loc[plate_map['group'].isin(['G', 'H']), 'ara'] = plate_idx
    plate_map.loc[plate_map['group'].isin(['G', 'H']), 'rha'] = plate_idx
    return plate_map[['well', 'value', 'ara', 'rha']]

def generate_metadata(well, plate_map, gates):
    
    sample_map = {
        'E1': gates[0],
        'E2': gates[1]
    }
    df = pd.merge(well, plate_map, on='well', how='left').dropna(subset=['value']).reset_index(drop=True)
    df['short_name'] = df['value'].map(sample_map) + '_' + df['cuma'].astype(int).astype(str) + df['ara'].astype(int).astype(str)
    return df.dropna()

In [None]:
fluos, ods = [], []
for i in tqdm(range(1, 6)):
    plate_map = read_map(pd.read_csv('datasets/induction_matrix_xor/plate_map.csv'), i)
    raw_data = pd.read_csv('datasets/induction_matrix/plate{}-fluo-od.csv'.format(i), skiprows=[0])
    metadata = generate_metadata(raw_data['well'], plate_map, gates[(i*2)-2:(i*2)])
    data = raw_data[raw_data['well'].isin(metadata['well'])].reset_index(drop=True)
    datas = generate_data(data, metadata['short_name'], h=24, num_data=1)
    fluos.append(datas[0])
    #ods.append(datas[1])
pd.concat(fluos, axis=1).to_csv('datasets/induction_matrix/avg_fluos.csv')

In [None]:
plate_map

### Exercise

In [None]:
def read_map(plate_map):
    
    plate_map = plate_map.melt(id_vars=['group'])
    plate_map['variable'] = plate_map['variable'].astype(int)
    plate_map['well'] = plate_map['group'] + plate_map['variable'].apply(lambda x: "{:02d}".format(x))
    plate_map.loc[~plate_map['group'].isin(['C', 'D', 'G', 'H']), 'inducer'] = 0
    plate_map.loc[plate_map['group'].isin(['C', 'D', 'G', 'H']), 'inducer'] = 1
    return plate_map[['well', 'value', 'inducer']]

plate_map = read_map(pd.read_csv('datasets/exercise/plate_map.csv'))

In [None]:
def generate_metadata(well, plate_map):
    df = pd.merge(well, plate_map, on='well', how='left').dropna(subset=['value']).reset_index(drop=True)
    return df.dropna()

raw_data = pd.read_csv('datasets/exercise/first-exercise.csv', skiprows=[0])
metadata = generate_metadata(raw_data['well'], plate_map)
data = raw_data[raw_data['well'].isin(metadata['well'])].reset_index(drop=True)

In [None]:
fluo, od, fluo_half = generate_data(data, metadata['value'], 24, col='value')

In [None]:
i = 3 * 12
promoters = fluo.iloc[:,i:i+10]
#promoters
promoters.plot()

In [None]:
i = 2 * 12
promoters = od.iloc[:,i:i+9]
#promoters
for i in range(promoters.shape[1]):
    plt.plot(od.index/60, promoters.iloc[:, i], label=promoters.columns[i])
plt.ylabel('OD')
plt.xlabel('Hour')
plt.legend()
sns.despine()

In [None]:
a = []
ECF = od[['E720', 'E721', 'E722', 'E723', 'E724', 'E725', 'E726']]
for i in range(0, 32, 4):
    a.append(ECF.iloc[:,i+2:i+3])
ECFs = pd.concat(a, axis=1)
ECFs

In [None]:
for i in range(ECFs.shape[1]):
    plt.plot(ECFs.index/60, ECFs.iloc[:, i], label=ECFs.columns[i])
plt.ylabel('Fluo (a.u)')
plt.xlabel('Hour')
plt.legend()
sns.despine()

In [None]:
fluos = pd.read_csv('datasets/induction_matrix/induction_fluo.csv', index_col='time')
ods = pd.read_csv('datasets/induction_matrix/induction_od.csv', index_col='time')
gates = ['e11x32STPhoRadA', 'e15x32NpuSspS2', 'e16x33NrdA2', 'e20x32gp411', 'e32x30SspGyrB',
         'e34x30MjaKlbA', 'e38x32gp418', 'e41x32NrdJ1', 'e42x32STIMPDH1']
g = []
for gate in gates[:-2]:
    fluo = fluos['{}_55'.format(gate)]
    g.append(fluo)
    plt.plot(fluo.index/60, fluo, label=gate)
plt.legend()
sns.despine()

In [None]:
gdf = pd.concat(g, axis=1)
gdf

In [None]:
f, axs = plt.subplots(2, 5, sharex=False, sharey=False, figsize=(16, 5))
axr = axs.ravel()
for i, ax in enumerate(axr):
    if i < len(gates[:-2]):
        ax.plot(gdf.index/60, gdf.iloc[:,i], label='gate')
        ax.plot(ECFs.index/60, ECFs.iloc[:,i], label='intact')
        ax.set_title(gates[i])
        ax.legend(loc=4)
        sns.despine()
    else:
        ax.set_visible(False)
plt.tight_layout()

In [None]:
constructs = pd.read_csv('datasets/dictionary.csv')
constructs

In [None]:
constructs[constructs['short_name'].isin(gates)]

In [None]:
def parse_minutes(x):
    
    spl = x.split(' ')
    hours = int(spl[0]) * 60
    mins = int(spl[2]) if spl[2] != '' else 0
    return hours + mins

def transpose_data(df):
    
    df.set_index('short_name', inplace=True)
    df = df.transpose().reset_index()
    df['time'] = df['index'].apply(parse_minutes)
    df = df.set_index('time')
    df = df.drop('index', axis=1)
    return df

def generate_data(df, plate, name, h=20, m=0):
    
    start_idx = 3
    mid_idx = h * 3 + start_idx + 1 - m
    end_idx = h * 3 + mid_idx + 1 - m

    fluo = (df.iloc[:, start_idx:mid_idx]).astype(float)
    od = (df.iloc[:, mid_idx:end_idx]).astype(float)
    fluo_half = (df.iloc[:, end_idx:]).astype(float)
    
    fluo = pd.concat([name, fluo], axis=1)
    od = pd.concat([name, od], axis=1)
    fluo_half = pd.concat([name, fluo_half], axis=1)
    
    fluo = transpose_data(fluo)
    od = transpose_data(od)
    fluo_half = transpose_data(fluo_half)
    
    return fluo, od, fluo_half

In [None]:
def read_plate_data(df):
    
    df.columns = df.iloc[0]
    df.drop(df.index[0], inplace=True)
    df.dropna(inplace=True)
    return df.reset_index(drop=True)

read_plate_data()

### Marionette Strain

In [None]:
def read_plate_map(data):
    
    con = data.iloc[:,:2].reset_index()
    mar = data.iloc[:,2:].reset_index().melt(id_vars=['group'])
    mar['variable'] = mar['variable'].apply(lambda x: "{:02d}".format(int(x)))
    mar['well'] = mar['group'] + mar['variable']
    mar.drop(['group', 'variable'], axis=1, inplace=True)
    mar.rename(columns={'value':'id'}, inplace=True)
    return con, mar

con, mar = read_plate_map(pd.read_csv('datasets/mario_map.csv', index_col=['group']))

In [None]:
def read_plate_data(df):
    
    df.columns = df.iloc[0]
    df.drop(df.index[0], inplace=True)
    return df.reset_index(drop=True)

data = read_plate_data(pd.read_csv('datasets/marrionette.csv'))

In [None]:
merged = pd.merge(data['well'], mar, on='well', how='left')
merged = pd.merge(merged, constructs, on='id', how='left')
name = merged['short_name'].dropna().reset_index(drop=True) #just to make sure there is no null and indexing is correct

In [None]:
merged

In [None]:
mar_fluo, mar_od, mar_fluo_half = generate_data(data, mar, name, 24)
gates = name.unique().tolist()

In [None]:
def reformat_df(data):
    
    top10_wrapper = pd.DataFrame()
    mario_wrapper = pd.DataFrame()

    for gate in gates:

        top10 = data[gate].iloc[:,:4]
        top10.columns = [gate + '_00', gate + '_10', gate + '_01', gate + '_11']
        top10_wrapper = pd.concat([top10_wrapper, top10], axis=1)

        mario = data[gate].iloc[:,4:]
        mario.columns = [gate + '_00', gate + '_10', gate + '_01', gate + '_11']
        mario_wrapper = pd.concat([mario_wrapper, mario], axis=1)
        
    return top10_wrapper, mario_wrapper

In [None]:
top10_fluo, mario_fluo = reformat_df(mar_fluo)
#top10_od, mario_od = reformat_df(mar_od)

#mario_fluo.reset_index().to_csv('datasets/marionette_fluo_half.csv', index=False)
#mario_od.reset_index().to_csv('datasets/marionette_od.csv', index=False)

### ALL GATES

### Data from plate reader 1

In [None]:
def cleanse_plate(plate):
    plate = plate.melt(id_vars=['Unnamed: 0'])
    plate['value'] = plate['value'].apply(lambda x: x.split('.')[0])
    plate['variable'] = plate['variable'].apply(lambda x: "{:02d}".format(int(x)))
    plate['variable'] = plate['Unnamed: 0'] + plate['variable']
    plate.drop('Unnamed: 0', axis=1, inplace=True)
    return plate.reset_index(drop=True).rename(columns={'variable': 'Well', 'value': 'code_name'})

plate1 = cleanse_plate(pd.read_csv('datasets/plate1_map.csv'))
plate1

In [None]:
df11 = read_plate_data('and_gate_11') # up to 20h
print(df11.shape)
df12 = read_plate_data('and_gate_12') # up to 16h
print(df12.shape)
df13 = read_plate_data('and_gate_13') # up to 16h
print(df13.shape)

In [None]:
merged = pd.merge(df11['Well'], plate1, on='Well', how='left')
merged = pd.merge(merged, naming_map, on='code_name', how='left')
name1 = merged['short_name']

In [None]:
fluo11, od11, bulk_fluo11, fluo_half11, bulk_fluo_half11 = generate_data(df11, plate1, name1, 20)
fluo12, od12, bulk_fluo12, fluo_half12, bulk_fluo_half12 = generate_data(df12, plate1, name1, 16)
fluo13, od13, bulk_fluo13, fluo_half13, bulk_fluo_half13 = generate_data(df13, plate1, name1, 16)

In [None]:
def plot_all(data, num_row, num_col):
    f, axs = plt.subplots(num_row, num_col, sharex=True, sharey=False, figsize=(14, num_row*2))
    axr = axs.ravel()
    for i, ax in tqdm(enumerate(axr)):
        if i < data[0].shape[1]:
            for d in data:
                ax.plot(d.index/60, d.iloc[:, i])
            ax.set_title(data[0].columns[i])
            ax.set_xlabel('Time (h)')
        else:
            ax.set_visible(False)
    plt.tight_layout()
    sns.despine()
    
#plot bulk fluorescence data
plot_all([bulk_fluo_half11, bulk_fluo_half12, bulk_fluo_half13], 20, 5)

In [None]:
plot_all([od11, od12, od13], 20, 5)

In [None]:
bulk_fluo1 = (bulk_fluo11 + bulk_fluo12 + bulk_fluo13) / 3
fluo1 = (fluo11 + fluo12 + fluo13) / 3
od1 = (od11 + od12 + od13) / 3
fluo_half1 = (fluo_half11 + fluo_half12 + fluo_half13) / 3
bulk_fluo_half1 = (bulk_fluo_half11 + bulk_fluo_half12 + bulk_fluo_half13) / 3

In [None]:
bulk_fluo1.dropna().to_csv('datasets/bulk_fluo_plate_1_triplicate.csv')
fluo1.dropna().to_csv('datasets/fluo_plate_1_triplicate.csv')
od1.dropna().to_csv('datasets/od_plate_1_triplicate.csv')
bulk_fluo_half1.dropna().to_csv('datasets/bulk_fluo_half_plate_1_triplicate.csv')
fluo_half1.dropna().to_csv('datasets/fluo_half_plate_1_triplicate.csv')

In [None]:
bulk_fluo11.dropna().to_csv('datasets/bulk_fluo_plate_1_single.csv')
fluo11.dropna().to_csv('datasets/fluo_plate_1_single.csv')
od11.dropna().to_csv('datasets/od_plate_1_single.csv')
bulk_fluo_half11.dropna().to_csv('datasets/bulk_fluo_half_plate_1_single.csv')
fluo_half11.dropna().to_csv('datasets/fluo_half_plate_1_single.csv')

### Data from plate reader 2

In [None]:
plate2 = cleanse_plate(pd.read_csv('datasets/plate2_map.csv'))
df21 = read_plate_data('and_gate_21') # up to 20h
print(df21.shape)
df22 = read_plate_data('and_gate_22') # up to 16h
print(df22.shape)
df23 = read_plate_data('and_gate_23') # up to 16h
print(df23.shape)

In [None]:
merged = pd.merge(df21['Well'], plate2, on='Well', how='left')
merged = pd.merge(merged, naming_map, on='code_name', how='left')
name2 = merged['short_name']

In [None]:
fluo21, od21, bulk_fluo21, fluo_half21, bulk_fluo_half21 = generate_data(df21, plate2, name2, 21, 2)
fluo22, od22, bulk_fluo22, fluo_half22, bulk_fluo_half22 = generate_data(df22, plate2, name2, 22, 0)
fluo23, od23, bulk_fluo23, fluo_half23, bulk_fluo_half23 = generate_data(df23, plate2, name2, 22, 1)

In [None]:
plot_all([bulk_fluo_half21, bulk_fluo_half22, bulk_fluo_half23], 20, 5)

In [None]:
plot_all([od21, od22, od23], 20, 5)

In [None]:
bulk_fluo2 = (bulk_fluo21 + bulk_fluo22 + bulk_fluo23) / 3
fluo2 = (fluo21 + fluo22 + fluo23) / 3
od2 = (od21 + od22 + od23) / 3
fluo_half2 = (fluo_half21 + fluo_half22 + fluo_half23) / 3
bulk_fluo_half2 = (bulk_fluo_half21 + bulk_fluo_half22 + bulk_fluo_half23) / 3

In [None]:
bulk_fluo2.to_csv('datasets/bulk_fluo_plate_2_triplicate.csv')
fluo2.to_csv('datasets/fluo_plate_2_triplicate.csv')
fluo_half2.to_csv('datasets/fluo_half_plate_2_triplicate.csv')
bulk_fluo_half2.to_csv('datasets/bulk_fluo_half_plate_2_triplicate.csv')
od2.to_csv('datasets/od_plate_2_triplicate.csv')

In [None]:
bulk_fluo21.to_csv('datasets/bulk_fluo_plate_2_single.csv')
fluo21.to_csv('datasets/fluo_plate_2_single.csv')
fluo_half21.to_csv('datasets/fluo_half_plate_2_single.csv')
bulk_fluo_half21.to_csv('datasets/bulk_fluo_half_plate_2_single.csv')
od21.to_csv('datasets/od_plate_2_single.csv')

### Induction Matrix

In [None]:
induction = read_plate_data('induction') # up to 20h
print(induction.shape)

In [None]:
induction_plate = {
    'A01': 'A18', 'A02': 'A18', 'A03': 'A18', 
    'B01': 'A18', 'B02': 'A18', 'B03': 'A18', 
    'C01': 'A18', 'C02': 'A18', 'C03': 'A18', 
    'D01': 'A195', 'D02': 'A195', 'D03': 'A195', 
    'E01': 'A195', 'E02': 'A195', 'E03': 'A195', 
    'F01': 'A195', 'F02': 'A195', 'F03': 'A195', 
    'A04': 'A29', 'A05': 'A29', 'A06': 'A29', 
    'B04': 'A29', 'B05': 'A29', 'B06': 'A29', 
    'C04': 'A29', 'C05': 'A29', 'C06': 'A29', 
    'D04': 'A259', 'D05': 'A259', 'D06': 'A259', 
    'E04': 'A259', 'E05': 'A259', 'E06': 'A259', 
    'F04': 'A259', 'F05': 'A259', 'F06': 'A259', 
    'A07': 'A76', 'A08': 'A76', 'A09': 'A76', 
    'B07': 'A76', 'B08': 'A76', 'B09': 'A76', 
    'C07': 'A76', 'C08': 'A76', 'C09': 'A76', 
    'D07': 'A267', 'D08': 'A267', 'D09': 'A267', 
    'E07': 'A267', 'E08': 'A267', 'E09': 'A267', 
    'F07': 'A267', 'F08': 'A267', 'F09': 'A267', 
    'A10': 'A109', 'A11': 'A109', 'A12': 'A109', 
    'B10': 'A109', 'B11': 'A109', 'B12': 'A109', 
    'C10': 'A109', 'C11': 'A109', 'C12': 'A109', 
    'D10': 'A294', 'D11': 'A294', 'D12': 'A294', 
    'E10': 'A294', 'E11': 'A294', 'E12': 'A294', 
    'F10': 'A294', 'F11': 'A294', 'F12': 'A294', 
    'G01': 'A323', 'G02': 'A323', 'G03': 'A323', 
    'G04': 'A323', 'G05': 'A323', 'G06': 'A323', 
    'H01': 'A323', 'H02': 'A323', 'H03': 'A323', 
}
induction_row = {
    'A': 0, 'B': 1, 'C': 2, 'D': 0, 'E': 1, 'F': 2,
    'G': 0, 'H': 1 #column g needs to be manually adjusted later
}
induction_col = {
    '01': 0, '02': 1, '03': 2, '04': 0, '05': 1, '06': 2, 
    '07': 0, '08': 1, '09': 2, '10': 0, '11': 1, '12': 2 
}

In [None]:
index = induction.iloc[:,:3]
index['code_name'] = index['Well'].map(induction_plate)
col_idx = index['Well'].str[1:]
row_idx = index['Well'].str[:1]
index['ind1_lvl'] = row_idx.map(induction_row)
index['ind2_lvl'] = col_idx.map(induction_col)
index.loc[index['Well'].isin(['G04', 'G05', 'G06']), 'ind1_lvl'] = 2
#index = index.dropna()
index.loc[index['code_name'].isnull(), 'code_name'] = 'control'
name = pd.merge(index[['code_name', 'ind1_lvl', 'ind2_lvl']], naming_map[['code_name', 'short_name']], on='code_name', how='left')
name

In [None]:
index[index['code_name']=='A323']

In [None]:
def parse_minutes(x):
    spl = x.split(' ')
    hours = int(spl[0]) * 60
    mins = int(spl[2]) if spl[2] != '' else 0
    return hours + mins

def transpose_data(df_raw):
    df = df_raw.copy()
    df.set_index('short_name', inplace=True)
    #df = df.transpose().reset_index()
    #df['time'] = df['index'].apply(parse_minutes)
    #df = df.set_index('time')
    #df = df.drop('index', axis=1)
    return df

def generate_data_induce(df, h=24):
    
    start_idx = 3
    mid_idx = h * 3 + start_idx
    fluo = (df.iloc[:, start_idx:mid_idx]).astype(float)
    od = (df.iloc[:, mid_idx:]).astype(float)
    bulk_fluo = fluo * od
    
    fluo = pd.concat([name, fluo], axis=1)
    od = pd.concat([name, od], axis=1)
    bulk_fluo = pd.concat([name, bulk_fluo], axis=1)
    
    #bulk_fluo = transpose_data(bulk_fluo)
    #fluo = transpose_data(fluo)
    #od = transpose_data(od)
    
    return fluo, od, bulk_fluo

fluo, od, bulk_fluo = generate_data_induce(induction)
bulk_fluo

In [None]:
bulk_fluo[bulk_fluo['short_name']=='e11x32STPhoRadA']

In [None]:
bulk_fluo.to_csv('datasets/bulk_fluo_induction.csv', index=False)
fluo.to_csv('datasets/fluo_induction.csv', index=False)
od.to_csv('datasets/od_induction.csv', index=False)

### XOR Gate Top 10

In [None]:
def read_plate_map(df):
    data = pd.read_csv('datasets/xor_map.csv', index_col=['Group'])
    con = data.iloc[:,:2].reset_index()
    xor = data.iloc[:,2:]
    xor = xor.reset_index().melt(id_vars=['Group'])
    xor['variable'] = xor['variable'].apply(lambda x: "{:02d}".format(int(x)))
    xor['Well'] = xor['Group'] + xor['variable']
    xor.drop(['Group', 'variable'], axis=1, inplace=True)
    xor.rename(columns={'value':'code_name'}, inplace=True)
    return con, xor

con, xor = read_plate_map(pd.read_csv('datasets/xor_map.csv'))

In [None]:
def read_plate_data(df):
    df.columns = df.iloc[0]
    df.drop(df.index[0], inplace=True)
    #df.dropna(inplace=True)
    return df.reset_index(drop=True)

data = read_plate_data(pd.read_csv('datasets/xor_gate.csv'))
data.head()

In [None]:
merged = pd.merge(data['Well'], xor, on='Well', how='left')
#merged = pd.merge(merged, naming_map, on='code_name', how='left')
merged.rename(columns={'code_name': 'short_name'}, inplace=True)
name = merged['short_name'].dropna().reset_index(drop=True)

In [None]:
fluox, odx, fluo_halfx = generate_data(data, xor, name, 24)

In [None]:
cols = ['ECF20/33', 'ECF11/15']
fluo_xor = pd.DataFrame()
od_xor = pd.DataFrame()
for c in cols:
    temp = pd.concat([fluox[c].iloc[:,i] for i in range(0, 16, 4)], axis=1)
    temp.columns = [c + '_' + "{:02b}".format(int(i)) for i in range(4)]
    fluo_xor = pd.concat([fluo_xor, temp], axis=1)
    
    temp = pd.concat([odx[c].iloc[:,i] for i in range(0, 16, 4)], axis=1)
    temp.columns = [c + '_' + "{:02b}".format(int(i)) for i in range(4)]
    od_xor = pd.concat([od_xor, temp], axis=1)

In [None]:
fluo_xor.to_csv('datasets/bulk_fluo_xor.csv')
od_xor.to_csv('datasets/od_xor.csv')

### First-round Data

In [None]:
df = pd.read_csv('datasets/raw.csv')
df.columns = df.iloc[0]
df.drop(df.index[0], inplace=True)
df.dropna(inplace=True)
print(df.shape)
df.head()

In [None]:
map_ecf = {'Sample X1': 'e15',
'Sample X2': 'e22',
'Sample X3': 'e32',
'Sample X4': 'e33',
'Sample X5': 'e34',
'Sample X6': 'e41',
'Sample X7': 'e42',
'Sample X8': 'e15',
'Sample X9': 'e22',
'Sample X10': 'e38',
'Sample X11': 'e16',
'Sample X12': 'e33',
'Sample X13': 'e15',
'Sample X14': 'e16',
'Sample X15': 'e17',
'Sample X16': 'e20',
'Sample X17': 'e22',
'Sample X18': 'e26',
'Sample X19': 'e32',
'Sample X20': 'e33',
'Sample X21': 'e34'}
map_int = {'Sample X1': 'SspGyrB',
'Sample X2': 'SspGyrB',
'Sample X3': 'SspGyrB',
'Sample X4': 'SspGyrB',
'Sample X5': 'SspGyrB',
'Sample X6': 'SspGyrB',
'Sample X7': 'SspGyrB',
'Sample X8': 'TerThyXS2',
'Sample X9': 'TerThyXS2',
'Sample X10': 'TerThyXS2',
'Sample X11': 'TerThyXS1',
'Sample X12': 'TerThyXS1',
'Sample X13': 'STPhoRadA',
'Sample X14': 'STPhoRadA',
'Sample X15': 'STPhoRadA',
'Sample X16': 'STPhoRadA',
'Sample X17': 'STPhoRadA',
'Sample X18': 'STPhoRadA',
'Sample X19': 'STPhoRadA',
'Sample X20': 'STPhoRadA',
'Sample X21': 'STPhoRadA'}

In [None]:
df['ECF'] = df['Content'].map(map_ecf)
df['Intein'] = df['Content'].map(map_int)
df.loc[df['Group'].isin(['A', 'E']), 'Ara'] = 0
df.loc[df['Group'].isin(['A', 'E']), 'Cuma'] = 0
df.loc[df['Group'].isin(['B', 'F']), 'Ara'] = 1
df.loc[df['Group'].isin(['B', 'F']), 'Cuma'] = 0
df.loc[df['Group'].isin(['C', 'G']), 'Ara'] = 0
df.loc[df['Group'].isin(['C', 'G']), 'Cuma'] = 1
df.loc[df['Group'].isin(['D', 'H']), 'Ara'] = 1
df.loc[df['Group'].isin(['D', 'H']), 'Cuma'] = 1
df = pd.concat([df[['ECF', 'Intein', 'Ara', 'Cuma']],
                 df.drop(['ECF', 'Intein', 'Ara', 'Cuma', 'Well', 'Content', 'Group'], axis=1)],
                 axis=1)
df.head()

In [None]:
cols = list(range(0,77))
fluo = df.iloc[:, cols]
#fluo.to_csv('datasets/fluoOD-all.csv', index=False)
fluo.head()

In [None]:
cols = list(range(0,4)) + list(range(77,150))
od = df.iloc[:, cols]
#od.to_csv('datasets/OD-all.csv', index=False)
od.head()

In [None]:
def parse_minutes(x):
    spl = x.split(' ')
    hours = int(spl[0]) * 60
    mins = int(spl[2]) if spl[2] != '' else 0
    return hours + mins

fluo11 = fluo[(fluo['Ara']==1) & (fluo['Cuma']==0)]
fluo11['index'] = fluo11['ECF'] + fluo11['Intein']
fluo11.set_index('index', inplace=True)
fluo11.drop(['ECF', 'Intein', 'Ara', 'Cuma'], axis=1, inplace=True)
fluo11 = fluo11.transpose().reset_index()

fluo11['time'] = fluo11[0].apply(parse_minutes)
fluo11 = fluo11.set_index('time')
fluo11 = fluo11.drop(0, axis=1)
fluo11

In [None]:
od11 = od[(od['Ara']==1) & (od['Cuma']==0)]
od11['index'] = od11['ECF'] + od11['Intein']
od11.set_index('index', inplace=True)
od11.drop(['ECF', 'Intein', 'Ara', 'Cuma'], axis=1, inplace=True)
od11 = od11.transpose().reset_index()

od11['time'] = od11[0].apply(parse_minutes)
od11 = od11.set_index('time')
od11 = od11.drop(0, axis=1)
od11

In [None]:
for col in fluo11.columns:
    try:
        fluo11[col] = fluo11[col].astype(float)
    except:
        print(col)
        continue

In [None]:
for col in od11.columns:
    od11[col] = od11[col].astype(float)

In [None]:
fluo11.to_csv('datasets/fluo-10.csv')
od11.to_csv('datasets/od-10.csv')
(fluo11 * od11).to_csv('datasets/bulk-fluo-10.csv')

### Sequence to Function

In [None]:
filenames = sorted(os.listdir('datasets/sequences/'))
buffer = {}
for filename in tqdm(filenames):
    gb_file = "datasets/sequences/" + filename
    for gb_record in SeqIO.parse(open(gb_file,"r"), "genbank") :
        buffer[filename[:-3]] = str(gb_record.seq)
df = pd.DataFrame.from_dict(buffer, orient='index').reset_index()
df.columns = ['full_name', 'sequence']

In [None]:
constructs = pd.read_csv('datasets/constructs.csv')
constructs['full_name'] = constructs['full_name'].str.replace("*", "")
df2 = pd.merge(df, constructs, on="full_name", how="left")
df2 = df2.dropna()
df2 = df2[['id', 'short_name', 'full_name', 'sequence']]
fluo1 = pd.read_csv('datasets/bulk_fluo_plate_1_single.csv', index_col='time')
fluo2 = pd.read_csv('datasets/bulk_fluo_plate_2_single.csv', index_col='time')
fluo = pd.concat([fluo1, fluo2], axis=1)
data = fluo.T.reset_index().drop(1220, axis=1)
data.rename(columns={'index': 'short_name'}, inplace=True)
#data.columns = ['short_name', 'fluo_20h']
df_final = pd.merge(df2, data, on='short_name', how='left').dropna()
df_final

In [None]:
df_final.isnull().sum()

In [None]:
df_final.to_csv('datasets/sequence_data.csv')