In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [2]:
def parse_minutes(x):
    """Convert a time label of the form '<hours> h <minutes> min' to minutes.

    Everything after the first '.' in the label is discarded before parsing
    (presumably a de-duplication suffix such as '.1' -- confirm against the
    raw export).

    Parameters
    ----------
    x : str
        Time label, e.g. '1 h 20 min', '0 h ' or '2 h'.

    Returns
    -------
    int
        Total number of minutes.

    Raises
    ------
    ValueError
        If the hour or minute token cannot be parsed as an integer.
    """
    tokens = x.split('.')[0].split(' ')
    hours = int(tokens[0]) * 60
    # The minute token is missing for '2 h' and empty for '2 h '; both mean
    # zero minutes. Guard the lookup so the former no longer raises IndexError.
    mins = int(tokens[2]) if len(tokens) > 2 and tokens[2] != '' else 0
    return hours + mins

def transpose_data(df, col):
    """Turn a wells-by-timepoints table into a time-indexed table.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per well; contains the label column `col` plus one column per
        time point whose header is parseable by `parse_minutes`.
    col : str
        Name of the column whose values become the output column labels.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'time' (minutes), one column per value of `col`.
    """
    # set_index without inplace=True: the caller's frame is left untouched
    # (the previous version modified its argument as a side effect).
    transposed = df.set_index(col).transpose().reset_index()
    # After reset_index the original time-point headers live in 'index'.
    transposed['time'] = transposed['index'].apply(parse_minutes)
    return transposed.set_index('time').drop('index', axis=1)

def generate_data(df, name, h=24, m=0, num_data=2, start_idx=3, col='short_name'):
    """Cut the raw table into consecutive measurement blocks and transpose each.

    Every block is `h * 3 + 1 + int(m / 20)` columns wide; the first block
    starts at column position `start_idx` and each following block starts
    where the previous one ended.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table, one row per well.
    name : pandas.Series
        Labels placed next to each block before transposing.
    h, m : int
        Hours and minutes used to derive the block width.
    num_data : int
        Number of blocks to extract.
    start_idx : int
        Column position of the first block.
    col : str
        Label column name handed to `transpose_data`.

    Returns
    -------
    list of pandas.DataFrame
        One time-indexed frame per block, in column order.
    """
    width = h * 3 + 1 + int(m / 20)
    frames = []
    lower = start_idx
    for _ in range(num_data):
        upper = lower + width
        block = df.iloc[:, lower:upper].astype(float)
        labelled = pd.concat([name, block], axis=1)
        frames.append(transpose_data(labelled, col))
        lower = upper
    return frames

### AND Gates Screening

In [3]:
def read_map(plate_map):
    """Reshape a wide plate map into one row per well with an inducer index.

    Parameters
    ----------
    plate_map : pandas.DataFrame
        Wide layout with a 'group' column (row letter) and one column per
        plate column number.

    Returns
    -------
    pandas.DataFrame
        Columns 'Well' (row letter + two-digit column), 'value' and 'inducer'.
        Row 'H' gets inducer 0; every other row gets the integer part of
        (column - 1) / 6.
    """
    long_map = plate_map.melt(id_vars=['group'])
    long_map['variable'] = long_map['variable'].astype(int)
    column_no = long_map['variable']
    long_map['Well'] = long_map['group'] + column_no.map('{:02d}'.format)

    in_row_h = long_map['group'].isin(['H'])
    long_map.loc[~in_row_h, 'inducer'] = ((column_no - 1) / 6).astype(int)
    long_map.loc[in_row_h, 'inducer'] = 0
    return long_map[['Well', 'value', 'inducer']]

def generate_metadata(well, plate_map):
    """Attach a '<sample>_<inducer>' short name to every mapped well.

    Sample names come from 'datasets/dictionary.csv' (columns 'id' and
    'short_name'), extended with the control labels defined below.

    Parameters
    ----------
    well : pandas.Series
        Well identifiers, named 'Well'.
    plate_map : pandas.DataFrame
        Columns 'Well', 'value' and 'inducer'.

    Returns
    -------
    pandas.DataFrame
        Left-merge of `well` and `plate_map` plus 'short_name'. Wells without
        a 'value', and rows containing any remaining NaN (e.g. a value with no
        entry in the sample map), are dropped.
    """
    dictionary = pd.read_csv('datasets/dictionary.csv')
    sample_map = dict(zip(dictionary['id'], dictionary['short_name']))
    # Control labels are layered on top of the dictionary entries.
    sample_map.update({
        'BK': 'blank-kan',
        'BA': 'blank-amp',
        '3K3-N': 'negative-control-kan',
        '4A3-N': 'negative-control-amp',
        '4A3-P': 'positive-control-amp',
    })

    merged = pd.merge(well, plate_map, on='Well', how='left')
    df = merged.dropna(subset=['value']).reset_index(drop=True)
    inducer_tag = df['inducer'].astype(int).astype(str)
    df['short_name'] = df['value'].map(sample_map) + '_' + inducer_tag
    return df.dropna()

In [4]:
# Experiment folder under datasets/experiment/; it supplies plate_map.csv and raw.csv.
folder = '021-all-ands-second-half-second-part'
plate_map = read_map(pd.read_csv('datasets/experiment/{}/plate_map.csv'.format(folder)))
# skiprows=[0] skips the first line of the raw export before the header is parsed.
raw_data = pd.read_csv('datasets/experiment/{}/raw.csv'.format(folder), skiprows=[0])
metadata = generate_metadata(raw_data['Well'], plate_map)
# Keep only the wells that generate_metadata retained.
data = raw_data[raw_data['Well'].isin(metadata['Well'])].reset_index(drop=True)
# Four consecutive blocks, each 14*3 + 1 + 1 = 44 columns wide (h=14, m=20).
datas = generate_data(data, metadata['short_name'], h=14, m=20, num_data=4)

In [5]:
# Write each extracted block to its own CSV inside the experiment folder.
_targets = [
    'datasets/experiment/{}/ods.csv',
    'datasets/experiment/{}/fluos.csv',
    'datasets/experiment/{}/fluos-lower-1.csv',
    'datasets/experiment/{}/fluos-lower-2.csv',
]
for _pos, _target in enumerate(_targets):
    datas[_pos].to_csv(_target.format(folder))

### Self Experiment

#### Promoters characterization

In [4]:
def read_map(plate_map):
    """Reshape a wide plate map into one row per well with an inducer index.

    Note: this redefines the `read_map` from the AND Gates Screening section.

    Parameters
    ----------
    plate_map : pandas.DataFrame
        Wide layout with a 'group' column (row letter) and one column per
        plate column number.

    Returns
    -------
    pandas.DataFrame
        Columns 'Well', 'value' and 'inducer'. Rows A/C/E get
        (column - 1) + 12, rows B/D/F get (column - 1), row H gets 0; any
        other row (e.g. G) is left as NaN.
    """
    long_map = plate_map.melt(id_vars=['group'])
    long_map['variable'] = long_map['variable'].astype(int)
    long_map['Well'] = long_map['group'] + long_map['variable'].map('{:02d}'.format)

    rows = long_map['group']
    zero_based_col = long_map['variable'] - 1
    long_map.loc[rows.isin(['A', 'C', 'E']), 'inducer'] = zero_based_col + 12
    long_map.loc[rows.isin(['B', 'D', 'F']), 'inducer'] = zero_based_col
    long_map.loc[rows.isin(['H']), 'inducer'] = 0
    return long_map[['Well', 'value', 'inducer']]

def read_map_13(plate_map):
    """Reshape a wide plate map into one row per well with an inducer index.

    Parameters
    ----------
    plate_map : pandas.DataFrame
        Wide layout with a 'group' column (row letter) and one column per
        plate column number.

    Returns
    -------
    pandas.DataFrame
        Columns 'Well', 'value' and 'inducer'. Rows G and H get
        (column - 1) % 2; every other row gets (column - 1) % 6.
    """
    long_map = plate_map.melt(id_vars=['group'])
    long_map['variable'] = long_map['variable'].astype(int)
    long_map['Well'] = long_map['group'] + long_map['variable'].map('{:02d}'.format)

    bottom_rows = long_map['group'].isin(['G', 'H'])
    zero_based_col = long_map['variable'] - 1
    long_map.loc[~bottom_rows, 'inducer'] = zero_based_col % 6
    long_map.loc[bottom_rows, 'inducer'] = zero_based_col % 2
    return long_map[['Well', 'value', 'inducer']]

def generate_metadata(well, plate_map):
    """Attach a '<sample>_<inducer>' short name to every mapped well.

    Parameters
    ----------
    well : pandas.Series
        Well identifiers, named 'Well'.
    plate_map : pandas.DataFrame
        Columns 'Well', 'value' and 'inducer'.

    Returns
    -------
    pandas.DataFrame
        Left-merge of `well` and `plate_map` plus 'short_name'. Wells without
        a 'value', and rows containing any remaining NaN (e.g. a value that is
        not a key of the sample map), are dropped.
    """
    promoters = {
        'P62': 'pBAD/Ara',
        'P64': 'pCin/OHC14',
        'P66': 'pCymRC/Cuma',
        'P68': 'pLuxB/AHL',
        'P69': 'pPhlF/DAPG',
        'P70': 'pSalTTC/Sal',
        'P72': 'pRhaB/Rha',
    }
    ecf_gates = {
        'G720': 'e11x32STPhoRadA',
        'G721': 'e15x32NpuSspS2',
        'G722': 'e16x33NrdA2',
        'G723': 'e20x32gp411',
        'G724': 'e32x30SspGyrB',
        'G725': 'e34x30MjaKlbA',
        'G726': 'e38x32gp418',
        'G727': 'e41x32NrdJ1',
        'G728': 'e42x32STIMPDH1',
    }
    controls = {
        'BK': 'Blank_Kan',
        'BA': 'Blank_Amp',
        '3K3-N': 'negative-control-kan',
        '4A3-N': 'negative-control-amp',
        '4A3-P': 'positive-control-amp',
    }
    sample_map = {**promoters, **ecf_gates, **controls}

    merged = pd.merge(well, plate_map, on='Well', how='left')
    df = merged.dropna(subset=['value']).reset_index(drop=True)
    inducer_tag = df['inducer'].astype(int).astype(str)
    df['short_name'] = df['value'].map(sample_map) + '_' + inducer_tag
    return df.dropna()

In [6]:
# Experiment folder under datasets/experiment/; it supplies plate_map.csv and raw.csv.
folder = '013-2-ecfs-0-ind-promoters'
# This plate uses the read_map_13 layout rather than read_map.
plate_map = read_map_13(pd.read_csv('datasets/experiment/{}/plate_map.csv'.format(folder)))
# skiprows=[0] skips the first line of the raw export before the header is parsed.
raw_data = pd.read_csv('datasets/experiment/{}/raw.csv'.format(folder), skiprows=[0])
metadata = generate_metadata(raw_data['Well'], plate_map)
# Keep only the wells that generate_metadata retained.
data = raw_data[raw_data['Well'].isin(metadata['Well'])].reset_index(drop=True)
# Four consecutive blocks, each 20*3 + 1 + 1 = 62 columns wide (h=20, m=20).
datas = generate_data(data, metadata['short_name'], h=20, m=20, num_data=4)

In [8]:
# Inspect the second extracted block (the one saved as fluos.csv in the next cell).
datas[1]

short_name,e20x32gp411_0,e20x32gp411_1,e20x32gp411_2,e20x32gp411_3,e20x32gp411_4,e20x32gp411_5,e34x30MjaKlbA_0,e34x30MjaKlbA_1,e34x30MjaKlbA_2,e34x30MjaKlbA_3,e34x30MjaKlbA_4,e34x30MjaKlbA_5,e20x32gp411_0,e20x32gp411_1,e20x32gp411_2,e20x32gp411_3,e20x32gp411_4,e20x32gp411_5,e34x30MjaKlbA_0,e34x30MjaKlbA_1,e34x30MjaKlbA_2,e34x30MjaKlbA_3,e34x30MjaKlbA_4,e34x30MjaKlbA_5,e20x32gp411_0,e20x32gp411_1,e20x32gp411_2,e20x32gp411_3,e20x32gp411_4,e20x32gp411_5,e34x30MjaKlbA_0,e34x30MjaKlbA_1,e34x30MjaKlbA_2,e34x30MjaKlbA_3,e34x30MjaKlbA_4,e34x30MjaKlbA_5,e20x32gp411_0,e20x32gp411_1,e20x32gp411_2,e20x32gp411_3,e20x32gp411_4,e20x32gp411_5,e34x30MjaKlbA_0,e34x30MjaKlbA_1,e34x30MjaKlbA_2,e34x30MjaKlbA_3,e34x30MjaKlbA_4,e34x30MjaKlbA_5,e20x32gp411_0,e20x32gp411_1,e20x32gp411_2,e20x32gp411_3,e20x32gp411_4,e20x32gp411_5,e34x30MjaKlbA_0,e34x30MjaKlbA_1,e34x30MjaKlbA_2,e34x30MjaKlbA_3,e34x30MjaKlbA_4,e34x30MjaKlbA_5,e20x32gp411_0,e20x32gp411_1,e20x32gp411_2,e20x32gp411_3,e20x32gp411_4,e20x32gp411_5,e34x30MjaKlbA_0,e34x30MjaKlbA_1,e34x30MjaKlbA_2,e34x30MjaKlbA_3,e34x30MjaKlbA_4,e34x30MjaKlbA_5,pRhaB/Rha_0,pRhaB/Rha_1,pSalTTC/Sal_0,pSalTTC/Sal_1,pPhlF/DAPG_0,pPhlF/DAPG_1,pLuxB/AHL_0,pLuxB/AHL_1,pCymRC/Cuma_0,pCymRC/Cuma_1,pCin/OHC14_0,pCin/OHC14_1,pBAD/Ara_0,pBAD/Ara_1,Blank_Kan_0,Blank_Kan_1,negative-control-kan_0,negative-control-kan_1,Blank_Amp_0,Blank_Amp_1,negative-control-amp_0,negative-control-amp_1,positive-control-amp_0,positive-control-amp_1
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1
0,-75.0,-49.0,-69.0,-76.0,-95.0,-81.0,-100.0,-99.0,-104.0,-103.0,-100.0,-96.0,-15.0,-33.0,-50.0,-17.0,-70.0,-69.0,-70.0,-74.0,-63.0,-99.0,-68.0,-36.0,-51.0,-50.0,-63.0,-56.0,-87.0,-57.0,-78.0,-83.0,-89.0,-91.0,-109.0,-89.0,-31.0,-49.0,-48.0,-72.0,-66.0,-51.0,-77.0,-94.0,-94.0,-68.0,-98.0,-77.0,-24.0,-52.0,-61.0,-68.0,-67.0,-63.0,-72.0,-63.0,-87.0,-83.0,-85.0,-83.0,-44.0,-42.0,-61.0,-77.0,-90.0,-80.0,-90.0,-80.0,-92.0,-85.0,-98.0,-97.0,-54.0,-81.0,-35.0,-39.0,4975.0,4600.0,-38.0,-24.0,-21.0,2.0,-46.0,-20.0,-6.0,23.0,,,-52.0,-49.0,,,33.0,57.0,1224.0,1143.0
20,-78.5,-67.5,-84.5,-70.5,-98.5,-80.5,-105.5,-109.5,-99.5,-100.5,-97.5,-104.5,-48.5,-55.5,-56.5,-39.5,-70.5,-43.5,-64.5,-56.5,-57.5,-82.5,-80.5,-64.5,-71.5,-65.5,-70.5,-65.5,-69.5,-74.5,-84.5,-86.5,-69.5,-88.5,-86.5,-84.5,-49.5,-67.5,-70.5,-66.5,-92.5,-64.5,-56.5,-69.5,-95.5,-69.5,-92.5,-61.5,-70.5,-38.5,-56.5,-58.5,-75.5,-62.5,-79.5,-79.5,-82.5,-88.5,-76.5,-81.5,-43.5,-38.5,-68.5,-69.5,-69.5,-62.5,-105.5,-103.5,-106.5,-98.5,-87.5,-84.5,-58.5,-88.5,-39.5,-30.5,5883.5,2594.5,-56.5,-50.5,-34.5,-38.5,-84.5,-32.5,-2.5,8.5,,,-40.5,-33.5,,,34.5,37.5,1311.5,1150.5
40,-80.5,-78.5,-89.5,-93.5,-90.5,-93.5,-80.5,-125.5,-115.5,-101.5,-107.5,-88.5,-39.5,-41.5,-36.5,-54.5,-78.5,-45.5,-62.5,-57.5,-70.5,-74.5,-67.5,-39.5,-72.5,-42.5,-68.5,-77.5,-90.5,-75.5,-76.5,-70.5,-80.5,-70.5,-89.5,-72.5,-60.5,-58.5,-65.5,-53.5,-101.5,-42.5,-65.5,-67.5,-96.5,-74.5,-84.5,-75.5,-74.5,-63.5,-74.5,-69.5,-70.5,-78.5,-79.5,-77.5,-90.5,-98.5,-64.5,-72.5,-54.5,-63.5,-62.5,-81.5,-68.5,-78.5,-90.5,-77.5,-95.5,-106.5,-108.5,-75.5,-43.5,-73.5,-71.5,-39.5,6125.5,2810.5,-49.5,-39.5,-38.5,-49.5,-54.5,4.5,-13.5,18.5,,,-20.5,-28.5,,,40.5,37.5,1730.5,1504.5
60,-64.5,-49.5,-62.5,-66.5,-78.5,-86.5,-81.5,-84.5,-103.5,-85.5,-104.5,-87.5,-17.5,-45.5,-36.5,-37.5,-45.5,-66.5,-44.5,-45.5,-41.5,-37.5,-31.5,-15.5,-51.5,-49.5,-53.5,-74.5,-53.5,-56.5,-75.5,-64.5,-45.5,-65.5,-79.5,-50.5,-44.5,-38.5,-62.5,-61.5,-73.5,-41.5,-71.5,-53.5,-70.5,-46.5,-66.5,-35.5,-38.5,-27.5,-52.5,-63.5,-78.5,-52.5,-46.5,-48.5,-62.5,-67.5,-49.5,-48.5,-37.5,-36.5,-63.5,-72.5,-64.5,-74.5,-75.5,-72.5,-62.5,-72.5,-72.5,-65.5,-45.5,-60.5,-39.5,-18.5,6732.5,3156.5,-57.5,-22.5,-36.5,-24.5,-36.5,-16.5,13.5,14.5,,,-26.5,-9.5,,,47.5,54.5,2296.5,1966.5
80,-45.5,-61.5,-61.5,-79.5,-72.5,-71.5,-76.5,-93.5,-85.5,-68.5,-69.5,-68.5,-14.5,-48.5,-50.5,-44.5,-56.5,-49.5,-47.5,-29.5,-35.5,-40.5,-52.5,-21.5,-66.5,-55.5,-50.5,-45.5,-55.5,-41.5,-59.5,-76.5,-63.5,-43.5,-56.5,-51.5,-48.5,-37.5,-70.5,-53.5,-88.5,-47.5,-61.5,-51.5,-81.5,-37.5,-78.5,-27.5,-64.5,-35.5,-51.5,-51.5,-66.5,-42.5,-56.5,-46.5,-56.5,-62.5,-72.5,-57.5,-36.5,-34.5,-48.5,-67.5,-62.5,-60.5,-71.5,-76.5,-62.5,-68.5,-87.5,-54.5,-19.5,-40.5,-42.5,-17.5,7630.5,3692.5,-60.5,-22.5,-34.5,0.5,-51.5,16.5,10.5,35.5,,,-18.5,-6.5,,,63.0,68.0,2857.0,2465.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1140,3861.0,3524.0,3451.0,3424.0,3720.0,3821.0,3880.0,3886.0,3915.0,3998.0,4053.0,4367.0,3955.0,3655.0,3615.0,3629.0,3694.0,4039.0,3936.0,4016.0,3927.0,3928.0,4061.0,4456.0,3922.0,3707.0,3606.0,3666.0,5140.0,4878.0,3917.0,3886.0,3904.0,3883.0,4074.0,4317.0,3976.0,3731.0,3724.0,3708.0,20977.0,4030.0,4001.0,4032.0,3962.0,4023.0,6401.0,4320.0,3972.0,3733.0,3698.0,3712.0,26756.0,47309.0,4036.0,3876.0,4039.0,4019.0,8331.0,9644.0,3947.0,3708.0,3640.0,3657.0,3708.0,44285.0,4019.0,3820.0,4079.0,4129.0,5967.0,5775.0,4701.0,4352.0,3678.0,3729.0,133826.0,105070.0,4473.0,4633.0,3241.0,3219.0,3150.0,3470.0,5253.0,4961.0,,,3336.0,3285.0,,,3726.5,3601.5,68655.5,77596.5
1160,3917.0,3551.0,3494.0,3412.0,3733.0,3849.0,3899.0,3892.0,3960.0,3999.0,4103.0,4448.0,3993.0,3719.0,3665.0,3690.0,3725.0,4011.0,3964.0,4030.0,3989.0,3972.0,4073.0,4426.0,3978.0,3711.0,3615.0,3676.0,5186.0,4891.0,3952.0,3924.0,3970.0,3885.0,4118.0,4353.0,4025.0,3781.0,3791.0,3748.0,20949.0,4082.0,3990.0,4054.0,4011.0,4053.0,6442.0,4333.0,3997.0,3768.0,3738.0,3744.0,26773.0,47526.0,4090.0,3959.0,4035.0,4091.0,8272.0,9669.0,4007.0,3767.0,3715.0,3703.0,3717.0,44421.0,4085.0,3882.0,4082.0,4168.0,6016.0,5814.0,4783.0,4343.0,3704.0,3825.0,135858.0,106073.0,4536.0,4668.0,3243.0,3244.0,3193.0,3541.0,5223.0,4992.0,,,3335.0,3292.0,,,3739.5,3619.5,68533.5,78110.5
1180,3931.5,3594.5,3508.5,3497.5,3747.5,3863.5,3887.5,3961.5,3980.5,4007.5,4129.5,4419.5,4042.5,3674.5,3676.5,3694.5,3741.5,4034.5,3972.5,4064.5,4020.5,3984.5,4058.5,4487.5,4053.5,3756.5,3684.5,3706.5,5238.5,4899.5,3922.5,4012.5,3947.5,3900.5,4078.5,4348.5,4030.5,3838.5,3758.5,3801.5,21012.5,4050.5,4028.5,4108.5,4037.5,4037.5,6476.5,4376.5,3992.5,3801.5,3775.5,3801.5,26819.5,47544.5,4095.5,3940.5,4066.5,4121.5,8378.5,9567.5,4001.5,3772.5,3763.5,3746.5,3716.5,44347.5,4075.5,3918.5,4054.5,4204.5,6040.5,5859.5,4784.5,4383.5,3742.5,3860.5,135494.5,107388.5,4532.5,4696.5,3280.5,3257.5,3175.5,3547.5,5317.5,4994.5,,,3350.5,3319.5,,,3779.0,3655.0,68464.0,78319.0
1200,3974.0,3609.0,3543.0,3510.0,3792.0,3922.0,3958.0,3971.0,3974.0,4042.0,4129.0,4447.0,4105.0,3747.0,3686.0,3756.0,3769.0,4070.0,4036.0,4030.0,4026.0,4079.0,4095.0,4530.0,4059.0,3777.0,3698.0,3703.0,5265.0,4938.0,3976.0,3998.0,3975.0,3935.0,4129.0,4419.0,4077.0,3874.0,3817.0,3807.0,21050.0,4100.0,4083.0,4120.0,4061.0,4096.0,6514.0,4369.0,4063.0,3841.0,3747.0,3779.0,27044.0,47697.0,4143.0,3952.0,4081.0,4152.0,8397.0,9721.0,4064.0,3826.0,3790.0,3727.0,3775.0,44491.0,4084.0,3960.0,4155.0,4207.0,6065.0,5865.0,4841.0,4413.0,3757.0,3867.0,137393.0,108070.0,4569.0,4679.0,3301.0,3307.0,3221.0,3577.0,5298.0,5059.0,,,3381.0,3321.0,,,3812.5,3708.5,68327.5,78715.5


In [9]:
# Write each extracted block to its own CSV inside the experiment folder.
_targets = [
    'datasets/experiment/{}/ods.csv',
    'datasets/experiment/{}/fluos.csv',
    'datasets/experiment/{}/fluos-lower-1.csv',
    'datasets/experiment/{}/fluos-lower-2.csv',
]
for _pos, _target in enumerate(_targets):
    datas[_pos].to_csv(_target.format(folder))

#### ECFs Characterization

In [20]:
def read_map(plate_map, plate_idx):
    """Reshape a wide plate map into one row per well with 'cuma'/'ara' indices.

    Parameters
    ----------
    plate_map : pandas.DataFrame
        Wide layout with a 'group' column (single row letter) and one column
        per plate column number.
    plate_idx : int
        Value written to 'cuma' for row H.

    Returns
    -------
    pandas.DataFrame
        Columns 'Well', 'value', 'cuma' and 'ara'. For rows other than G/H,
        'cuma' is the row letter's offset from 'A' and 'ara' is
        (column - 1) % 6. Row H gets cuma = `plate_idx` and
        ara = (column - 1) % 3. Row G is left as NaN in both columns.
    """
    long_map = plate_map.melt(id_vars=['group'])
    long_map['variable'] = long_map['variable'].astype(int)
    long_map['Well'] = long_map['group'] + long_map['variable'].map('{:02d}'.format)

    rows = long_map['group']
    sample_rows = ~rows.isin(['G', 'H'])
    row_h = rows.isin(['H'])
    zero_based_col = long_map['variable'] - 1

    # ord('A') == 65, so row A -> 0, B -> 1, ...
    long_map.loc[sample_rows, 'cuma'] = rows.map(ord) - 65
    long_map.loc[sample_rows, 'ara'] = zero_based_col % 6
    long_map.loc[row_h, 'cuma'] = plate_idx
    long_map.loc[row_h, 'ara'] = zero_based_col % 3
    return long_map[['Well', 'value', 'cuma', 'ara']]

def generate_metadata(well, plate_map):
    """Attach a '<sample>_<cuma><ara>' short name to every mapped well.

    Parameters
    ----------
    well : pandas.Series
        Well identifiers, named 'Well'.
    plate_map : pandas.DataFrame
        Columns 'Well', 'value', 'cuma' and 'ara'.

    Returns
    -------
    pandas.DataFrame
        Left-merge of `well` and `plate_map` plus 'short_name'. Wells without
        a 'value', and rows containing any remaining NaN (e.g. a value that is
        not a key of the sample map), are dropped.
    """
    ecf_gates = {
        'G720': 'e11x32STPhoRadA',
        'G721': 'e15x32NpuSspS2',
        'G722': 'e16x33NrdA2',
        'G723': 'e20x32gp411',
        'G724': 'e32x30SspGyrB',
        'G725': 'e34x30MjaKlbA',
        'G726': 'e38x32gp418',
        'G727': 'e41x32NrdJ1',
        'G728': 'e42x32STIMPDH1',
    }
    controls = {
        'BK': 'Blank_Kan',
        'BA': 'Blank_Amp',
        '3K3-N': 'negative-control-kan',
        '4A3-N': 'negative-control-amp',
        '4A3-P': 'positive-control-amp',
    }
    sample_map = {**ecf_gates, **controls}

    merged = pd.merge(well, plate_map, on='Well', how='left')
    df = merged.dropna(subset=['value']).reset_index(drop=True)
    cuma_tag = df['cuma'].astype(int).astype(str)
    ara_tag = df['ara'].astype(int).astype(str)
    df['short_name'] = df['value'].map(sample_map) + '_' + cuma_tag + ara_tag
    return df.dropna()

# Experiment folder under datasets/experiment/; it supplies plate_map.csv and raw.csv.
folder = '017-ecfs-20-38-20211104'
# plate_idx=0 is the 'cuma' value read_map assigns to row H.
plate_map = read_map(pd.read_csv('datasets/experiment/{}/plate_map.csv'.format(folder)), 0)
# skiprows=[0] skips the first line of the raw export before the header is parsed.
raw_data = pd.read_csv('datasets/experiment/{}/raw.csv'.format(folder), skiprows=[0])
metadata = generate_metadata(raw_data['Well'], plate_map)
# Keep only the wells that generate_metadata retained.
data = raw_data[raw_data['Well'].isin(metadata['Well'])].reset_index(drop=True)
# Four consecutive blocks, each 23*3 + 1 = 70 columns wide (h=23, m=0).
datas = generate_data(data, metadata['short_name'], h=23, m=0, num_data=4)

In [21]:
# Inspect the first extracted block (the one saved as ods.csv in the next cell).
datas[0]

short_name,e20x32gp411_00,e20x32gp411_01,e20x32gp411_02,e20x32gp411_03,e20x32gp411_04,e20x32gp411_05,e34x30MjaKlbA_00,e34x30MjaKlbA_01,e34x30MjaKlbA_02,e34x30MjaKlbA_03,e34x30MjaKlbA_04,e34x30MjaKlbA_05,e20x32gp411_10,e20x32gp411_11,e20x32gp411_12,e20x32gp411_13,e20x32gp411_14,e20x32gp411_15,e34x30MjaKlbA_10,e34x30MjaKlbA_11,e34x30MjaKlbA_12,e34x30MjaKlbA_13,e34x30MjaKlbA_14,e34x30MjaKlbA_15,e20x32gp411_20,e20x32gp411_21,e20x32gp411_22,e20x32gp411_23,e20x32gp411_24,e20x32gp411_25,e34x30MjaKlbA_20,e34x30MjaKlbA_21,e34x30MjaKlbA_22,e34x30MjaKlbA_23,e34x30MjaKlbA_24,e34x30MjaKlbA_25,e20x32gp411_30,e20x32gp411_31,e20x32gp411_32,e20x32gp411_33,e20x32gp411_34,e20x32gp411_35,e34x30MjaKlbA_30,e34x30MjaKlbA_31,e34x30MjaKlbA_32,e34x30MjaKlbA_33,e34x30MjaKlbA_34,e34x30MjaKlbA_35,e20x32gp411_40,e20x32gp411_41,e20x32gp411_42,e20x32gp411_43,e20x32gp411_44,e20x32gp411_45,e34x30MjaKlbA_40,e34x30MjaKlbA_41,e34x30MjaKlbA_42,e34x30MjaKlbA_43,e34x30MjaKlbA_44,e34x30MjaKlbA_45,e20x32gp411_50,e20x32gp411_51,e20x32gp411_52,e20x32gp411_53,e20x32gp411_54,e20x32gp411_55,e34x30MjaKlbA_50,e34x30MjaKlbA_51,e34x30MjaKlbA_52,e34x30MjaKlbA_53,e34x30MjaKlbA_54,e34x30MjaKlbA_55,Blank_Kan_02,Blank_Kan_00,negative-control-kan_01,negative-control-kan_02,Blank_Amp_00,Blank_Amp_01,negative-control-amp_02,negative-control-amp_00,positive-control-amp_01,positive-control-amp_02
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1
0,0.056,0.048,0.087,0.027,0.141,0.006,0.009,-0.00055,0.005,0.032,0.020,0.026,0.053,0.008,0.037,0.00055,0.010,-0.002,-0.001,0.00015,-0.006,0.021,0.017,0.004,0.003,0.004,0.003,-0.007,0.126,0.011,0.014,0.007,-0.00045,0.009,0.003,0.015,0.002,0.002,0.037,0.00065,0.002,-0.003,0.010,0.00055,-0.005,0.008,-0.001,0.018,0.016,-0.001,0.036,-0.004,0.021,0.008,0.011,0.028,0.011,0.023,0.055,0.020,0.025,0.028,0.029,0.008,-0.00015,0.003,0.016,0.028,0.029,0.026,0.018,0.047,,,0.017,0.004,,,0.037,0.055,0.049,0.054
20,0.078,0.062,0.077,0.041,0.143,0.025,0.020,0.09200,0.018,0.019,0.020,0.026,0.061,0.020,0.014,0.01500,0.019,0.014,0.013,0.02600,0.014,0.011,0.025,0.051,0.023,0.023,0.019,0.014,0.115,0.015,0.016,0.010,0.01200,0.018,0.017,0.018,0.016,0.014,0.017,0.01600,0.015,0.014,0.014,0.01500,0.012,0.019,0.014,0.024,0.015,0.067,0.014,0.017,0.028,0.025,0.014,0.013,0.013,0.009,0.024,0.020,0.026,0.018,0.015,0.015,0.02400,0.040,0.012,0.015,0.015,0.013,0.019,0.040,,,0.026,0.018,,,0.075,0.029,0.028,0.030
40,0.081,0.067,0.076,0.043,0.145,0.027,0.022,0.09100,0.023,0.021,0.023,0.029,0.068,0.022,0.016,0.01600,0.021,0.016,0.017,0.02200,0.016,0.012,0.027,0.019,0.026,0.026,0.018,0.015,0.120,0.016,0.020,0.012,0.01400,0.020,0.020,0.019,0.019,0.016,0.017,0.01800,0.016,0.014,0.017,0.01700,0.014,0.021,0.015,0.026,0.020,0.019,0.016,0.015,0.033,0.027,0.015,0.016,0.015,0.012,0.029,0.024,0.028,0.020,0.018,0.017,0.02000,0.019,0.015,0.018,0.019,0.016,0.023,0.039,,,0.029,0.020,,,0.028,0.029,0.033,0.034
60,0.088,0.072,0.081,0.047,0.161,0.032,0.030,0.05900,0.027,0.029,0.030,0.036,0.074,0.026,0.020,0.02100,0.025,0.030,0.020,0.02700,0.021,0.018,0.031,0.024,0.030,0.041,0.024,0.020,0.122,0.022,0.024,0.019,0.01800,0.028,0.024,0.025,0.023,0.024,0.023,0.02100,0.020,0.018,0.022,0.02200,0.020,0.026,0.020,0.032,0.030,0.025,0.021,0.021,0.038,0.029,0.022,0.021,0.035,0.017,0.036,0.028,0.034,0.025,0.023,0.022,0.02500,0.032,0.022,0.025,0.024,0.021,0.027,0.043,,,0.038,0.028,,,0.041,0.038,0.042,0.045
80,0.100,0.084,0.090,0.058,0.161,0.043,0.200,0.04700,0.042,0.046,0.046,0.052,0.087,0.039,0.030,0.03200,0.035,0.030,0.033,0.04100,0.034,0.031,0.045,0.036,0.043,0.068,0.033,0.031,0.130,0.031,0.036,0.030,0.03200,0.038,0.038,0.036,0.035,0.088,0.033,0.03000,0.030,0.066,0.057,0.03500,0.031,0.038,0.032,0.044,0.039,0.059,0.033,0.033,0.046,0.118,0.084,0.035,0.033,0.031,0.050,0.041,0.045,0.036,0.033,0.032,0.03600,0.101,0.102,0.038,0.039,0.035,0.042,0.056,,,0.056,0.040,,,0.063,0.056,0.055,0.067
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60,92.500,79.500,82.500,85.500,97.500,55.500,76.500,69.50000,65.500,69.500,76.500,98.500,112.500,104.500,79.500,88.50000,87.500,70.500,107.500,87.50000,105.500,100.500,83.500,97.500,108.500,121.500,98.500,114.500,88.500,106.500,129.500,123.500,94.50000,98.500,118.500,85.500,43.500,57.500,53.500,50.50000,73.500,43.500,120.500,115.50000,130.500,80.500,98.500,94.500,94.500,70.500,89.500,80.500,95.500,88.500,106.500,144.500,118.500,127.500,118.500,123.500,72.500,86.500,87.500,74.500,89.50000,70.500,120.500,125.500,122.500,126.500,146.500,110.500,,,96.500,49.500,,,82.000,68.000,2015.000,2222.000
80,91.500,68.500,61.500,71.500,85.500,52.500,91.500,77.50000,64.500,57.500,76.500,92.500,92.500,88.500,83.500,59.50000,80.500,82.500,82.500,101.50000,86.500,108.500,43.500,103.500,110.500,108.500,67.500,85.500,106.500,94.500,116.500,126.500,95.50000,95.500,112.500,98.500,29.500,51.500,65.500,50.50000,64.500,36.500,96.500,85.50000,104.500,88.500,116.500,85.500,75.500,70.500,87.500,75.500,69.500,79.500,123.500,114.500,113.500,95.500,109.500,108.500,55.500,60.500,91.500,87.500,90.50000,59.500,105.500,113.500,113.500,108.500,140.500,91.500,,,94.500,43.500,,,112.000,86.000,2488.000,2844.000
100,106.000,93.000,68.000,64.000,105.000,71.000,95.000,102.00000,75.000,96.000,104.000,120.000,108.000,105.000,89.000,65.00000,101.000,74.000,104.000,101.00000,106.000,120.000,86.000,119.000,103.000,126.000,87.000,107.000,96.000,103.000,126.000,148.000,106.00000,115.000,150.000,94.000,25.000,63.000,67.000,44.00000,72.000,42.000,111.000,104.00000,129.000,90.000,137.000,98.000,102.000,89.000,97.000,91.000,73.000,90.000,109.000,133.000,121.000,119.000,134.000,132.000,54.000,78.000,65.000,97.000,86.00000,64.000,121.000,138.000,124.000,123.000,140.000,124.000,,,101.000,53.000,,,129.000,107.000,3042.000,3448.000
120,112.500,98.500,93.500,72.500,112.500,63.500,119.500,118.50000,111.500,117.500,116.500,123.500,135.500,133.500,97.500,81.50000,92.500,88.500,119.500,128.50000,125.500,138.500,100.500,117.500,106.500,133.500,103.500,86.500,95.500,118.500,148.500,167.500,137.50000,124.500,133.500,127.500,50.500,76.500,87.500,62.50000,87.500,43.500,118.500,116.50000,133.500,110.500,137.500,103.500,89.500,121.500,102.500,115.500,87.500,83.500,141.500,149.500,129.500,125.500,147.500,141.500,93.500,94.500,89.500,119.500,98.50000,79.500,140.500,141.500,143.500,158.500,161.500,143.500,,,120.500,80.500,,,170.500,139.500,3650.500,4064.500


In [22]:
# Write each extracted block to its own CSV inside the experiment folder.
_targets = [
    'datasets/experiment/{}/ods.csv',
    'datasets/experiment/{}/fluos.csv',
    'datasets/experiment/{}/fluos-lower-1.csv',
    'datasets/experiment/{}/fluos-lower-2.csv',
]
for _pos, _target in enumerate(_targets):
    datas[_pos].to_csv(_target.format(folder))

#### Initial experiments

In [None]:
def read_map(plate_map, plate_idx):
    """Reshape a wide plate map into one row per well with an inducer index.

    Parameters
    ----------
    plate_map : pandas.DataFrame
        Wide layout with a 'group' column (row letter) and one column per
        plate column number.
    plate_idx : int
        Value written to 'inducer' for row H.

    Returns
    -------
    pandas.DataFrame
        Columns 'Well', 'value' and 'inducer'. Row H gets `plate_idx`; every
        other row gets (column - 1) % 6.
    """
    long_map = plate_map.melt(id_vars=['group'])
    long_map['variable'] = long_map['variable'].astype(int)
    long_map['Well'] = long_map['group'] + long_map['variable'].map('{:02d}'.format)

    row_h = long_map['group'].isin(['H'])
    long_map.loc[~row_h, 'inducer'] = (long_map['variable'] - 1) % 6
    long_map.loc[row_h, 'inducer'] = plate_idx
    return long_map[['Well', 'value', 'inducer']]

def generate_metadata(well, plate_map):
    """Attach a '<sample>_<inducer>' short name to every mapped well.

    Parameters
    ----------
    well : pandas.Series
        Well identifiers, named 'Well'.
    plate_map : pandas.DataFrame
        Columns 'Well', 'value' and 'inducer'.

    Returns
    -------
    pandas.DataFrame
        Left-merge of `well` and `plate_map` plus 'short_name'. Wells without
        a 'value', and rows containing any remaining NaN (e.g. a value that is
        not a key of the sample map), are dropped.
    """
    sample_map = {
        'P62': 'pBAD/Ara',
        'P64': 'pCin/OHC14',
        'P66': 'pCymRC/Cuma',
        'P68': 'pLuxB/AHL',
        'P69': 'pPhlF/DAPG',
        'P70': 'pSalTTC/Sal',
        'P72': 'pRhaB/Rha',
        'E720': 'e11x33',
        'E721': 'e15x33',
        'E722': 'e16x33',
        'E723': 'e20x33',
        'E724': 'e32x33',
        'E725': 'e34x33',
        'E726': 'e38x33',
        'E727': 'e41x33',
        'E728': 'e42x33',
        # Was keyed 'G726', which the later 'G726' entry silently overrode, so
        # 'G720' wells had no mapping and were dropped. The other sections of
        # this notebook map 'G720' to this name.
        'G720': 'e11x32STPhoRadA',
        'G721': 'e15x32NpuSspS2',
        'G722': 'e16x33NrdA2',
        'G723': 'e20x32gp411',
        'G724': 'e32x30SspGyrB',
        'G725': 'e34x30MjaKlbA',
        'G726': 'e38x32gp418',
        'G727': 'e41x32NrdJ1',
        'G728': 'e42x32STIMPDH1',
        'BK': 'Blank_Kan',
        'BA': 'Blank_Amp',
        '3K3-N': 'negative-control-kan',
        '4A3-N': 'negative-control-amp',
        '4A3-P': 'positive-control-amp'
    }
    df = pd.merge(well, plate_map, on='Well', how='left').dropna(subset=['value']).reset_index(drop=True)
    # An unmapped 'value' yields NaN here and the row is removed by the final dropna.
    df['short_name'] = df['value'].map(sample_map) + '_' + df['inducer'].astype(int).astype(str)
    return df.dropna()

In [None]:
# plate_idx=1 is the 'inducer' value read_map assigns to row H.
plate_map = read_map(pd.read_csv('datasets/experiment/plate-3/plate_map.csv'), 1)
# skiprows=[0] skips the first line of the raw export before the header is parsed.
raw_data = pd.read_csv('datasets/experiment/plate-3/raw.csv', skiprows=[0])
metadata = generate_metadata(raw_data['Well'], plate_map)
# Keep only the wells that generate_metadata retained.
data = raw_data[raw_data['Well'].isin(metadata['Well'])].reset_index(drop=True)
# Four consecutive blocks, each 24*3 + 1 = 73 columns wide (h=24, m=0).
datas = generate_data(data, metadata['short_name'], h=24, m=0, num_data=4)

In [None]:
# Disabled code kept as a string literal: nothing below is executed, it is only
# assigned to `temp`. The snippet collects single columns from per-timepoint
# plate-1 CSV files and writes the third fluorescence column out as one table.
temp = '''
ods1, fluos1, fluos2, fluos3 = [], [], [], []
for i in range(0, 140, 20):
    if i==20:
        continue
    ods1.append(pd.read_csv('datasets/experiment/plate-1/od-{}.csv'.format(i)).iloc[:,3])   
    fluos1.append(pd.read_csv('datasets/experiment/plate-1/fluo-{}.csv'.format(i)).iloc[:,3])
    fluos2.append(pd.read_csv('datasets/experiment/plate-1/fluo-{}.csv'.format(i)).iloc[:,4])
    fluos3.append(pd.read_csv('datasets/experiment/plate-1/fluo-{}.csv'.format(i)).iloc[:,5])
pd.concat(fluos3, axis=1).T.to_csv('datasets/experiment/plate-1/plate1-fluos3-add.csv', index=False)
'''

In [None]:
# Write each extracted block to its own CSV in the plate-3 folder.
_targets = [
    'datasets/experiment/plate-3/plate3-ods.csv',
    'datasets/experiment/plate-3/plate3-fluos.csv',
    'datasets/experiment/plate-3/plate3-fluos-gain-lower-1.csv',
    'datasets/experiment/plate-3/plate3-fluos-gain-lower-2.csv',
]
for _pos, _target in enumerate(_targets):
    datas[_pos].to_csv(_target)

### AND Induction Matrix

In [None]:
# Gate names, consumed two at a time by the plate loop below via
# gates[(i*2)-2:(i*2)] for plates 1..5; the list is padded to ten entries
# with 'invalid'.
gates = ['e11x32STPhoRadA', 'e15x32NpuSspS2', 'e16x33NrdA2', 'e20x32gp411', 'e32x30SspGyrB', 'e34x30MjaKlbA',
         'e38x32gp418', 'e41x32NrdJ1', 'e42x32STIMPDH1', 'invalid']

In [None]:
def read_map(plate_map, plate_idx):
    """Reshape a wide plate map into one row per well with 'cuma'/'ara' indices.

    Parameters
    ----------
    plate_map : pandas.DataFrame
        Wide layout with a 'group' column (single row letter) and one column
        per plate column number.
    plate_idx : int
        Value written to 'cuma' for rows G and H.

    Returns
    -------
    pandas.DataFrame
        Columns 'well', 'value', 'cuma' and 'ara'. For rows other than G/H,
        'cuma' is the row letter's offset from 'A' and 'ara' is
        (column - 1) % 6. Rows G/H get cuma = `plate_idx` and
        ara = (column - 1) % 3.
    """
    long_map = plate_map.melt(id_vars=['group'])
    long_map['variable'] = long_map['variable'].astype(int)
    long_map['well'] = long_map['group'] + long_map['variable'].map('{:02d}'.format)

    rows = long_map['group']
    bottom_rows = rows.isin(['G', 'H'])
    zero_based_col = long_map['variable'] - 1

    # ord('A') == 65, so row A -> 0, B -> 1, ...
    long_map.loc[~bottom_rows, 'cuma'] = rows.map(ord) - 65
    long_map.loc[~bottom_rows, 'ara'] = zero_based_col % 6
    long_map.loc[bottom_rows, 'cuma'] = plate_idx
    long_map.loc[bottom_rows, 'ara'] = zero_based_col % 3
    return long_map[['well', 'value', 'cuma', 'ara']]

def generate_metadata(well, plate_map, gates):
    """Attach a '<sample>_<cuma><ara>' short name to every mapped well.

    Parameters
    ----------
    well : pandas.Series
        Well identifiers, named 'well'.
    plate_map : pandas.DataFrame
        Columns 'well', 'value', 'cuma' and 'ara'.
    gates : sequence of str
        Names substituted for the 'E1' and 'E2' labels (first two entries).

    Returns
    -------
    pandas.DataFrame
        Left-merge of `well` and `plate_map` plus 'short_name'. Wells without
        a 'value', and rows containing any remaining NaN (e.g. a value that is
        not a key of the sample map), are dropped.
    """
    first_gate, second_gate = gates[0], gates[1]
    sample_map = {
        'E1': first_gate,
        'E2': second_gate,
        'P_3K3': 'positive_control_3K3',
        'P_4A3': 'positive_control_4AE',
        'N_3K3': 'negative_control_3K3',
        'N_4A3': 'negative_control_4AE',
        'B_K': 'blank_K',
        'B_A': 'blank_A',
    }

    merged = pd.merge(well, plate_map, on='well', how='left')
    df = merged.dropna(subset=['value']).reset_index(drop=True)
    cuma_tag = df['cuma'].astype(int).astype(str)
    ara_tag = df['ara'].astype(int).astype(str)
    df['short_name'] = df['value'].map(sample_map) + '_' + cuma_tag + ara_tag
    return df.dropna()

In [None]:
# Process plates 1..5 with the same plate-map layout and stitch the results
# side by side. `ods` is initialised but never filled (its lines are commented out).
fluos, ods = [], []
for i in tqdm(range(1, 6)):
    # The plate number doubles as the 'cuma' value of control rows G/H.
    plate_map = read_map(pd.read_csv('datasets/induction_matrix/plate_map.csv'), i)
    raw_data = pd.read_csv('datasets/induction_matrix/plate{}-fluo-od.csv'.format(i), skiprows=[0])
    # Plate i uses the i-th pair of gate names: gates[2i-2] and gates[2i-1].
    metadata = generate_metadata(raw_data['well'], plate_map, gates[(i*2)-2:(i*2)])
    data = raw_data[raw_data['well'].isin(metadata['well'])].reset_index(drop=True)
    # num_data=1: only the first block (24*3 + 1 = 73 columns) is extracted.
    datas = generate_data(data, metadata['short_name'], h=24, num_data=1)
    fluos.append(datas[0])
    #ods.append(datas[1])
pd.concat(fluos, axis=1).to_csv('datasets/induction_matrix/avg_fluos.csv')
#pd.concat(ods, axis=1).to_csv('datasets/induction_matrix/avg_ods.csv')

### XOR Induction Matrix

In [None]:
# NOTE(review): only two gate names are listed, while the plate loop below slices
# gates[(i*2)-2:(i*2)] for i in 1..5, which is empty from i = 2 onwards -- confirm
# how many XOR plates exist.
gates = ['e20-33', 'e11-15']

In [None]:
def read_map(plate_map, plate_idx):
    """Reshape a wide plate map into one row per well with 'ara'/'rha' indices.

    Parameters
    ----------
    plate_map : pandas.DataFrame
        Wide layout with a 'group' column (single row letter) and one column
        per plate column number.
    plate_idx : int
        Value written to both 'ara' and 'rha' for rows G and H.

    Returns
    -------
    pandas.DataFrame
        Columns 'well', 'value', 'ara' and 'rha'. For rows other than G/H,
        'ara' is the row letter's offset from 'A' and 'rha' is
        (column - 1) % 6. Rows G/H get `plate_idx` in both columns.
    """
    # Use the frame that was passed in. The previous version re-read
    # 'datasets/induction_matrix_xor/plate_map.csv' here, which silently
    # discarded the argument and hid file I/O inside the function.
    plate_map = plate_map.melt(id_vars=['group'])
    plate_map['variable'] = plate_map['variable'].astype(int)
    plate_map['well'] = plate_map['group'] + plate_map['variable'].apply(lambda x: '{:02d}'.format(x))
    # ord('A') == 65, so row A -> 0, B -> 1, ...
    plate_map.loc[~plate_map['group'].isin(['G', 'H']), 'ara'] = plate_map['group'].apply(lambda x: ord(x)) - 65
    plate_map.loc[~plate_map['group'].isin(['G', 'H']), 'rha'] = (plate_map['variable'] - 1) % 6
    plate_map.loc[plate_map['group'].isin(['G', 'H']), 'ara'] = plate_idx
    plate_map.loc[plate_map['group'].isin(['G', 'H']), 'rha'] = plate_idx
    return plate_map[['well', 'value', 'ara', 'rha']]

def generate_metadata(well, plate_map, gates):
    """Attach sample names to the measured wells of an XOR induction plate.

    Parameters
    ----------
    well : pandas.Series
        Well identifiers named 'well' (the merge key).
    plate_map : pandas.DataFrame
        Needs the columns 'well', 'value', 'ara' and 'rha'.
    gates : sequence of str
        Gate names; 'E1' maps to gates[0] and 'E2' to gates[1].

    Returns
    -------
    pandas.DataFrame
        The merged rows with a 'short_name' column of the form
        '<gate>_<ara><rha>'. Rows without a 'value', and rows left with any
        missing field (e.g. an unmapped 'value'), are dropped.
    """
    sample_map = {
        'E1': gates[0],
        'E2': gates[1]
    }
    df = pd.merge(well, plate_map, on='well', how='left').dropna(subset=['value']).reset_index(drop=True)
    # The XOR plate map carries 'ara' and 'rha' levels; the previous code read a
    # 'cuma' column that this map does not contain and failed with a KeyError.
    # NOTE(review): the suffix order (ara first, then rha) follows the column
    # order of the XOR plate map - confirm it matches downstream expectations.
    levels = df['ara'].astype(int).astype(str) + df['rha'].astype(int).astype(str)
    df['short_name'] = df['value'].map(sample_map) + '_' + levels
    return df.dropna()

In [None]:
# Same per-plate pipeline as the induction-matrix section, using the XOR plate map.
# NOTE(review): the `gates` list defined in this section has two entries, so the slice
# gates[(i*2)-2:(i*2)] is empty for i >= 2 while generate_metadata indexes gates[0] and
# gates[1] - confirm the intended number of plates.
# NOTE(review): raw data is read from, and the result is written to,
# 'datasets/induction_matrix/' rather than 'datasets/induction_matrix_xor/'; the output
# path is the same file the previous section writes - confirm this is intended.
fluos, ods = [], []
for i in tqdm(range(1, 6)):
    plate_map = read_map(pd.read_csv('datasets/induction_matrix_xor/plate_map.csv'), i)
    # skiprows=[0] drops the first line of the export so the following line is used as the header.
    raw_data = pd.read_csv('datasets/induction_matrix/plate{}-fluo-od.csv'.format(i), skiprows=[0])
    metadata = generate_metadata(raw_data['well'], plate_map, gates[(i*2)-2:(i*2)])
    # Keep only the wells that are present in the metadata.
    data = raw_data[raw_data['well'].isin(metadata['well'])].reset_index(drop=True)
    datas = generate_data(data, metadata['short_name'], h=24, num_data=1)
    fluos.append(datas[0])
    #ods.append(datas[1])
pd.concat(fluos, axis=1).to_csv('datasets/induction_matrix/avg_fluos.csv')

In [None]:
# Display the plate map left over from the last iteration of the loop above.
plate_map

### Exercise

In [None]:
def read_map(plate_map):
    """Reshape the wide exercise plate layout into one row per well.

    The input needs a 'group' column (plate row letter) plus one column per
    plate column number. The result has the columns 'well' (row letter and
    zero-padded column number), 'value' (the cell content) and 'inducer',
    which is 1 for rows C, D, G and H and 0 for every other row.
    """
    induced_rows = ['C', 'D', 'G', 'H']

    long_map = plate_map.melt(id_vars=['group'])
    long_map['variable'] = long_map['variable'].astype(int)
    long_map['well'] = long_map['group'] + long_map['variable'].map("{:02d}".format)

    is_induced = long_map['group'].isin(induced_rows)
    long_map.loc[~is_induced, 'inducer'] = 0
    long_map.loc[is_induced, 'inducer'] = 1
    return long_map[['well', 'value', 'inducer']]

# Load the exercise plate layout and convert it to one row per well.
plate_map = read_map(pd.read_csv('datasets/exercise/plate_map.csv'))

In [None]:
def generate_metadata(well, plate_map):
    """Left-join the measured wells onto the plate map and keep complete rows.

    Wells without a 'value' are removed first and the index is renumbered;
    afterwards every row that still contains a missing field is dropped
    (the index is not renumbered a second time).
    """
    joined = pd.merge(well, plate_map, on='well', how='left')
    annotated = joined.dropna(subset=['value']).reset_index(drop=True)
    return annotated.dropna()

# skiprows=[0] drops the first line of the export so the following line is used as the header.
raw_data = pd.read_csv('datasets/exercise/first-exercise.csv', skiprows=[0])
metadata = generate_metadata(raw_data['well'], plate_map)
# Keep only the measured wells that are present in the metadata.
data = raw_data[raw_data['well'].isin(metadata['well'])].reset_index(drop=True)

In [None]:
# Split the export into its three measurement tables. The generate_data variant that
# accepts `col` returns one table per requested block and defaults to num_data=2, so
# three blocks must be requested explicitly for the three-way unpacking to succeed.
# NOTE(review): assumes the third block has the same number of columns as the first
# two - confirm against the export layout.
fluo, od, fluo_half = generate_data(data, metadata['value'], 24, num_data=3, col='value')

In [None]:
# Plot ten consecutive fluorescence columns starting at column position 36 (3 * 12).
i = 3 * 12
promoters = fluo.iloc[:,i:i+10]
#promoters
promoters.plot()

In [None]:
# Plot nine consecutive OD columns starting at column position 24 (2 * 12).
i = 2 * 12
promoters = od.iloc[:,i:i+9]
#promoters
# The loop reuses `i` as its index; the index is in minutes and is shown in hours.
for i in range(promoters.shape[1]):
    plt.plot(od.index/60, promoters.iloc[:, i], label=promoters.columns[i])
plt.ylabel('OD')
plt.xlabel('Hour')
plt.legend()
sns.despine()

In [None]:
# Select the columns of seven named constructs from the OD table, then keep every
# fourth column starting at position 2 (positions 2, 6, ..., 30).
# NOTE(review): presumably each construct occupies four adjacent columns and the third
# one is the condition of interest - confirm against the plate layout.
a = []
ECF = od[['E720', 'E721', 'E722', 'E723', 'E724', 'E725', 'E726']]
for i in range(0, 32, 4):
    # A one-column slice keeps the result a DataFrame; positions past the last column yield an empty frame.
    a.append(ECF.iloc[:,i+2:i+3])
ECFs = pd.concat(a, axis=1)
ECFs

In [None]:
# Plot every selected column against time in hours (the index is in minutes).
# NOTE(review): the y-axis label says fluorescence, but ECFs was built from the `od`
# table in the previous cell - confirm which measurement is intended.
for i in range(ECFs.shape[1]):
    plt.plot(ECFs.index/60, ECFs.iloc[:, i], label=ECFs.columns[i])
plt.ylabel('Fluo (a.u)')
plt.xlabel('Hour')
plt.legend()
sns.despine()

In [None]:
# Load the saved induction-matrix tables, indexed by the 'time' column.
fluos = pd.read_csv('datasets/induction_matrix/induction_fluo.csv', index_col='time')
ods = pd.read_csv('datasets/induction_matrix/induction_od.csv', index_col='time')
gates = ['e11x32STPhoRadA', 'e15x32NpuSspS2', 'e16x33NrdA2', 'e20x32gp411', 'e32x30SspGyrB',
         'e34x30MjaKlbA', 'e38x32gp418', 'e41x32NrdJ1', 'e42x32STIMPDH1']
g = []
# gates[:-2] skips the last two gates; for the rest the '<gate>_55' column is collected and plotted.
for gate in gates[:-2]:
    fluo = fluos['{}_55'.format(gate)]
    g.append(fluo)
    plt.plot(fluo.index/60, fluo, label=gate)
plt.legend()
sns.despine()

In [None]:
# Combine the collected '<gate>_55' series into one table, one column per gate.
gdf = pd.concat(g, axis=1)
gdf

In [None]:
# One panel per gate (all but the last two), overlaying column i of gdf ('gate') and
# column i of ECFs ('intact'); the unused panels of the 2 x 5 grid are hidden.
# NOTE(review): the pairing relies on gdf and ECFs having their columns in the same
# order - confirm.
f, axs = plt.subplots(2, 5, sharex=False, sharey=False, figsize=(16, 5))
axr = axs.ravel()
for i, ax in enumerate(axr):
    if i < len(gates[:-2]):
        ax.plot(gdf.index/60, gdf.iloc[:,i], label='gate')
        ax.plot(ECFs.index/60, ECFs.iloc[:,i], label='intact')
        ax.set_title(gates[i])
        ax.legend(loc=4)
        sns.despine()
    else:
        ax.set_visible(False)
plt.tight_layout()

In [None]:
# Load the construct dictionary (it is merged on 'id' and filtered on 'short_name' below).
constructs = pd.read_csv('datasets/dictionary.csv')
constructs

In [None]:
# Show the dictionary rows whose short_name is one of the gates listed above.
constructs[constructs['short_name'].isin(gates)]

In [None]:
def parse_minutes(x):
    """Convert a space-separated time label into a total number of minutes.

    The first token is read as the hour count and the third token as the
    minute count; an empty third token counts as zero minutes
    (e.g. '1 h 20 min' -> 80).
    """
    tokens = x.split(' ')
    total = 60 * int(tokens[0])
    minute_token = tokens[2]
    if minute_token != '':
        total += int(minute_token)
    return total

def transpose_data(df):
    """Return `df` transposed so that rows are time points and columns are samples.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per sample with a 'short_name' column; every other column
        label is a time label understood by `parse_minutes`.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'time' (minutes), one column per 'short_name'.

    The input frame is left unmodified; the previous implementation set its
    index in place, which silently changed the caller's object.
    """
    by_sample = df.set_index('short_name')
    # After the transpose the time labels are the index; reset_index exposes
    # them as the 'index' column so they can be converted to minutes.
    by_time = by_sample.transpose().reset_index()
    by_time['time'] = by_time['index'].apply(parse_minutes)
    return by_time.set_index('time').drop('index', axis=1)

def generate_data(df, plate, name, h=20, m=0):
    """Cut a plate export into three time-indexed tables.

    Column 3 onwards is split into two blocks of ``h * 3 + 1 - m`` columns
    followed by everything that remains. Each block is cast to float,
    prefixed with `name` and passed through `transpose_data`.

    `plate` is accepted but not used.

    Returns the tuple (fluo, od, fluo_half).
    """
    first_col = 3
    block_width = h * 3 + 1 - m
    second_col = first_col + block_width
    third_col = second_col + block_width
    column_ranges = (
        slice(first_col, second_col),   # fluo
        slice(second_col, third_col),   # od
        slice(third_col, None),         # fluo_half: all remaining columns
    )

    numeric_blocks = [df.iloc[:, cols].astype(float) for cols in column_ranges]
    labelled_blocks = [pd.concat([name, block], axis=1) for block in numeric_blocks]
    fluo, od, fluo_half = [transpose_data(block) for block in labelled_blocks]

    return fluo, od, fluo_half

In [None]:
def read_plate_data(df):
    """Promote the first row of `df` to the header and drop incomplete rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw export whose first row holds the real column names.

    Returns
    -------
    pandas.DataFrame
        A new frame with the first row used as column labels, that row
        removed, every row containing a missing value dropped and the index
        renumbered from 0. The input frame is not modified.
    """
    header = df.iloc[0]
    body = df.drop(index=df.index[0])
    body.columns = header
    return body.dropna().reset_index(drop=True)

# The bare `read_plate_data()` call that used to follow was removed: it omitted the
# required `df` argument and raised a TypeError whenever the cell was run.

### Marionette Strain

In [None]:
def read_plate_map(data):
    """Split a plate map indexed by 'group' into a control part and a long sample part.

    The first two columns are returned as `con` with the index turned back
    into a column. The remaining columns are melted to one row per well;
    `mar` holds the cell content as 'id' and the well name (group letter plus
    zero-padded column number) as 'well'.
    """
    con = data.iloc[:, :2].reset_index()

    long_map = data.iloc[:, 2:].reset_index().melt(id_vars=['group'])
    padded_column = long_map['variable'].map(lambda label: "{:02d}".format(int(label)))
    long_map['well'] = long_map['group'] + padded_column
    mar = long_map.drop(columns=['group', 'variable']).rename(columns={'value': 'id'})
    return con, mar

# Load the plate map with 'group' as the index, as read_plate_map expects.
con, mar = read_plate_map(pd.read_csv('datasets/mario_map.csv', index_col=['group']))

In [None]:
def read_plate_data(df):
    """Promote the first row of `df` to the header.

    Returns a new frame that uses the first row as column labels, has that
    row removed and is renumbered from 0. Rows with missing values are kept.
    The input frame is not modified (the previous version relabelled and
    dropped rows on the caller's object in place).
    """
    promoted = df.drop(index=df.index[0])
    promoted.columns = df.iloc[0]
    return promoted.reset_index(drop=True)

# Load the raw export and use its first data row as the header.
data = read_plate_data(pd.read_csv('datasets/marrionette.csv'))

In [None]:
# Map each measured well to its construct id, then to the construct's short name.
merged = pd.merge(data['well'], mar, on='well', how='left')
merged = pd.merge(merged, constructs, on='id', how='left')
name = merged['short_name'].dropna().reset_index(drop=True) #just to make sure there is no null and indexing is correct

In [None]:
# Inspect the well -> id -> short_name join.
merged

In [None]:
# Split the export into its three tables with h=24; `mar` fills the `plate` parameter.
mar_fluo, mar_od, mar_fluo_half = generate_data(data, mar, name, 24)
# The distinct short names become the gate list that reformat_df iterates over.
gates = name.unique().tolist()

In [None]:
def reformat_df(data):
    """Split the columns of every gate into two tables with state-suffixed labels.

    For each name in the module-level `gates` list, the first four columns
    selected by that name go into the first table and the remaining columns
    into the second. Both groups are relabelled '<gate>_00', '<gate>_10',
    '<gate>_01', '<gate>_11', so each gate must contribute exactly four
    columns to each table.

    Returns (top10_wrapper, mario_wrapper).
    """
    suffixes = ('_00', '_10', '_01', '_11')
    top10_wrapper, mario_wrapper = pd.DataFrame(), pd.DataFrame()

    for gate in gates:
        labels = [gate + suffix for suffix in suffixes]

        top10 = data[gate].iloc[:, :4]
        top10.columns = list(labels)
        top10_wrapper = pd.concat([top10_wrapper, top10], axis=1)

        mario = data[gate].iloc[:, 4:]
        mario.columns = list(labels)
        mario_wrapper = pd.concat([mario_wrapper, mario], axis=1)

    return top10_wrapper, mario_wrapper

In [None]:
# Split the fluorescence table into the two column groups produced by reformat_df.
top10_fluo, mario_fluo = reformat_df(mar_fluo)
#top10_od, mario_od = reformat_df(mar_od)

#mario_fluo.reset_index().to_csv('datasets/marionette_fluo_half.csv', index=False)
#mario_od.reset_index().to_csv('datasets/marionette_od.csv', index=False)

### ALL GATES

### Data from plate reader 1

In [None]:
def cleanse_plate(plate):
    """Convert a wide plate map into a two-column Well / code_name table.

    The 'Unnamed: 0' column supplies the row letter and the remaining column
    labels the plate column numbers. Each cell value is cut at its first '.'
    and the well name is the row letter followed by the zero-padded column
    number.
    """
    long_map = plate.melt(id_vars=['Unnamed: 0'])
    long_map['value'] = long_map['value'].map(lambda code: code.split('.')[0])
    padded_column = long_map['variable'].map(lambda label: "{:02d}".format(int(label)))
    long_map['variable'] = long_map['Unnamed: 0'] + padded_column
    tidy = long_map.drop(columns='Unnamed: 0').reset_index(drop=True)
    return tidy.rename(columns={'variable': 'Well', 'value': 'code_name'})

# Plate 1 layout as a Well -> code_name table.
plate1 = cleanse_plate(pd.read_csv('datasets/plate1_map.csv'))
plate1

In [None]:
# Load the three replicate runs of plate 1.
# NOTE(review): read_plate_data is called with a dataset name here, while the definitions
# visible in this notebook expect a DataFrame - presumably this cell was written against
# an earlier version that loaded the file itself; confirm before re-running.
df11 = read_plate_data('and_gate_11') # up to 20h
print(df11.shape)
df12 = read_plate_data('and_gate_12') # up to 16h
print(df12.shape)
df13 = read_plate_data('and_gate_13') # up to 16h
print(df13.shape)

In [None]:
# Map each measured well to its code name and then to its short name.
# NOTE(review): `naming_map` is not defined in the visible part of the notebook - confirm
# it is created earlier.
merged = pd.merge(df11['Well'], plate1, on='Well', how='left')
merged = pd.merge(merged, naming_map, on='code_name', how='left')
name1 = merged['short_name']

In [None]:
# Extract the measurement tables for each replicate (h = 20, 16 and 16).
# NOTE(review): five tables are unpacked, but the generate_data(df, plate, name, h, m)
# definition visible in this notebook returns three - confirm which version this cell
# was written against.
fluo11, od11, bulk_fluo11, fluo_half11, bulk_fluo_half11 = generate_data(df11, plate1, name1, 20)
fluo12, od12, bulk_fluo12, fluo_half12, bulk_fluo_half12 = generate_data(df12, plate1, name1, 16)
fluo13, od13, bulk_fluo13, fluo_half13, bulk_fluo_half13 = generate_data(df13, plate1, name1, 16)

In [None]:
def plot_all(data, num_row, num_col):
    """Plot every column of several tables on a grid, one panel per column.

    Parameters
    ----------
    data : list of pandas.DataFrame
        Tables indexed by time in minutes. Panel i overlays column i of every
        table; titles and the number of used panels come from data[0].
    num_row, num_col : int
        Grid dimensions. Panels beyond the number of columns are hidden.
    """
    # squeeze=False always yields a 2-D array of axes, so a 1 x 1 grid (for which
    # plt.subplots would otherwise return a bare Axes without .ravel()) also works.
    f, axs = plt.subplots(num_row, num_col, sharex=True, sharey=False,
                          figsize=(14, num_row*2), squeeze=False)
    axr = axs.ravel()
    # Wrapping the array (not the enumerate generator) lets tqdm know the total.
    for i, ax in enumerate(tqdm(axr)):
        if i < data[0].shape[1]:
            for d in data:
                ax.plot(d.index/60, d.iloc[:, i])
            ax.set_title(data[0].columns[i])
            ax.set_xlabel('Time (h)')
        else:
            ax.set_visible(False)
    plt.tight_layout()
    sns.despine()
    
# Plot the bulk fluorescence (half) of the three plate-1 replicates on a 20 x 5 grid.
plot_all([bulk_fluo_half11, bulk_fluo_half12, bulk_fluo_half13], 20, 5)

In [None]:
# Plot the OD of the three plate-1 replicates on a 20 x 5 grid.
plot_all([od11, od12, od13], 20, 5)

In [None]:
# Element-wise mean of the three plate-1 replicates. pandas aligns on labels, so a
# time point missing from any replicate becomes NaN in the average.
bulk_fluo1 = (bulk_fluo11 + bulk_fluo12 + bulk_fluo13) / 3
fluo1 = (fluo11 + fluo12 + fluo13) / 3
od1 = (od11 + od12 + od13) / 3
fluo_half1 = (fluo_half11 + fluo_half12 + fluo_half13) / 3
bulk_fluo_half1 = (bulk_fluo_half11 + bulk_fluo_half12 + bulk_fluo_half13) / 3

In [None]:
# Save the replicate averages of plate 1, dropping rows that contain NaN.
bulk_fluo1.dropna().to_csv('datasets/bulk_fluo_plate_1_triplicate.csv')
fluo1.dropna().to_csv('datasets/fluo_plate_1_triplicate.csv')
od1.dropna().to_csv('datasets/od_plate_1_triplicate.csv')
bulk_fluo_half1.dropna().to_csv('datasets/bulk_fluo_half_plate_1_triplicate.csv')
fluo_half1.dropna().to_csv('datasets/fluo_half_plate_1_triplicate.csv')

In [None]:
# Save the first plate-1 replicate on its own, dropping rows that contain NaN.
bulk_fluo11.dropna().to_csv('datasets/bulk_fluo_plate_1_single.csv')
fluo11.dropna().to_csv('datasets/fluo_plate_1_single.csv')
od11.dropna().to_csv('datasets/od_plate_1_single.csv')
bulk_fluo_half11.dropna().to_csv('datasets/bulk_fluo_half_plate_1_single.csv')
fluo_half11.dropna().to_csv('datasets/fluo_half_plate_1_single.csv')

### Data from plate reader 2

In [None]:
# Plate 2 layout and its three replicate runs.
# NOTE(review): read_plate_data is called with a dataset name here, while the definitions
# visible in this notebook expect a DataFrame - confirm before re-running.
plate2 = cleanse_plate(pd.read_csv('datasets/plate2_map.csv'))
df21 = read_plate_data('and_gate_21') # up to 20h
print(df21.shape)
df22 = read_plate_data('and_gate_22') # up to 16h
print(df22.shape)
df23 = read_plate_data('and_gate_23') # up to 16h
print(df23.shape)

In [None]:
# Map each measured well to its code name and then to its short name.
# NOTE(review): `naming_map` is not defined in the visible part of the notebook - confirm
# it is created earlier.
merged = pd.merge(df21['Well'], plate2, on='Well', how='left')
merged = pd.merge(merged, naming_map, on='code_name', how='left')
name2 = merged['short_name']

In [None]:
# Extract the measurement tables per replicate; the last two arguments are h and m.
# NOTE(review): five tables are unpacked, but the generate_data(df, plate, name, h, m)
# definition visible in this notebook returns three - confirm which version applies.
fluo21, od21, bulk_fluo21, fluo_half21, bulk_fluo_half21 = generate_data(df21, plate2, name2, 21, 2)
fluo22, od22, bulk_fluo22, fluo_half22, bulk_fluo_half22 = generate_data(df22, plate2, name2, 22, 0)
fluo23, od23, bulk_fluo23, fluo_half23, bulk_fluo_half23 = generate_data(df23, plate2, name2, 22, 1)

In [None]:
# Plot the bulk fluorescence (half) of the three plate-2 replicates on a 20 x 5 grid.
plot_all([bulk_fluo_half21, bulk_fluo_half22, bulk_fluo_half23], 20, 5)

In [None]:
# Plot the OD of the three plate-2 replicates on a 20 x 5 grid.
plot_all([od21, od22, od23], 20, 5)

In [None]:
# Element-wise mean of the three plate-2 replicates. pandas aligns on labels, so a
# time point missing from any replicate becomes NaN in the average.
bulk_fluo2 = (bulk_fluo21 + bulk_fluo22 + bulk_fluo23) / 3
fluo2 = (fluo21 + fluo22 + fluo23) / 3
od2 = (od21 + od22 + od23) / 3
fluo_half2 = (fluo_half21 + fluo_half22 + fluo_half23) / 3
bulk_fluo_half2 = (bulk_fluo_half21 + bulk_fluo_half22 + bulk_fluo_half23) / 3

In [None]:
# Save the replicate averages of plate 2. Unlike the plate-1 cell, rows containing NaN
# are not dropped here.
bulk_fluo2.to_csv('datasets/bulk_fluo_plate_2_triplicate.csv')
fluo2.to_csv('datasets/fluo_plate_2_triplicate.csv')
fluo_half2.to_csv('datasets/fluo_half_plate_2_triplicate.csv')
bulk_fluo_half2.to_csv('datasets/bulk_fluo_half_plate_2_triplicate.csv')
od2.to_csv('datasets/od_plate_2_triplicate.csv')

In [None]:
# Save the first plate-2 replicate on its own.
bulk_fluo21.to_csv('datasets/bulk_fluo_plate_2_single.csv')
fluo21.to_csv('datasets/fluo_plate_2_single.csv')
fluo_half21.to_csv('datasets/fluo_half_plate_2_single.csv')
bulk_fluo_half21.to_csv('datasets/bulk_fluo_half_plate_2_single.csv')
od21.to_csv('datasets/od_plate_2_single.csv')

### Induction Matrix

In [None]:
# NOTE(review): read_plate_data is called with a dataset name here, while the definitions
# visible in this notebook expect a DataFrame - confirm before re-running.
induction = read_plate_data('induction') # up to 20h
print(induction.shape)

In [None]:
# Well -> construct code for the induction plate. Each code occupies a 3 x 3 block of
# wells (rows A-C or D-F, three adjacent columns); A323 occupies G01-G06 and H01-H03.
# Wells not listed here are labelled 'control' further down.
induction_plate = {
    'A01': 'A18', 'A02': 'A18', 'A03': 'A18', 
    'B01': 'A18', 'B02': 'A18', 'B03': 'A18', 
    'C01': 'A18', 'C02': 'A18', 'C03': 'A18', 
    'D01': 'A195', 'D02': 'A195', 'D03': 'A195', 
    'E01': 'A195', 'E02': 'A195', 'E03': 'A195', 
    'F01': 'A195', 'F02': 'A195', 'F03': 'A195', 
    'A04': 'A29', 'A05': 'A29', 'A06': 'A29', 
    'B04': 'A29', 'B05': 'A29', 'B06': 'A29', 
    'C04': 'A29', 'C05': 'A29', 'C06': 'A29', 
    'D04': 'A259', 'D05': 'A259', 'D06': 'A259', 
    'E04': 'A259', 'E05': 'A259', 'E06': 'A259', 
    'F04': 'A259', 'F05': 'A259', 'F06': 'A259', 
    'A07': 'A76', 'A08': 'A76', 'A09': 'A76', 
    'B07': 'A76', 'B08': 'A76', 'B09': 'A76', 
    'C07': 'A76', 'C08': 'A76', 'C09': 'A76', 
    'D07': 'A267', 'D08': 'A267', 'D09': 'A267', 
    'E07': 'A267', 'E08': 'A267', 'E09': 'A267', 
    'F07': 'A267', 'F08': 'A267', 'F09': 'A267', 
    'A10': 'A109', 'A11': 'A109', 'A12': 'A109', 
    'B10': 'A109', 'B11': 'A109', 'B12': 'A109', 
    'C10': 'A109', 'C11': 'A109', 'C12': 'A109', 
    'D10': 'A294', 'D11': 'A294', 'D12': 'A294', 
    'E10': 'A294', 'E11': 'A294', 'E12': 'A294', 
    'F10': 'A294', 'F11': 'A294', 'F12': 'A294', 
    'G01': 'A323', 'G02': 'A323', 'G03': 'A323', 
    'G04': 'A323', 'G05': 'A323', 'G06': 'A323', 
    'H01': 'A323', 'H02': 'A323', 'H03': 'A323', 
}
# Plate row letter -> level of the first inducer (stored as 'ind1_lvl').
induction_row = {
    'A': 0, 'B': 1, 'C': 2, 'D': 0, 'E': 1, 'F': 2,
    'G': 0, 'H': 1 # row G is adjusted manually later: wells G04-G06 are set to level 2
}
# Zero-padded plate column -> level of the second inducer (stored as 'ind2_lvl');
# the levels 0, 1, 2 repeat every three columns.
induction_col = {
    '01': 0, '02': 1, '03': 2, '04': 0, '05': 1, '06': 2, 
    '07': 0, '08': 1, '09': 2, '10': 0, '11': 1, '12': 2 
}

In [None]:
# Work on an independent copy of the first three columns: adding columns to a bare
# slice of `induction` triggers pandas' SettingWithCopyWarning and is not guaranteed
# to behave as intended.
index = induction.iloc[:,:3].copy()
index['code_name'] = index['Well'].map(induction_plate)
# A well name is the row letter followed by the two-digit column number.
col_idx = index['Well'].str[1:]
row_idx = index['Well'].str[:1]
index['ind1_lvl'] = row_idx.map(induction_row)
index['ind2_lvl'] = col_idx.map(induction_col)
# Manual adjustment for row G: wells G04-G06 get level 2 instead of the row default.
index.loc[index['Well'].isin(['G04', 'G05', 'G06']), 'ind1_lvl'] = 2
#index = index.dropna()
# Wells that are absent from induction_plate are treated as controls.
index.loc[index['code_name'].isnull(), 'code_name'] = 'control'
name = pd.merge(index[['code_name', 'ind1_lvl', 'ind2_lvl']], naming_map[['code_name', 'short_name']], on='code_name', how='left')
name

In [None]:
# Inspect the wells assigned to A323 (rows G/H, including the manually adjusted wells).
index[index['code_name']=='A323']

In [None]:
def parse_minutes(x):
    """Return the total minutes encoded in a space-separated time label.

    Token 0 is the hour count and token 2 the minute count; an empty token 2
    means zero minutes.
    """
    parts = x.split(' ')
    hour_part = int(parts[0])
    minute_part = 0 if parts[2] == '' else int(parts[2])
    return hour_part * 60 + minute_part

def transpose_data(df_raw):
    """Return a copy of `df_raw` indexed by its 'short_name' column.

    Despite the name, this variant does not transpose the table or convert
    time labels; the caller's frame is left untouched.
    """
    return df_raw.set_index('short_name')

def generate_data_induce(df, h=24):
    """Split an induction export into fluorescence, OD and their product.

    Columns 3 .. 3 + h*3 (exclusive) are taken as fluorescence and all later
    columns as OD; both are cast to float. The third table is the label-aligned
    product fluo * od. Each table is prefixed with the columns of the
    module-level `name` frame; nothing is transposed.

    Returns (fluo, od, bulk_fluo).
    """
    first_col = 3
    split_col = first_col + h * 3

    fluo_values = df.iloc[:, first_col:split_col].astype(float)
    od_values = df.iloc[:, split_col:].astype(float)
    bulk_values = fluo_values * od_values

    def _with_labels(values):
        # Put the sample annotation columns in front of the measurements.
        return pd.concat([name, values], axis=1)

    return _with_labels(fluo_values), _with_labels(od_values), _with_labels(bulk_values)

# Split the induction export using the default h=24 and show the bulk table.
fluo, od, bulk_fluo = generate_data_induce(induction)
bulk_fluo

In [None]:
# Inspect the rows of a single gate.
bulk_fluo[bulk_fluo['short_name']=='e11x32STPhoRadA']

In [None]:
# Save the three induction tables without the positional index.
bulk_fluo.to_csv('datasets/bulk_fluo_induction.csv', index=False)
fluo.to_csv('datasets/fluo_induction.csv', index=False)
od.to_csv('datasets/od_induction.csv', index=False)

### XOR Gate Top 10

In [None]:
def read_plate_map(df):
    """Split the XOR plate map into a control part and a long Well / code_name table.

    Parameters
    ----------
    df : pandas.DataFrame
        Plate map with the row letters either in a 'Group' column or in an
        index named 'Group'; the first two remaining columns are the control
        part and the others are plate column numbers.

    Returns
    -------
    (con, xor) : tuple of pandas.DataFrame
        `con` holds 'Group' plus the first two columns; `xor` has one row per
        well with the columns 'code_name' and 'Well'.
    """
    # Use the frame that was passed in. Previously the argument was ignored and
    # 'datasets/xor_map.csv' was read again with 'Group' as the index.
    data = df.set_index('Group') if 'Group' in df.columns else df
    con = data.iloc[:,:2].reset_index()
    xor = data.iloc[:,2:]
    xor = xor.reset_index().melt(id_vars=['Group'])
    # Well name = row letter + zero-padded column number, e.g. 'A01'.
    xor['variable'] = xor['variable'].apply(lambda x: "{:02d}".format(int(x)))
    xor['Well'] = xor['Group'] + xor['variable']
    xor = xor.drop(['Group', 'variable'], axis=1)
    xor = xor.rename(columns={'value':'code_name'})
    return con, xor

# Load the XOR plate map and split it into its control part and the long well table.
con, xor = read_plate_map(pd.read_csv('datasets/xor_map.csv'))

In [None]:
def read_plate_data(df):
    """Use the first row of `df` as the header and return the remaining rows.

    The result is a new frame renumbered from 0; rows with missing values are
    kept. The caller's frame is not modified (the previous version relabelled
    it and dropped its first row in place).
    """
    header = df.iloc[0]
    remaining = df.drop(index=df.index[0])
    remaining.columns = header
    #remaining = remaining.dropna()
    return remaining.reset_index(drop=True)

# Load the XOR export and use its first data row as the header.
data = read_plate_data(pd.read_csv('datasets/xor_gate.csv'))
data.head()

In [None]:
# Map each measured well to its code name; in this section the code name is used
# directly as the short name (the naming_map join is disabled).
merged = pd.merge(data['Well'], xor, on='Well', how='left')
#merged = pd.merge(merged, naming_map, on='code_name', how='left')
merged.rename(columns={'code_name': 'short_name'}, inplace=True)
# Wells without a code name are dropped and the series is renumbered from 0.
name = merged['short_name'].dropna().reset_index(drop=True)

In [None]:
# Split the XOR export into its three tables with h=24; `xor` fills the `plate` parameter.
fluox, odx, fluo_halfx = generate_data(data, xor, name, 24)

In [None]:
# For each XOR gate keep columns 0, 4, 8 and 12 of its block and label them with the
# two-bit suffixes '_00', '_01', '_10', '_11'.
# NOTE(review): presumably every fourth column is the first replicate of one input
# state - confirm against the plate layout.
cols = ['ECF20/33', 'ECF11/15']
fluo_xor = pd.DataFrame()
od_xor = pd.DataFrame()
for c in cols:
    temp = pd.concat([fluox[c].iloc[:,i] for i in range(0, 16, 4)], axis=1)
    # "{:02b}" renders 0..3 as the binary strings 00, 01, 10, 11.
    temp.columns = [c + '_' + "{:02b}".format(int(i)) for i in range(4)]
    fluo_xor = pd.concat([fluo_xor, temp], axis=1)
    
    temp = pd.concat([odx[c].iloc[:,i] for i in range(0, 16, 4)], axis=1)
    temp.columns = [c + '_' + "{:02b}".format(int(i)) for i in range(4)]
    od_xor = pd.concat([od_xor, temp], axis=1)

In [None]:
# NOTE(review): fluo_xor is built from the `fluox` table but is saved under a
# 'bulk_fluo' file name - confirm the intended measurement.
fluo_xor.to_csv('datasets/bulk_fluo_xor.csv')
od_xor.to_csv('datasets/od_xor.csv')

### First-round Data

In [None]:
# Load the first-round export, promote its first data row to the header and drop every
# row that contains a missing value.
df = pd.read_csv('datasets/raw.csv')
df.columns = df.iloc[0]
df.drop(df.index[0], inplace=True)
df.dropna(inplace=True)
print(df.shape)
df.head()

In [None]:
# 'Content' label of a well -> ECF part of the construct (stored in the 'ECF' column).
map_ecf = {'Sample X1': 'e15',
'Sample X2': 'e22',
'Sample X3': 'e32',
'Sample X4': 'e33',
'Sample X5': 'e34',
'Sample X6': 'e41',
'Sample X7': 'e42',
'Sample X8': 'e15',
'Sample X9': 'e22',
'Sample X10': 'e38',
'Sample X11': 'e16',
'Sample X12': 'e33',
'Sample X13': 'e15',
'Sample X14': 'e16',
'Sample X15': 'e17',
'Sample X16': 'e20',
'Sample X17': 'e22',
'Sample X18': 'e26',
'Sample X19': 'e32',
'Sample X20': 'e33',
'Sample X21': 'e34'}
# 'Content' label of a well -> intein part of the construct (stored in the 'Intein' column).
map_int = {'Sample X1': 'SspGyrB',
'Sample X2': 'SspGyrB',
'Sample X3': 'SspGyrB',
'Sample X4': 'SspGyrB',
'Sample X5': 'SspGyrB',
'Sample X6': 'SspGyrB',
'Sample X7': 'SspGyrB',
'Sample X8': 'TerThyXS2',
'Sample X9': 'TerThyXS2',
'Sample X10': 'TerThyXS2',
'Sample X11': 'TerThyXS1',
'Sample X12': 'TerThyXS1',
'Sample X13': 'STPhoRadA',
'Sample X14': 'STPhoRadA',
'Sample X15': 'STPhoRadA',
'Sample X16': 'STPhoRadA',
'Sample X17': 'STPhoRadA',
'Sample X18': 'STPhoRadA',
'Sample X19': 'STPhoRadA',
'Sample X20': 'STPhoRadA',
'Sample X21': 'STPhoRadA'}

In [None]:
# Annotate every well with its construct parts and its inducer state.
df['ECF'] = df['Content'].map(map_ecf)
df['Intein'] = df['Content'].map(map_int)
# Inducer state by plate row: A/E -> (Ara 0, Cuma 0), B/F -> (1, 0), C/G -> (0, 1), D/H -> (1, 1).
df.loc[df['Group'].isin(['A', 'E']), 'Ara'] = 0
df.loc[df['Group'].isin(['A', 'E']), 'Cuma'] = 0
df.loc[df['Group'].isin(['B', 'F']), 'Ara'] = 1
df.loc[df['Group'].isin(['B', 'F']), 'Cuma'] = 0
df.loc[df['Group'].isin(['C', 'G']), 'Ara'] = 0
df.loc[df['Group'].isin(['C', 'G']), 'Cuma'] = 1
df.loc[df['Group'].isin(['D', 'H']), 'Ara'] = 1
df.loc[df['Group'].isin(['D', 'H']), 'Cuma'] = 1
# Move the four annotation columns to the front and drop the original identifier columns.
df = pd.concat([df[['ECF', 'Intein', 'Ara', 'Cuma']],
                 df.drop(['ECF', 'Intein', 'Ara', 'Cuma', 'Well', 'Content', 'Group'], axis=1)],
                 axis=1)
df.head()

In [None]:
# First 77 columns: the four annotation columns followed by the first block of 73
# measurement columns, used as the fluorescence table.
cols = list(range(0,77))
fluo = df.iloc[:, cols]
#fluo.to_csv('datasets/fluoOD-all.csv', index=False)
fluo.head()

In [None]:
# The four annotation columns plus the second block of 73 measurement columns
# (positions 77-149), used as the OD table.
cols = list(range(0,4)) + list(range(77,150))
od = df.iloc[:, cols]
#od.to_csv('datasets/OD-all.csv', index=False)
od.head()

In [None]:
def parse_minutes(x):
    """Translate a space-separated time label into minutes.

    The first field holds the hours and the third field the minutes; when the
    third field is empty only the hours are counted.
    """
    fields = x.split(' ')
    minutes_from_hours = int(fields[0]) * 60
    if fields[2] == '':
        return minutes_from_hours
    return minutes_from_hours + int(fields[2])

# Select the wells with Ara == 1 and Cuma == 0. The .copy() makes the selection an
# independent frame, so the column assignment and in-place edits below no longer act
# on a slice of `fluo` (which raised pandas' SettingWithCopyWarning).
fluo11 = fluo[(fluo['Ara']==1) & (fluo['Cuma']==0)].copy()
# Label each well by its construct: ECF name followed by the intein name.
fluo11['index'] = fluo11['ECF'] + fluo11['Intein']
fluo11.set_index('index', inplace=True)
fluo11.drop(['ECF', 'Intein', 'Ara', 'Cuma'], axis=1, inplace=True)
fluo11 = fluo11.transpose().reset_index()

# After the transpose the time labels sit in the column labelled 0; convert them to
# minutes and use them as the index.
fluo11['time'] = fluo11[0].apply(parse_minutes)
fluo11 = fluo11.set_index('time')
fluo11 = fluo11.drop(0, axis=1)
fluo11

In [None]:
# Same selection as for the fluorescence table: wells with Ara == 1 and Cuma == 0.
# The .copy() makes the selection an independent frame, so the edits below no longer
# act on a slice of `od` (which raised pandas' SettingWithCopyWarning).
od11 = od[(od['Ara']==1) & (od['Cuma']==0)].copy()
# Label each well by its construct: ECF name followed by the intein name.
od11['index'] = od11['ECF'] + od11['Intein']
od11.set_index('index', inplace=True)
od11.drop(['ECF', 'Intein', 'Ara', 'Cuma'], axis=1, inplace=True)
od11 = od11.transpose().reset_index()

# After the transpose the time labels sit in the column labelled 0; convert them to
# minutes and use them as the index.
od11['time'] = od11[0].apply(parse_minutes)
od11 = od11.set_index('time')
od11 = od11.drop(0, axis=1)
od11

In [None]:
# Cast every fluorescence column to float. Columns that cannot be converted are
# reported by name and left unchanged.
for col in fluo11.columns:
    try:
        fluo11[col] = fluo11[col].astype(float)
    except (ValueError, TypeError):
        # Catch conversion failures only; a bare `except` would also swallow
        # KeyboardInterrupt and unrelated programming errors.
        print(col)
        continue

In [None]:
# Cast every OD column to float; unlike the fluorescence loop, a failed conversion
# raises here.
for col in od11.columns:
    od11[col] = od11[col].astype(float)

In [None]:
# Save fluorescence, OD and their element-wise product. The tables hold the wells with
# Ara == 1 and Cuma == 0, which matches the '-10' suffix of the file names (the
# variables themselves are named '...11').
fluo11.to_csv('datasets/fluo-10.csv')
od11.to_csv('datasets/od-10.csv')
(fluo11 * od11).to_csv('datasets/bulk-fluo-10.csv')

### Sequence to Function

In [None]:
# Read every GenBank file in datasets/sequences/ and store its sequence under the file
# name without the last three characters (i.e. without a '.gb' extension).
# NOTE(review): `os` and `SeqIO` are not imported in the visible part of the notebook -
# confirm they are imported in an earlier cell.
filenames = sorted(os.listdir('datasets/sequences/'))
buffer = {}
for filename in tqdm(filenames):
    gb_file = "datasets/sequences/" + filename
    # The context manager closes each file; the previous version opened a handle per
    # file and never closed it.
    with open(gb_file, "r") as handle:
        # If a file holds several records, the last one wins.
        for gb_record in SeqIO.parse(handle, "genbank"):
            buffer[filename[:-3]] = str(gb_record.seq)
df = pd.DataFrame.from_dict(buffer, orient='index').reset_index()
df.columns = ['full_name', 'sequence']

In [None]:
constructs = pd.read_csv('datasets/constructs.csv')
# regex=False: the asterisk is a literal character to strip. Relying on the default is
# version-dependent (older pandas treat the pattern as a regular expression by default,
# and a lone "*" is not a valid one).
constructs['full_name'] = constructs['full_name'].str.replace("*", "", regex=False)
# Attach construct ids and short names to the sequences; unmatched sequences are dropped.
df2 = pd.merge(df, constructs, on="full_name", how="left")
df2 = df2.dropna()
df2 = df2[['id', 'short_name', 'full_name', 'sequence']]
# Combine the single-replicate bulk fluorescence tables of both plates.
fluo1 = pd.read_csv('datasets/bulk_fluo_plate_1_single.csv', index_col='time')
fluo2 = pd.read_csv('datasets/bulk_fluo_plate_2_single.csv', index_col='time')
fluo = pd.concat([fluo1, fluo2], axis=1)
# One row per construct, one column per time point; the column labelled 1220 is removed.
# NOTE(review): presumably 1220 min is recorded for only one of the plates - confirm.
data = fluo.T.reset_index().drop(1220, axis=1)
data.rename(columns={'index': 'short_name'}, inplace=True)
#data.columns = ['short_name', 'fluo_20h']
# Keep only constructs that have both a sequence and a complete fluorescence series.
df_final = pd.merge(df2, data, on='short_name', how='left').dropna()
df_final

In [None]:
# Sanity check: count the missing values per column (all zeros after the dropna above).
df_final.isnull().sum()

In [None]:
# Persist the sequence-to-fluorescence table (the index is written as the first column).
df_final.to_csv('datasets/sequence_data.csv')