In [13]:
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [2]:
def parse_minutes(x):
    """Convert a time column label into a total number of minutes.

    The label is split on spaces: the first token is the hour count and the
    third token, when present and non-empty, is the minute count. Everything
    after the first '.' is discarded before splitting (presumably the '.N'
    suffix pandas appends to repeated column names -- confirm).

    Args:
        x: Label such as '1 h 20 min', '2 h ' or '0 h 40 min.1'.

    Returns:
        int: hours * 60 + minutes.

    Raises:
        ValueError: if the hour or minute token is not an integer.
    """
    tokens = x.split('.')[0].split(' ')
    hours = int(tokens[0]) * 60
    # A label without a minutes token (e.g. '2 h') has fewer than three
    # tokens; treat it as zero minutes instead of raising IndexError.
    mins = int(tokens[2]) if len(tokens) > 2 and tokens[2] != '' else 0
    return hours + mins

def transpose_data(df, col):
    """Turn a wells-by-time table into a time-by-wells table.

    Args:
        df: Table with one row per well, a label column `col`, and one
            column per time point whose header parse_minutes can read.
        col: Name of the column whose values become the output column names.

    Returns:
        DataFrame indexed by 'time' (minutes) with one column per row of `df`.
    """
    # set_index returns a new frame here, so the caller's `df` is left
    # untouched (the in-place variant silently changed the argument).
    wide = df.set_index(col).transpose().reset_index()
    # After the transpose the old headers sit in the 'index' column.
    wide['time'] = wide['index'].apply(parse_minutes)
    return wide.set_index('time').drop('index', axis=1)

def generate_data(df, name, h=24, m=0, num_data=2, col='short_name', start_idx=3):
    """Cut consecutive, equally wide column blocks out of `df`.

    Every block is h * 3 + 1 + int(m / 20) columns wide; the first one starts
    at column `start_idx` and each following block starts where the previous
    one ended. Each block is cast to float, labelled with `name`, and passed
    through transpose_data.

    Args:
        df: Raw table; columns before `start_idx` are ignored.
        name: Series of labels, aligned with `df` by index.
        h, m: Size parameters of one block (see the width formula above).
        num_data: Number of blocks to extract.
        col: Label column name handed to transpose_data.
        start_idx: Position of the first column of the first block.

    Returns:
        list of DataFrames, one per block, in column order.
    """
    width = h * 3 + 1 + int(m / 20)
    tables = []
    for block in range(num_data):
        first = start_idx + block * width
        readings = df.iloc[:, first:first + width].astype(float)
        labelled = pd.concat([name, readings], axis=1)
        tables.append(transpose_data(labelled, col))
    return tables

### Self Experiment

#### Promoters characterization

In [20]:
def read_map(plate_map):
    """Flatten a plate-map grid into one row per well.

    Args:
        plate_map: Table with a 'group' column (row letter) and one column
            per plate column number.

    Returns:
        DataFrame with 'Well' (row letter + two-digit column), 'value' (the
        grid cell) and 'inducer' (column number minus one, or 0 for row 'H').
    """
    long_map = plate_map.melt(id_vars=['group'])
    long_map['variable'] = long_map['variable'].astype(int)
    column_tag = long_map['variable'].map('{:02d}'.format)
    long_map['Well'] = long_map['group'] + column_tag
    in_row_h = long_map['group'].isin(['H'])
    long_map.loc[~in_row_h, 'inducer'] = long_map['variable'] - 1
    long_map.loc[in_row_h, 'inducer'] = 0
    return long_map[['Well', 'value', 'inducer']]

def generate_metadata(well, plate_map):
    """Label every measured well with a readable sample name.

    Args:
        well: Series named 'Well' listing the wells present in the raw data.
        plate_map: Table with 'Well', 'value' (sample code) and 'inducer'.

    Returns:
        DataFrame of the wells whose code is known, with an added
        'short_name' column ('<sample>_<inducer>') and a 0..n-1 index.
    """
    sample_map = {
        'P62': 'pBAD/Ara',
        'P64': 'pCin/OHC14',
        'P66': 'pCymRC/Cuma',
        'P68': 'pLuxB/AHL',
        'P69': 'pPhlF/DAPG',
        'P70': 'pSalTTC/Sal',
        'P72': 'pRhaB/Rha',
        'BK': 'Blank_Kan',
        'BA': 'Blank_Amp',
        '3K3-N': 'negative-control-kan',
        '4A3-N': 'negative-control-amp',
        '4A3-P': 'positive-control-amp'
    }
    df = pd.merge(well, plate_map, on='Well', how='left').dropna(subset=['value']).reset_index(drop=True)
    df['short_name'] = df['value'].map(sample_map) + '_' + df['inducer'].astype(int).astype(str)
    # Codes missing from sample_map give a NaN short_name and are dropped.
    # Renumber afterwards: the result is later concatenated column-wise with
    # a reset-index data table, and index gaps would shift labels onto the
    # wrong rows.
    return df.dropna().reset_index(drop=True)

In [24]:
# Promoter-characterization run: label the wells from the plate map, keep only
# the labelled wells of the raw export, and cut it into per-measurement tables.
folder = '008-promoters-characterization'
plate_map = read_map(pd.read_csv('datasets/experiment/{}/plate_map.csv'.format(folder)))
# skiprows=[0] skips the first line of the raw file, so the second line is the header.
raw_data = pd.read_csv('datasets/experiment/{}/raw.csv'.format(folder), skiprows=[0])
metadata = generate_metadata(raw_data['Well'], plate_map)
data = raw_data[raw_data['Well'].isin(metadata['Well'])].reset_index(drop=True)
# Seven consecutive column blocks, each sized with h=22, m=0.
datas = generate_data(data, metadata['short_name'], h=22, m=0, num_data=7)

In [25]:
# Inspect the second table returned by generate_data.
datas[1]

short_name,pBAD/Ara_0,pBAD/Ara_1,pBAD/Ara_2,pBAD/Ara_3,pBAD/Ara_4,pBAD/Ara_5,pBAD/Ara_6,pBAD/Ara_7,pBAD/Ara_8,pBAD/Ara_9,pBAD/Ara_10,pBAD/Ara_11,pCin/OHC14_0,pCin/OHC14_1,pCin/OHC14_2,pCin/OHC14_3,pCin/OHC14_4,pCin/OHC14_5,pCin/OHC14_6,pCin/OHC14_7,pCin/OHC14_8,pCin/OHC14_9,pCin/OHC14_10,pCin/OHC14_11,pCymRC/Cuma_0,pCymRC/Cuma_1,pCymRC/Cuma_2,pCymRC/Cuma_3,pCymRC/Cuma_4,pCymRC/Cuma_5,pCymRC/Cuma_6,pCymRC/Cuma_7,pCymRC/Cuma_8,pCymRC/Cuma_9,pCymRC/Cuma_10,pCymRC/Cuma_11,pLuxB/AHL_0,pLuxB/AHL_1,pLuxB/AHL_2,pLuxB/AHL_3,pLuxB/AHL_4,pLuxB/AHL_5,pLuxB/AHL_6,pLuxB/AHL_7,pLuxB/AHL_8,pLuxB/AHL_9,pLuxB/AHL_10,pLuxB/AHL_11,pPhlF/DAPG_0,pPhlF/DAPG_1,pPhlF/DAPG_2,pPhlF/DAPG_3,pPhlF/DAPG_4,pPhlF/DAPG_5,pPhlF/DAPG_6,pPhlF/DAPG_7,pPhlF/DAPG_8,pPhlF/DAPG_9,pPhlF/DAPG_10,pPhlF/DAPG_11,pSalTTC/Sal_0,pSalTTC/Sal_1,pSalTTC/Sal_2,pSalTTC/Sal_3,pSalTTC/Sal_4,pSalTTC/Sal_5,pSalTTC/Sal_6,pSalTTC/Sal_7,pSalTTC/Sal_8,pSalTTC/Sal_9,pSalTTC/Sal_10,pSalTTC/Sal_11,pRhaB/Rha_0,pRhaB/Rha_1,pRhaB/Rha_2,pRhaB/Rha_3,pRhaB/Rha_4,pRhaB/Rha_5,pRhaB/Rha_6,pRhaB/Rha_7,pRhaB/Rha_8,pRhaB/Rha_9,pRhaB/Rha_10,pRhaB/Rha_11,Blank_Kan_0,Blank_Kan_0,negative-control-kan_0,negative-control-kan_0,Blank_Amp_0,Blank_Amp_0,negative-control-amp_0,negative-control-amp_0,positive-control-amp_0,positive-control-amp_0
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1
0,0.040,-0.002,-0.006,0.013,0.017,-0.001,-0.00400,0.001,-0.00300,0.065,0.021,0.007,0.025,0.009,-0.002,0.012,0.011,-0.004,-0.00200,0.014,-0.00300,0.016,0.010,0.006,-0.006,0.036,0.015,0.006,0.008,0.005,-0.001,0.020,0.000,0.004,0.000,0.023,-0.004,-0.004,-0.007,-0.003,-0.008,0.007,-0.007,-0.00500,-0.004,0.007,-0.007,0.037,-0.007,-0.004,-0.010,-0.010,-0.010,-0.008,-0.008,-0.004,-0.004,0.037,-0.007,0.002,-0.006,-0.0100,-0.008,-0.008,-0.006,-0.007,-0.009,-0.004,-0.005,0.044,-0.007,0.006,-0.004,0.002,-0.002,-0.0001,0.0005,-0.004,0.00300,-0.005,-0.006,0.004,0.014,0.006,,,0.086,0.096,,,0.005,0.041,0.032,0.035
20,0.042,0.025,0.012,0.014,0.007,0.008,0.00200,0.011,0.00035,0.037,0.019,0.042,0.035,0.024,0.014,0.010,0.017,0.010,0.00085,0.021,-0.00085,0.021,0.009,0.020,0.018,0.030,0.014,0.013,0.006,0.007,-0.002,0.023,-0.004,0.018,-0.003,0.029,-0.004,0.034,-0.003,0.011,-0.004,0.028,-0.003,-0.00100,-0.004,0.024,-0.004,0.039,-0.004,0.009,-0.006,-0.005,-0.005,-0.002,-0.004,-0.004,-0.005,0.024,-0.005,0.012,-0.004,-0.0010,-0.005,-0.005,-0.003,-0.005,-0.004,-0.004,-0.004,0.014,-0.004,0.012,-0.004,0.010,-0.007,-0.0030,-0.0030,-0.002,0.00075,-0.006,-0.005,0.005,0.030,0.012,,,0.060,0.049,,,0.071,0.012,0.040,0.067
40,0.054,0.035,0.012,0.014,0.005,0.005,-0.00025,0.010,-0.00200,0.040,0.016,0.044,0.037,0.026,0.018,0.012,0.019,0.009,-0.00200,0.023,-0.00400,0.029,0.009,0.020,0.020,0.038,0.015,0.017,0.006,0.005,-0.004,0.022,-0.005,0.026,-0.003,0.031,-0.005,0.039,-0.003,0.010,-0.005,0.029,-0.005,-0.00100,-0.004,0.032,-0.005,0.046,-0.004,0.011,-0.005,-0.006,-0.006,0.009,-0.005,-0.006,-0.005,0.031,-0.006,0.015,-0.005,-0.0010,-0.006,-0.004,-0.004,-0.004,-0.004,-0.005,-0.004,0.016,-0.004,0.014,-0.004,0.012,-0.006,-0.0010,-0.0020,-0.002,-0.00100,-0.004,-0.004,0.008,0.032,0.013,,,0.064,0.056,,,0.018,0.050,0.061,0.090
60,0.068,0.047,0.015,0.018,0.008,0.006,0.00200,0.013,-0.00080,0.050,0.021,0.052,0.050,0.034,0.025,0.028,0.029,0.014,-0.00050,0.028,-0.00400,0.039,0.009,0.024,0.026,0.050,0.019,0.022,0.008,0.007,-0.005,0.030,-0.005,0.040,-0.003,0.038,-0.004,0.048,-0.004,0.014,-0.005,0.039,-0.004,-0.00100,-0.004,0.044,-0.005,0.056,-0.005,0.016,-0.005,-0.006,-0.006,-0.003,-0.005,-0.004,-0.005,0.037,-0.006,0.017,-0.004,0.0007,-0.005,-0.005,-0.002,-0.004,-0.004,-0.005,-0.004,0.022,-0.004,0.017,-0.004,0.016,-0.006,-0.0020,-0.0020,-0.004,0.00090,-0.006,-0.005,0.010,0.033,0.016,,,0.084,0.072,,,0.180,0.039,0.080,0.115
80,0.092,0.064,0.024,0.028,0.014,0.008,0.00300,0.021,0.00100,0.071,0.029,0.070,0.069,0.051,0.037,0.042,0.040,0.020,0.00100,0.041,-0.00400,0.054,0.007,0.031,0.038,0.072,0.031,0.035,0.013,0.011,-0.004,0.041,-0.004,0.056,-0.004,0.051,-0.003,0.068,-0.004,0.020,-0.005,0.050,-0.004,0.00085,-0.004,0.057,-0.005,0.076,-0.005,0.026,-0.006,-0.006,-0.006,0.048,0.041,-0.004,-0.005,0.050,-0.005,0.024,-0.004,0.0040,-0.004,-0.005,-0.003,-0.003,-0.004,-0.004,-0.005,0.032,-0.003,0.024,-0.004,0.023,-0.006,-0.0030,-0.0030,-0.004,0.00200,-0.005,-0.005,0.016,0.031,0.020,,,0.118,0.104,,,0.140,0.053,0.105,0.156
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1240,1.151,1.219,1.291,1.004,1.380,3.362,1.33300,1.374,1.35700,1.634,1.560,1.686,1.207,2.581,1.175,1.193,1.142,1.150,1.18800,1.151,1.30600,1.069,1.450,1.405,1.247,2.999,1.184,1.187,1.093,1.215,1.346,0.948,1.214,0.994,1.434,1.359,1.251,1.017,1.269,0.810,1.367,1.185,1.381,0.88000,1.196,0.871,1.362,1.142,1.361,1.300,1.377,1.408,1.370,1.402,1.435,1.384,1.415,1.263,1.585,1.457,1.330,1.2730,1.303,1.326,1.331,1.343,1.380,1.436,1.469,1.383,1.502,1.441,1.322,1.316,1.380,1.4230,1.3730,1.376,1.44300,1.482,1.570,1.461,1.595,1.689,,,1.258,1.264,,,1.061,1.142,1.398,1.367
1260,1.134,1.217,1.286,0.982,1.319,3.358,1.03500,1.340,1.30500,1.604,1.531,1.624,1.205,3.131,1.168,1.183,1.142,1.166,1.18400,1.153,1.30800,1.070,1.469,1.422,1.195,3.037,1.170,1.218,1.088,1.205,1.330,0.931,1.205,0.988,1.420,1.345,1.247,1.008,1.287,0.764,1.359,1.122,1.345,0.87300,1.224,0.886,1.359,1.119,1.360,1.305,1.366,1.402,1.427,1.463,1.495,1.467,1.509,1.301,1.590,1.442,1.321,1.2840,1.302,1.318,1.335,1.340,1.358,1.439,1.480,1.381,1.503,1.434,1.313,1.305,1.379,1.4100,1.3670,1.473,1.43400,1.479,1.596,1.441,1.600,1.694,,,1.252,1.266,,,1.039,1.132,1.402,1.370
1280,1.141,1.215,1.277,0.982,1.389,3.318,1.03200,1.322,1.32700,1.653,1.549,1.643,1.212,1.188,1.156,1.167,1.141,1.140,1.17300,1.124,1.30400,1.052,1.449,1.414,1.194,2.991,1.142,1.139,1.087,1.185,1.326,0.926,1.194,0.975,1.416,1.355,1.247,1.003,1.266,0.766,1.320,1.213,1.316,0.86000,1.160,0.859,1.330,1.118,1.354,1.279,1.357,1.387,1.461,1.434,1.430,1.394,1.419,1.234,1.576,1.433,1.324,1.2650,1.300,1.302,1.455,1.387,1.353,1.420,1.461,1.354,1.494,1.427,1.313,1.308,1.389,1.4100,1.3640,1.388,1.42200,1.466,1.560,1.408,1.602,1.699,,,1.252,1.266,,,1.037,1.123,1.404,1.348
1300,1.118,1.221,1.277,0.981,1.458,3.323,1.01200,1.322,1.29500,1.618,1.538,1.626,1.192,1.169,1.136,1.178,1.087,1.167,1.15700,1.134,1.29900,1.058,1.434,1.373,1.190,3.228,1.626,1.175,1.094,1.191,1.317,0.929,1.209,0.977,1.404,1.353,1.244,1.005,1.266,0.752,1.434,1.174,1.358,0.82300,1.148,0.841,1.317,1.113,1.356,1.295,1.342,1.425,1.354,1.378,1.419,1.365,1.410,1.212,1.563,1.429,1.324,1.2930,1.291,1.302,1.313,1.344,1.350,1.428,1.466,1.370,1.506,1.421,1.309,1.304,1.392,1.4110,1.3690,1.433,1.42700,1.465,1.563,1.389,1.589,1.698,,,1.266,1.252,,,1.035,1.135,1.413,1.345


In [26]:
# Save the first four tables. Here block 1 is written as the OD table and
# block 0 as fluorescence.
# NOTE(review): the ECF section saves block 0 as OD and block 1 as
# fluorescence -- confirm the block order of each raw export.
datas[1].to_csv('datasets/experiment/{}/ods.csv'.format(folder))
datas[0].to_csv('datasets/experiment/{}/fluos.csv'.format(folder))
datas[2].to_csv('datasets/experiment/{}/fluos-lower-1.csv'.format(folder))
datas[3].to_csv('datasets/experiment/{}/fluos-lower-2.csv'.format(folder))

#### ECFs Characterization

In [16]:
def read_map(plate_map, plate_idx):
    """Flatten a plate-map grid into one row per well.

    Args:
        plate_map: Table with a 'group' column (row letter) and one column
            per plate column number.
        plate_idx: Value stored as 'inducer' for every well in row 'H'.

    Returns:
        DataFrame with 'Well' (row letter + two-digit column), 'value' and
        'inducer' ((column number - 1) modulo 6 outside row 'H').
    """
    tidy = plate_map.melt(id_vars=['group'])
    tidy['variable'] = tidy['variable'].astype(int)
    tidy['Well'] = tidy['group'] + tidy['variable'].map('{:02d}'.format)
    in_row_h = tidy['group'].isin(['H'])
    tidy.loc[~in_row_h, 'inducer'] = (tidy['variable'] - 1) % 6
    tidy.loc[in_row_h, 'inducer'] = plate_idx
    return tidy[['Well', 'value', 'inducer']]

def generate_metadata(well, plate_map):
    """Label every measured well with a readable ECF/gate sample name.

    Args:
        well: Series named 'Well' listing the wells present in the raw data.
        plate_map: Table with 'Well', 'value' (sample code) and 'inducer'.

    Returns:
        DataFrame of the wells whose code is known, with an added
        'short_name' column ('<sample>_<inducer>') and a 0..n-1 index.
    """
    sample_map = {
        'E720': 'e11x33',
        'E721': 'e15x33',
        'E722': 'e16x33',
        'E723': 'e20x33',
        'E724': 'e32x33',
        'E725': 'e34x33',
        'E726': 'e38x33',
        'E727': 'e41x33',
        'E728': 'e42x33',
        'G720': 'e11x32STPhoRadA',
        'G721': 'e15x32NpuSspS2',
        'G722': 'e16x33NrdA2',
        'G723': 'e20x32gp411',
        'G724': 'e32x30SspGyrB',
        'G725': 'e34x30MjaKlbA',
        'G726': 'e38x32gp418',
        'G727': 'e41x32NrdJ1',
        'G728': 'e42x32STIMPDH1',
        'BK': 'Blank_Kan',
        'BA': 'Blank_Amp',
        '3K3-N': 'negative-control-kan',
        '4A3-N': 'negative-control-amp',
        '4A3-P': 'positive-control-amp'
    }
    df = pd.merge(well, plate_map, on='Well', how='left').dropna(subset=['value']).reset_index(drop=True)
    df['short_name'] = df['value'].map(sample_map) + '_' + df['inducer'].astype(int).astype(str)
    # Codes missing from sample_map give a NaN short_name and are dropped.
    # Renumber afterwards so the labels stay positionally aligned with the
    # reset-index data table they are concatenated with.
    return df.dropna().reset_index(drop=True)

# Intact-vs-gate ECF run: same pipeline as above, with row 'H' wells tagged 1.
folder = '009-intact-vs-gate-ecfs'
plate_map = read_map(pd.read_csv('datasets/experiment/{}/plate_map.csv'.format(folder)), 1)
# skiprows=[0] skips the first line of the raw file, so the second line is the header.
raw_data = pd.read_csv('datasets/experiment/{}/raw.csv'.format(folder), skiprows=[0])
metadata = generate_metadata(raw_data['Well'], plate_map)
data = raw_data[raw_data['Well'].isin(metadata['Well'])].reset_index(drop=True)
# Four consecutive column blocks, each sized with h=22, m=20.
datas = generate_data(data, metadata['short_name'], h=22, m=20, num_data=4)

In [18]:
# Inspect the second table returned by generate_data.
datas[1]

short_name,e20x33_0,e20x33_1,e20x33_2,e20x33_3,e20x33_4,e20x33_5,e20x32gp411_0,e20x32gp411_1,e20x32gp411_2,e20x32gp411_3,e20x32gp411_4,e20x32gp411_5,e32x33_0,e32x33_1,e32x33_2,e32x33_3,e32x33_4,e32x33_5,e32x30SspGyrB_0,e32x30SspGyrB_1,e32x30SspGyrB_2,e32x30SspGyrB_3,e32x30SspGyrB_4,e32x30SspGyrB_5,e34x33_0,e34x33_1,e34x33_2,e34x33_3,e34x33_4,e34x33_5,e34x30MjaKlbA_0,e34x30MjaKlbA_1,e34x30MjaKlbA_2,e34x30MjaKlbA_3,e34x30MjaKlbA_4,e34x30MjaKlbA_5,e38x33_0,e38x33_1,e38x33_2,e38x33_3,e38x33_4,e38x33_5,e38x32gp418_0,e38x32gp418_1,e38x32gp418_2,e38x32gp418_3,e38x32gp418_4,e38x32gp418_5,e41x33_0,e41x33_1,e41x33_2,e41x33_3,e41x33_4,e41x33_5,e41x32NrdJ1_0,e41x32NrdJ1_1,e41x32NrdJ1_2,e41x32NrdJ1_3,e41x32NrdJ1_4,e41x32NrdJ1_5,e42x33_0,e42x33_1,e42x33_2,e42x33_3,e42x33_4,e42x33_5,e42x32STIMPDH1_0,e42x32STIMPDH1_1,e42x32STIMPDH1_2,e42x32STIMPDH1_3,e42x32STIMPDH1_4,e42x32STIMPDH1_5,Blank_Kan_1,Blank_Kan_1,negative-control-kan_1,negative-control-kan_1,Blank_Amp_1,Blank_Amp_1,negative-control-amp_1,negative-control-amp_1,positive-control-amp_1,positive-control-amp_1
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1
0,375.5,291.5,303.5,302.5,262.5,276.5,243.5,154.5,164.5,141.5,160.5,160.5,378.5,324.5,318.5,343.5,339.5,270.5,270.5,215.5,220.5,190.5,179.5,208.5,312.5,268.5,287.5,293.5,247.5,248.5,246.5,176.5,166.5,142.5,168.5,204.5,242.5,216.5,247.5,207.5,184.5,163.5,211.5,169.5,156.5,105.5,161.5,191.5,219.5,189.5,179.5,186.5,158.5,170.5,164.5,139.5,131.5,86.5,112.5,161.5,223.5,194.5,192.5,184.5,185.5,197.5,163.5,100.5,101.5,48.5,84.5,155.5,,,229.5,172.5,,,476.5,385.5,11550.5,12216.5
20,557.5,1144.5,2314.5,3078.5,3097.5,2911.5,291.5,215.5,326.5,980.5,1139.5,1181.5,600.5,1293.5,2385.5,3296.5,3279.5,2816.5,390.5,286.5,379.5,1076.5,1427.5,1713.5,483.5,1409.5,2425.5,3076.5,3080.5,2939.5,336.5,280.5,284.5,319.5,362.5,539.5,285.5,369.5,552.5,665.5,658.5,525.5,298.5,235.5,247.5,236.5,314.5,382.5,294.5,550.5,952.5,1368.5,1410.5,1318.5,255.5,213.5,203.5,222.5,373.5,496.5,362.5,1972.5,4519.5,5352.5,5297.5,5252.5,259.5,193.5,293.5,1239.5,1606.5,1722.5,,,309.5,217.5,,,596.5,498.5,14295.5,15155.5
40,789.5,4150.5,9477.5,11714.5,11273.5,9728.5,437.5,378.5,1501.5,6089.5,6269.5,5812.5,843.5,4150.5,9620.5,11840.5,11534.5,9815.5,536.5,496.5,1121.5,6902.5,7799.5,8127.5,1052.5,6506.5,9609.5,10960.5,10103.5,8543.5,526.5,457.5,473.5,1342.5,2873.5,3493.5,507.5,1398.5,3377.5,3836.5,3301.5,2116.5,433.5,404.5,430.5,1810.5,2324.5,1928.5,555.5,2333.5,4947.5,6113.5,6097.5,5340.5,386.5,361.5,407.5,1296.5,3505.5,2846.5,647.5,4889.5,11942.5,13691.5,13708.5,13630.5,384.5,324.5,777.5,4637.5,4937.5,5265.5,,,461.5,394.5,,,790.5,666.5,19502.5,20473.5
60,1169.0,9258.0,23854.0,28119.0,26444.0,21716.0,627.0,646.0,4490.0,15699.0,14970.0,12736.0,1197.0,8470.0,22512.0,27244.0,26093.0,21020.0,742.0,691.0,3002.0,18112.0,18217.0,17062.0,1771.0,14751.0,21617.0,22913.0,20595.0,16348.0,755.0,672.0,776.0,4931.0,8851.0,9152.0,804.0,4627.0,10808.0,11138.0,9447.0,5508.0,620.0,580.0,993.0,7280.0,8091.0,5578.0,858.0,5807.0,12289.0,14632.0,14345.0,12176.0,587.0,561.0,665.0,4452.0,9840.0,6539.0,858.0,7038.0,18974.0,21388.0,21059.0,20882.0,538.0,497.0,1136.0,7872.0,7176.0,7901.0,,,641.0,534.0,,,1067.0,932.0,24420.0,25665.0
80,1472.5,12920.5,36685.5,44948.5,44249.5,38303.5,874.5,988.5,8767.5,27144.5,26732.5,21006.5,1589.5,11262.5,39977.5,46993.5,45396.5,38165.5,956.5,977.5,5294.5,32696.5,33340.5,28462.5,2627.5,25966.5,39134.5,40094.5,35074.5,26874.5,1049.5,982.5,1234.5,11919.5,17698.5,16271.5,1230.5,10670.5,23886.5,23780.5,20112.5,11341.5,913.5,863.5,1834.5,15801.5,18216.5,11796.5,1284.5,11061.5,23893.5,28179.5,27484.5,22701.5,787.5,810.5,1000.5,9442.5,19586.5,11473.5,1117.5,9123.5,26994.5,30384.5,29985.5,29542.5,743.5,715.5,1607.5,11756.5,9582.5,10303.5,,,871.5,761.5,,,1276.0,1140.0,28310.0,29801.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1260,5420.5,20719.5,75917.5,102587.5,105953.5,105732.5,5145.5,5428.5,19091.5,71885.5,100890.5,150232.5,5754.5,17872.5,94234.5,162469.5,169101.5,170226.5,4705.5,4837.5,11933.5,90281.5,142908.5,210943.5,6254.5,37321.5,128657.5,140313.5,141991.5,150547.5,4807.5,4728.5,5971.5,40412.5,74273.5,105583.5,5694.5,27045.5,71898.5,84001.5,68879.5,60056.5,4748.5,4682.5,7316.5,44087.5,57263.5,72054.5,5547.5,33769.5,102389.5,143363.5,144487.5,141250.5,4450.5,4492.5,5656.5,27785.5,61136.5,69198.5,5044.5,24635.5,111989.5,152901.5,155350.5,162662.5,4464.5,4580.5,7487.5,53608.5,88713.5,140080.5,,,4428.5,4256.5,,,5331.5,5175.5,69091.5,79131.5
1280,5446.0,20695.0,75854.0,100337.0,106219.0,105167.0,5186.0,5464.0,19204.0,71967.0,101180.0,150567.0,5782.0,17826.0,92992.0,161089.0,169248.0,168405.0,4764.0,4888.0,11992.0,90131.0,142762.0,210713.0,6232.0,37361.0,128473.0,140333.0,141086.0,148647.0,4826.0,4759.0,6017.0,40334.0,74087.0,104683.0,5789.0,26986.0,71804.0,83871.0,69202.0,60097.0,4749.0,4712.0,7346.0,43999.0,57329.0,72107.0,5610.0,33645.0,102320.0,142768.0,144368.0,140901.0,4485.0,4553.0,5719.0,27689.0,61308.0,69312.0,5119.0,24655.0,111727.0,153105.0,155236.0,162233.0,4490.0,4592.0,7499.0,53701.0,88651.0,140661.0,,,4435.0,4315.0,,,5361.0,5182.0,69454.0,79097.0
1300,5477.0,20642.0,75489.0,100489.0,105638.0,105188.0,5222.0,5483.0,19197.0,71977.0,101016.0,150327.0,5831.0,17850.0,92885.0,160957.0,167855.0,167879.0,4816.0,4895.0,11926.0,89752.0,142749.0,212216.0,6276.0,37410.0,128275.0,140458.0,140890.0,148739.0,4841.0,4798.0,6020.0,40252.0,74037.0,104452.0,5820.0,26868.0,71910.0,84041.0,69131.0,59856.0,4780.0,4747.0,7378.0,43866.0,57045.0,72178.0,5647.0,33692.0,102102.0,142493.0,143840.0,140200.0,4515.0,4542.0,5747.0,27569.0,61190.0,69579.0,5143.0,24763.0,111447.0,152694.0,155357.0,162610.0,4542.0,4645.0,7572.0,53548.0,88447.0,140257.0,,,4505.0,4350.0,,,5375.5,5239.5,69527.5,79646.5
1320,5564.0,20651.0,75365.0,100312.0,105518.0,104624.0,5323.0,5568.0,19338.0,71966.0,100944.0,150194.0,5892.0,17835.0,92485.0,160265.0,168073.0,168317.0,4844.0,4955.0,12088.0,89798.0,142525.0,210569.0,6313.0,37183.0,127643.0,140065.0,140585.0,148639.0,4927.0,4899.0,6059.0,40222.0,73967.0,105230.0,5913.0,26880.0,71802.0,83691.0,69025.0,60051.0,4858.0,4828.0,7454.0,43791.0,57164.0,71953.0,5673.0,33569.0,101698.0,141437.0,143776.0,139739.0,4553.0,4675.0,5856.0,27731.0,61025.0,69019.0,5179.0,24547.0,111120.0,152681.0,155192.0,162531.0,4605.0,4666.0,7652.0,53531.0,88403.0,140173.0,,,4593.0,4394.0,,,5464.0,5322.0,69659.0,79955.0


In [19]:
# Save the four tables: block 0 as OD, block 1 as fluorescence, blocks 2-3 as
# the 'fluos-lower' files.
datas[0].to_csv('datasets/experiment/{}/ods.csv'.format(folder))
datas[1].to_csv('datasets/experiment/{}/fluos.csv'.format(folder))
datas[2].to_csv('datasets/experiment/{}/fluos-lower-1.csv'.format(folder))
datas[3].to_csv('datasets/experiment/{}/fluos-lower-2.csv'.format(folder))

#### Initial experiments

In [None]:
def read_map(plate_map, plate_idx):
    """Flatten a plate-map grid into one row per well.

    Args:
        plate_map: Table with a 'group' column (row letter) and one column
            per plate column number.
        plate_idx: Value stored as 'inducer' for every well in row 'H'.

    Returns:
        DataFrame with 'Well', 'value' and 'inducer' columns; outside row
        'H' the inducer is (column number - 1) modulo 6.
    """
    wells = plate_map.melt(id_vars=['group'])
    wells['variable'] = wells['variable'].astype(int)
    two_digit = wells['variable'].map('{:02d}'.format)
    wells['Well'] = wells['group'] + two_digit
    is_h = wells['group'].isin(['H'])
    wells.loc[~is_h, 'inducer'] = (wells['variable'] - 1) % 6
    wells.loc[is_h, 'inducer'] = plate_idx
    return wells[['Well', 'value', 'inducer']]

def generate_metadata(well, plate_map):
    """Label every measured well with a readable promoter/ECF/gate name.

    Args:
        well: Series named 'Well' listing the wells present in the raw data.
        plate_map: Table with 'Well', 'value' (sample code) and 'inducer'.

    Returns:
        DataFrame of the wells whose code is known, with an added
        'short_name' column ('<sample>_<inducer>') and a 0..n-1 index.
    """
    sample_map = {
        'P62': 'pBAD/Ara',
        'P64': 'pCin/OHC14',
        'P66': 'pCymRC/Cuma',
        'P68': 'pLuxB/AHL',
        'P69': 'pPhlF/DAPG',
        'P70': 'pSalTTC/Sal',
        'P72': 'pRhaB/Rha',
        'E720': 'e11x33',
        'E721': 'e15x33',
        'E722': 'e16x33',
        'E723': 'e20x33',
        'E724': 'e32x33',
        'E725': 'e34x33',
        'E726': 'e38x33',
        'E727': 'e41x33',
        'E728': 'e42x33',
        # Was keyed 'G726', which the later 'G726' entry overwrote, leaving
        # 'G720' unmapped.
        'G720': 'e11x32STPhoRadA',
        'G721': 'e15x32NpuSspS2',
        'G722': 'e16x33NrdA2',
        'G723': 'e20x32gp411',
        'G724': 'e32x30SspGyrB',
        'G725': 'e34x30MjaKlbA',
        'G726': 'e38x32gp418',
        'G727': 'e41x32NrdJ1',
        'G728': 'e42x32STIMPDH1',
        'BK': 'Blank_Kan',
        'BA': 'Blank_Amp',
        '3K3-N': 'negative-control-kan',
        '4A3-N': 'negative-control-amp',
        '4A3-P': 'positive-control-amp'
    }
    df = pd.merge(well, plate_map, on='Well', how='left').dropna(subset=['value']).reset_index(drop=True)
    df['short_name'] = df['value'].map(sample_map) + '_' + df['inducer'].astype(int).astype(str)
    # Codes missing from sample_map give a NaN short_name and are dropped.
    # Renumber afterwards so the labels stay positionally aligned with the
    # reset-index data table they are concatenated with.
    return df.dropna().reset_index(drop=True)

In [None]:
# Plate 3 of the initial experiments: label wells, keep the labelled ones, and
# cut the raw export into four column blocks.
plate_map = read_map(pd.read_csv('datasets/experiment/plate-3/plate_map.csv'), 1)
# skiprows=[0] skips the first line of the raw file, so the second line is the header.
raw_data = pd.read_csv('datasets/experiment/plate-3/raw.csv', skiprows=[0])
metadata = generate_metadata(raw_data['Well'], plate_map)
data = raw_data[raw_data['Well'].isin(metadata['Well'])].reset_index(drop=True)
# Four consecutive column blocks, each sized with h=24, m=0.
datas = generate_data(data, metadata['short_name'], h=24, m=0, num_data=4)

In [None]:
# Disabled snippet kept as a string literal: assigning it to `temp` does not
# execute it. It reads per-time-point plate-1 CSV files and stacks one column.
temp = '''
ods1, fluos1, fluos2, fluos3 = [], [], [], []
for i in range(0, 140, 20):
    if i==20:
        continue
    ods1.append(pd.read_csv('datasets/experiment/plate-1/od-{}.csv'.format(i)).iloc[:,3])   
    fluos1.append(pd.read_csv('datasets/experiment/plate-1/fluo-{}.csv'.format(i)).iloc[:,3])
    fluos2.append(pd.read_csv('datasets/experiment/plate-1/fluo-{}.csv'.format(i)).iloc[:,4])
    fluos3.append(pd.read_csv('datasets/experiment/plate-1/fluo-{}.csv'.format(i)).iloc[:,5])
pd.concat(fluos3, axis=1).T.to_csv('datasets/experiment/plate-1/plate1-fluos3-add.csv', index=False)
'''

In [None]:
# Save the four plate-3 tables: block 0 as OD, block 1 as fluorescence, and
# blocks 2-3 as the two 'gain-lower' fluorescence files.
datas[0].to_csv('datasets/experiment/plate-3/plate3-ods.csv')
datas[1].to_csv('datasets/experiment/plate-3/plate3-fluos.csv')
datas[2].to_csv('datasets/experiment/plate-3/plate3-fluos-gain-lower-1.csv')
datas[3].to_csv('datasets/experiment/plate-3/plate3-fluos-gain-lower-2.csv')

### AND Induction Matrix

In [None]:
# Gate names consumed two per plate (gates[2i-2:2i] for plate i) by the AND
# loop below; 'invalid' fills the tenth slot so that plate 5 also gets a pair.
gates = ['e11x32STPhoRadA', 'e15x32NpuSspS2', 'e16x33NrdA2', 'e20x32gp411', 'e32x30SspGyrB', 'e34x30MjaKlbA',
         'e38x32gp418', 'e41x32NrdJ1', 'e42x32STIMPDH1', 'invalid']

In [None]:
def read_map(plate_map, plate_idx):
    """Flatten an induction-matrix plate map into one row per well.

    Rows other than 'G' and 'H' get cuma = row letter offset from 'A' and
    ara = (column number - 1) modulo 6. Rows 'G' and 'H' get cuma = plate_idx
    and ara = (column number - 1) modulo 3.

    Args:
        plate_map: Table with a 'group' column (row letter) and one column
            per plate column number.
        plate_idx: Value stored as 'cuma' for rows 'G' and 'H'.

    Returns:
        DataFrame with 'well', 'value', 'cuma' and 'ara' columns.
    """
    grid = plate_map.melt(id_vars=['group'])
    grid['variable'] = grid['variable'].astype(int)
    grid['well'] = grid['group'] + grid['variable'].map('{:02d}'.format)
    bottom_rows = grid['group'].isin(['G', 'H'])
    zero_based_column = grid['variable'] - 1
    grid.loc[~bottom_rows, 'cuma'] = grid['group'].map(ord) - 65
    grid.loc[~bottom_rows, 'ara'] = zero_based_column % 6
    grid.loc[bottom_rows, 'cuma'] = plate_idx
    grid.loc[bottom_rows, 'ara'] = zero_based_column % 3
    return grid[['well', 'value', 'cuma', 'ara']]

def generate_metadata(well, plate_map, gates):
    """Label every measured well of an AND induction-matrix plate.

    Args:
        well: Series named 'well' listing the wells present in the raw data.
        plate_map: Table with 'well', 'value' (sample code), 'cuma', 'ara'.
        gates: Sequence whose first two entries name the constructs coded
            'E1' and 'E2' on this plate.

    Returns:
        DataFrame of the wells whose code is known, with an added
        'short_name' column ('<sample>_<cuma><ara>') and a 0..n-1 index.
    """
    sample_map = {
        'E1': gates[0],
        'E2': gates[1],
        'P_3K3': 'positive_control_3K3',
        'P_4A3': 'positive_control_4AE',
        'N_3K3': 'negative_control_3K3',
        'N_4A3': 'negative_control_4AE',
        'B_K': 'blank_K',
        'B_A': 'blank_A'
    }
    df = pd.merge(well, plate_map, on='well', how='left').dropna(subset=['value']).reset_index(drop=True)
    df['short_name'] = df['value'].map(sample_map) + '_' + df['cuma'].astype(int).astype(str) + df['ara'].astype(int).astype(str)
    # Codes missing from sample_map give a NaN short_name and are dropped.
    # Renumber afterwards so the labels stay positionally aligned with the
    # reset-index data table they are concatenated with.
    return df.dropna().reset_index(drop=True)

In [None]:
# AND induction matrix: process plates 1-5 (two gates per plate) and join the
# first table of every plate side by side into one CSV.
fluos, ods = [], []
for i in tqdm(range(1, 6)):
    plate_map = read_map(pd.read_csv('datasets/induction_matrix/plate_map.csv'), i)
    raw_data = pd.read_csv('datasets/induction_matrix/plate{}-fluo-od.csv'.format(i), skiprows=[0])
    # Plate i carries gates[2i-2] and gates[2i-1].
    metadata = generate_metadata(raw_data['well'], plate_map, gates[(i*2)-2:(i*2)])
    data = raw_data[raw_data['well'].isin(metadata['well'])].reset_index(drop=True)
    # Only one column block is extracted per plate.
    datas = generate_data(data, metadata['short_name'], h=24, num_data=1)
    fluos.append(datas[0])
    #ods.append(datas[1])
pd.concat(fluos, axis=1).to_csv('datasets/induction_matrix/avg_fluos.csv')
#pd.concat(ods, axis=1).to_csv('datasets/induction_matrix/avg_ods.csv')

### XOR Induction Matrix

In [None]:
# XOR gate names; generate_metadata maps plate-map codes 'E1' and 'E2' to
# gates[0] and gates[1].
gates = ['e20-33', 'e11-15']

In [None]:
def read_map(plate_map, plate_idx):
    """Flatten an XOR induction-matrix plate map into one row per well.

    Rows other than 'G' and 'H' get ara = row letter offset from 'A' and
    rha = (column number - 1) modulo 6. Rows 'G' and 'H' get plate_idx for
    both.

    Args:
        plate_map: Table with a 'group' column (row letter) and one column
            per plate column number.
        plate_idx: Value stored as 'ara' and 'rha' for rows 'G' and 'H'.

    Returns:
        DataFrame with 'well', 'value', 'ara' and 'rha' columns.
    """
    # Use the table the caller passed in; it used to be discarded and
    # replaced by a hard-coded read of the XOR plate_map.csv.
    plate_map = plate_map.melt(id_vars=['group'])
    plate_map['variable'] = plate_map['variable'].astype(int)
    plate_map['well'] = plate_map['group'] + plate_map['variable'].apply(lambda x: '{:02d}'.format(x))
    control_rows = plate_map['group'].isin(['G', 'H'])
    plate_map.loc[~control_rows, 'ara'] = plate_map['group'].apply(lambda x: ord(x)) - 65
    plate_map.loc[~control_rows, 'rha'] = (plate_map['variable'] - 1) % 6
    plate_map.loc[control_rows, 'ara'] = plate_idx
    plate_map.loc[control_rows, 'rha'] = plate_idx
    return plate_map[['well', 'value', 'ara', 'rha']]

def generate_metadata(well, plate_map, gates):
    """Label every measured well of an XOR induction-matrix plate.

    Args:
        well: Series named 'well' listing the wells present in the raw data.
        plate_map: Table with 'well', 'value' (sample code), 'ara', 'rha'.
        gates: Sequence whose first two entries name the constructs coded
            'E1' and 'E2'.

    Returns:
        DataFrame of the wells whose code is known, with an added
        'short_name' column ('<sample>_<ara><rha>') and a 0..n-1 index.
    """
    sample_map = {
        'E1': gates[0],
        'E2': gates[1]
    }
    df = pd.merge(well, plate_map, on='well', how='left').dropna(subset=['value']).reset_index(drop=True)
    # The XOR plate map carries 'ara' and 'rha' levels; it has no 'cuma'
    # column, so the former df['cuma'] lookup raised KeyError.
    df['short_name'] = df['value'].map(sample_map) + '_' + df['ara'].astype(int).astype(str) + df['rha'].astype(int).astype(str)
    # Renumber after dropping unmapped codes so the labels stay positionally
    # aligned with the reset-index data table they are concatenated with.
    return df.dropna().reset_index(drop=True)

In [None]:
# XOR induction matrix: build one fluorescence table per plate and join them.
fluos, ods = [], []
# Two gates are consumed per plate, so the plate count follows from `gates`.
# Looping over five plates indexed past the two-entry list on plate 2.
n_plates = len(gates) // 2
for i in tqdm(range(1, n_plates + 1)):
    plate_map = read_map(pd.read_csv('datasets/induction_matrix_xor/plate_map.csv'), i)
    # NOTE(review): assumes the raw XOR exports sit next to the XOR plate map
    # under the same file-name pattern as the AND run -- confirm on disk.
    raw_data = pd.read_csv('datasets/induction_matrix_xor/plate{}-fluo-od.csv'.format(i), skiprows=[0])
    metadata = generate_metadata(raw_data['well'], plate_map, gates[(i*2)-2:(i*2)])
    data = raw_data[raw_data['well'].isin(metadata['well'])].reset_index(drop=True)
    datas = generate_data(data, metadata['short_name'], h=24, num_data=1)
    fluos.append(datas[0])
    #ods.append(datas[1])
# Write into the XOR folder so the AND run's avg_fluos.csv is not overwritten.
pd.concat(fluos, axis=1).to_csv('datasets/induction_matrix_xor/avg_fluos.csv')

In [None]:
# Show the plate map left over from the last loop iteration.
plate_map

### Exercise

In [None]:
def read_map(plate_map):
    """Flatten the exercise plate map into one row per well.

    Args:
        plate_map: Table with a 'group' column (row letter) and one column
            per plate column number.

    Returns:
        DataFrame with 'well', 'value' and 'inducer' columns; inducer is 1
        for rows 'C', 'D', 'G' and 'H', and 0 for every other row.
    """
    melted = plate_map.melt(id_vars=['group'])
    melted['variable'] = melted['variable'].astype(int)
    melted['well'] = melted['group'] + melted['variable'].map("{:02d}".format)
    induced = melted['group'].isin(['C', 'D', 'G', 'H'])
    melted.loc[~induced, 'inducer'] = 0
    melted.loc[induced, 'inducer'] = 1
    return melted[['well', 'value', 'inducer']]

# Load the exercise plate map and flatten it to one row per well.
plate_map = read_map(pd.read_csv('datasets/exercise/plate_map.csv'))

In [None]:
def generate_metadata(well, plate_map):
    """Attach plate-map information to the measured wells.

    Args:
        well: Series named 'well' listing the wells present in the raw data.
        plate_map: Table with a 'well' column and a 'value' column.

    Returns:
        DataFrame of the wells that have a 'value' in the plate map, with
        rows containing any remaining missing entry removed.
    """
    merged = pd.merge(well, plate_map, on='well', how='left')
    labelled = merged.dropna(subset=['value']).reset_index(drop=True)
    return labelled.dropna()

# Load the first exercise export (skipping its first line) and keep only the
# wells that received metadata.
raw_data = pd.read_csv('datasets/exercise/first-exercise.csv', skiprows=[0])
metadata = generate_metadata(raw_data['well'], plate_map)
data = raw_data[raw_data['well'].isin(metadata['well'])].reset_index(drop=True)

In [None]:
# generate_data returns `num_data` tables (two by default), so ask for three
# to match the three names unpacked here.
# NOTE(review): the fluo / od / fluo_half order is assumed from the variable
# names -- confirm against the column layout of the raw export.
fluo, od, fluo_half = generate_data(data, metadata['value'], 24, num_data=3, col='value')

In [None]:
# Plot ten adjacent columns of `fluo`, starting at column position 36.
i = 3 * 12
promoters = fluo.iloc[:,i:i+10]
#promoters
promoters.plot()

In [None]:
# Plot nine adjacent columns of `od` (from column position 24) against the
# index divided by 60.
i = 2 * 12
promoters = od.iloc[:,i:i+9]
#promoters
# The loop reuses `i`, overwriting the column offset set above. Columns are
# addressed by position rather than by name.
for i in range(promoters.shape[1]):
    plt.plot(od.index/60, promoters.iloc[:, i], label=promoters.columns[i])
plt.ylabel('OD')
plt.xlabel('Hour')
plt.legend()
sns.despine()

In [None]:
# From the `od` columns named E720-E726, take the single column at positions
# 2, 6, 10, ..., 30 and join the picks side by side.
# NOTE(review): presumably one replicate per construct -- confirm the stride
# of 4 matches the plate layout.
a = []
ECF = od[['E720', 'E721', 'E722', 'E723', 'E724', 'E725', 'E726']]
for i in range(0, 32, 4):
    a.append(ECF.iloc[:,i+2:i+3])
ECFs = pd.concat(a, axis=1)
ECFs

In [None]:
# Plot every selected ECF column against the index divided by 60.
# NOTE(review): ECFs is sliced from `od`, yet the y axis is labelled as
# fluorescence -- confirm which table was intended.
for i in range(ECFs.shape[1]):
    plt.plot(ECFs.index/60, ECFs.iloc[:, i], label=ECFs.columns[i])
plt.ylabel('Fluo (a.u)')
plt.xlabel('Hour')
plt.legend()
sns.despine()

In [None]:
# Load the saved induction-matrix tables and plot the '<gate>_55' fluorescence
# column of each gate against the index divided by 60.
fluos = pd.read_csv('datasets/induction_matrix/induction_fluo.csv', index_col='time')
ods = pd.read_csv('datasets/induction_matrix/induction_od.csv', index_col='time')
gates = ['e11x32STPhoRadA', 'e15x32NpuSspS2', 'e16x33NrdA2', 'e20x32gp411', 'e32x30SspGyrB',
         'e34x30MjaKlbA', 'e38x32gp418', 'e41x32NrdJ1', 'e42x32STIMPDH1']
g = []
# gates[:-2] leaves out the last two entries of the list.
for gate in gates[:-2]:
    fluo = fluos['{}_55'.format(gate)]
    g.append(fluo)
    plt.plot(fluo.index/60, fluo, label=gate)
plt.legend()
sns.despine()

In [None]:
# Join the per-gate series collected in `g` into one table and display it.
gdf = pd.concat(g, axis=1)
gdf

In [None]:
# One panel per gate on a 2x5 grid; panels beyond the plotted gates are hidden.
# NOTE(review): column i of gdf ('gate') is paired with column i of ECFs
# ('intact') purely by position -- confirm both tables list the constructs in
# the same order.
f, axs = plt.subplots(2, 5, sharex=False, sharey=False, figsize=(16, 5))
axr = axs.ravel()
for i, ax in enumerate(axr):
    if i < len(gates[:-2]):
        ax.plot(gdf.index/60, gdf.iloc[:,i], label='gate')
        ax.plot(ECFs.index/60, ECFs.iloc[:,i], label='intact')
        ax.set_title(gates[i])
        ax.legend(loc=4)
        sns.despine()
    else:
        ax.set_visible(False)
plt.tight_layout()

In [None]:
# Load the construct dictionary table and display it.
constructs = pd.read_csv('datasets/dictionary.csv')
constructs

In [None]:
# Show only the dictionary rows whose short_name is one of the gates.
constructs[constructs['short_name'].isin(gates)]

In [None]:
def parse_minutes(x):
    """Convert a time label such as '1 h 20 min' or '2 h ' into total minutes.

    Parameters
    ----------
    x : str
        Label whose first space-separated token is the hour count and whose
        third token, when present and non-empty, is the minute count.
        Anything from the first '.' onwards is ignored, so a '.1' / '.2'
        suffix (as added to repeated column labels) does not break parsing.

    Returns
    -------
    int
        hours * 60 + minutes.

    Raises
    ------
    ValueError
        If the hour or minute token is not an integer.
    """
    # Same suffix handling as the parse_minutes defined at the top of the notebook,
    # so redefining the function here does not change behaviour for earlier cells.
    spl = x.split('.')[0].split(' ')
    hours = int(spl[0]) * 60
    # A label like '3 h' has no third token at all; treat it as zero minutes.
    mins = int(spl[2]) if len(spl) > 2 and spl[2] != '' else 0
    return hours + mins

def transpose_data(df):
    """Turn a wells-by-time table into a time-by-wells table indexed by minutes.

    Parameters
    ----------
    df : DataFrame
        Must contain a 'short_name' column; every other column label is a time
        label understood by ``parse_minutes``.

    Returns
    -------
    DataFrame
        One row per time label (index named 'time', in minutes) and one column
        per 'short_name' value. The input frame is left unmodified.
    """
    # set_index returns a new frame, so the caller's table keeps its 'short_name' column
    wide = df.set_index('short_name').transpose().reset_index()
    # after reset_index the original column labels (time strings) sit in the 'index' column
    wide['time'] = wide['index'].apply(parse_minutes)
    return wide.set_index('time').drop('index', axis=1)

def generate_data(df, plate, name, h=20, m=0):
    """Cut a plate-reader table into three consecutive column blocks.

    Parameters
    ----------
    df : DataFrame
        Reader table; measurement columns start at position 3.
    plate : any
        Not used by this function.
    name : Series
        Label column placed in front of each block (``transpose_data`` expects
        it to be called 'short_name').
    h : int
        Each of the first two blocks is ``h * 3 + 1 - m`` columns wide
        (presumably three readings per hour plus the start point — confirm).
    m : int
        Number of columns to subtract from that width.

    Returns
    -------
    tuple
        ``(fluo, od, fluo_half)``: first block, second block, and all remaining
        columns, each converted to float and passed through ``transpose_data``.
    """
    first_col = 3
    block_width = h * 3 + 1 - m
    fluo_stop = first_col + block_width
    od_stop = fluo_stop + block_width
    column_ranges = [
        slice(first_col, fluo_stop),
        slice(fluo_stop, od_stop),
        slice(od_stop, None),
    ]

    tables = []
    for cols in column_ranges:
        readings = df.iloc[:, cols].astype(float)
        labelled = pd.concat([name, readings], axis=1)
        tables.append(transpose_data(labelled))

    fluo, od, fluo_half = tables
    return fluo, od, fluo_half

In [None]:
def read_plate_data(df):
    """Promote the first row of a raw reader table to the header and drop incomplete rows.

    Parameters
    ----------
    df : DataFrame
        Raw table whose first row holds the real column labels.

    Returns
    -------
    DataFrame
        New frame with the first row used as column labels, that row removed,
        every row containing a missing value dropped, and a fresh 0..n-1 index.
        The input frame is left unmodified.
    """
    table = df.copy()  # work on a copy so the caller's raw table stays intact
    table.columns = table.iloc[0]
    table = table.drop(table.index[0]).dropna()
    return table.reset_index(drop=True)

# Note: read_plate_data needs the raw DataFrame as its argument; calling it without one raises TypeError,
# so no bare call is made in this cell.

### Marionette Strain

In [None]:
def read_plate_map(data):
    """Split a plate-map table into its first two columns and a long well -> id table.

    Parameters
    ----------
    data : DataFrame
        Indexed by 'group'; the first two columns are returned as-is, the
        remaining column labels must be integer-like (they are zero-padded to
        two digits and appended to the group to form the well name).

    Returns
    -------
    tuple
        ``(con, mar)``: ``con`` is the first two columns with 'group' restored
        as a column; ``mar`` has columns 'id' and 'well', one row per cell of
        the remaining columns.
    """
    con = data.iloc[:, :2].reset_index()
    melted = data.iloc[:, 2:].reset_index().melt(id_vars=['group'])
    column_tag = melted['variable'].apply(lambda x: "{:02d}".format(int(x)))
    melted['well'] = melted['group'] + column_tag
    mar = melted[['value', 'well']].rename(columns={'value': 'id'})
    return con, mar

# Load the Marionette plate map (indexed by 'group') and split it into the two tables read_plate_map returns.
con, mar = read_plate_map(pd.read_csv('datasets/mario_map.csv', index_col=['group']))

In [None]:
def read_plate_data(df):
    """Promote the first row of a raw reader table to the header.

    Unlike the variant defined earlier in the notebook, rows with missing
    values are kept.

    Parameters
    ----------
    df : DataFrame
        Raw table whose first row holds the real column labels.

    Returns
    -------
    DataFrame
        New frame with the first row used as column labels, that row removed,
        and a fresh 0..n-1 index. The input frame is left unmodified.
    """
    table = df.copy()  # work on a copy so the caller's raw table stays intact
    table.columns = table.iloc[0]
    return table.drop(table.index[0]).reset_index(drop=True)

# Load the Marionette reader export and promote its first row to the header.
data = read_plate_data(pd.read_csv('datasets/marrionette.csv'))

In [None]:
# Map each reader row to its construct: well -> plate-map id -> construct dictionary entry.
merged = pd.merge(data['well'], mar, on='well', how='left')
merged = pd.merge(merged, constructs, on='id', how='left')
# NOTE(review): dropping NaNs and renumbering shifts the remaining names up, so they stay aligned with the
# rows of `data` only if unmapped wells come last — confirm.
name = merged['short_name'].dropna().reset_index(drop=True) #just to make sure there is no null and indexing is correct

In [None]:
# Display the merged well / id / construct table for inspection.
merged

In [None]:
# Split the Marionette run (h=24) into three time-indexed tables; `mar` is passed as the unused `plate` argument.
mar_fluo, mar_od, mar_fluo_half = generate_data(data, mar, name, 24)
# Rebinds `gates` to the distinct construct names of this run (reformat_df reads this global).
gates = name.unique().tolist()

In [None]:
def reformat_df(data, gate_names=None):
    """Split each gate's columns into two four-column tables with state-suffixed labels.

    Parameters
    ----------
    data : DataFrame
        Time-indexed table in which every gate name labels eight columns:
        the first four go to the first result, the remaining four to the second.
    gate_names : list of str, optional
        Gates to process, in output order. Defaults to the notebook-level
        ``gates`` list, which keeps existing ``reformat_df(data)`` calls working.

    Returns
    -------
    tuple
        ``(top10_wrapper, mario_wrapper)``; columns are named
        ``<gate>_00``, ``<gate>_10``, ``<gate>_01``, ``<gate>_11`` per gate.
        Both are empty frames when there are no gates.
    """
    if gate_names is None:
        gate_names = gates  # notebook global, set in the cell that splits the run

    suffixes = ['_00', '_10', '_01', '_11']
    top10_parts = []
    mario_parts = []

    for gate in gate_names:
        labels = [gate + suffix for suffix in suffixes]
        block = data[gate]  # all columns labelled with this gate

        # copy before relabelling so the slices of `data` are never touched
        top10 = block.iloc[:, :4].copy()
        top10.columns = labels
        top10_parts.append(top10)

        mario = block.iloc[:, 4:].copy()
        mario.columns = labels
        mario_parts.append(mario)

    # one concat per output instead of growing a frame inside the loop
    top10_wrapper = pd.concat(top10_parts, axis=1) if top10_parts else pd.DataFrame()
    mario_wrapper = pd.concat(mario_parts, axis=1) if mario_parts else pd.DataFrame()
    return top10_wrapper, mario_wrapper

In [None]:
# Relabel the Marionette fluorescence table per gate; the OD variant and the CSV exports below are disabled.
top10_fluo, mario_fluo = reformat_df(mar_fluo)
#top10_od, mario_od = reformat_df(mar_od)

#mario_fluo.reset_index().to_csv('datasets/marionette_fluo_half.csv', index=False)
#mario_od.reset_index().to_csv('datasets/marionette_od.csv', index=False)

### ALL GATES

### Data from plate reader 1

In [None]:
def cleanse_plate(plate):
    """Convert a wide plate map into a long Well / code_name table.

    Parameters
    ----------
    plate : DataFrame
        Wide map whose 'Unnamed: 0' column holds the row letter and whose other
        column labels are integer-like plate columns; cells are strings.

    Returns
    -------
    DataFrame
        Columns 'Well' (row letter + two-digit column) and 'code_name'
        (cell text up to the first '.'), one row per cell.
    """
    long_form = plate.melt(id_vars=['Unnamed: 0'])
    code_names = long_form['value'].apply(lambda x: x.split('.')[0])
    column_tags = long_form['variable'].apply(lambda x: "{:02d}".format(int(x)))
    cleaned = pd.DataFrame({
        'Well': long_form['Unnamed: 0'] + column_tags,
        'code_name': code_names,
    })
    return cleaned.reset_index(drop=True)

# Long-form Well -> code_name map for plate 1.
plate1 = cleanse_plate(pd.read_csv('datasets/plate1_map.csv'))
plate1

In [None]:
# Load the three plate-1 runs and print their shapes.
# NOTE(review): every read_plate_data defined in this notebook takes a DataFrame (it reads df.iloc[0]), but a
# name string is passed here — presumably an earlier file-loading variant was in scope; confirm before re-running.
df11 = read_plate_data('and_gate_11') # up to 20h
print(df11.shape)
df12 = read_plate_data('and_gate_12') # up to 16h
print(df12.shape)
df13 = read_plate_data('and_gate_13') # up to 16h
print(df13.shape)

In [None]:
# Attach plate-1 code names, then short names, to the wells in the row order of run 11.
# NOTE(review): naming_map is not defined in the visible part of the notebook — confirm where it is loaded.
merged = pd.merge(df11['Well'], plate1, on='Well', how='left')
merged = pd.merge(merged, naming_map, on='code_name', how='left')
name1 = merged['short_name']

In [None]:
# Split each plate-1 run into its measurement tables (h = 20, 16 and 16 respectively).
# NOTE(review): five tables are unpacked, but the generate_data defined in this notebook returns three —
# presumably a five-output variant was in scope when this was last run; confirm.
fluo11, od11, bulk_fluo11, fluo_half11, bulk_fluo_half11 = generate_data(df11, plate1, name1, 20)
fluo12, od12, bulk_fluo12, fluo_half12, bulk_fluo_half12 = generate_data(df12, plate1, name1, 16)
fluo13, od13, bulk_fluo13, fluo_half13, bulk_fluo_half13 = generate_data(df13, plate1, name1, 16)

In [None]:
def plot_all(data, num_row, num_col):
    """Draw every column of the given tables on a grid of small panels.

    Parameters
    ----------
    data : list of DataFrame
        Tables sharing a column layout; column ``i`` of each table is overlaid
        on panel ``i``, with the table index divided by 60 on the x axis.
        The first table supplies panel titles and the number of panels used.
    num_row, num_col : int
        Grid dimensions; panels beyond the number of columns are hidden.
    """
    # squeeze=False keeps `axs` a 2-D array even for a 1x1 grid, so ravel() always works
    f, axs = plt.subplots(num_row, num_col, sharex=True, sharey=False,
                          figsize=(14, num_row*2), squeeze=False)
    axr = axs.ravel()
    reference = data[0]
    # enumerate() has no length, so pass total= to give tqdm a real progress bar
    for i, ax in tqdm(enumerate(axr), total=len(axr)):
        if i >= reference.shape[1]:
            ax.set_visible(False)
            continue
        for d in data:
            ax.plot(d.index/60, d.iloc[:, i])
        ax.set_title(reference.columns[i])
        ax.set_xlabel('Time (h)')
    plt.tight_layout()
    sns.despine()
    
# Overlay the three plate-1 runs of the bulk_fluo_half tables on a 20 x 5 panel grid.
plot_all([bulk_fluo_half11, bulk_fluo_half12, bulk_fluo_half13], 20, 5)

In [None]:
# Same grid for the OD tables of the three plate-1 runs.
plot_all([od11, od12, od13], 20, 5)

In [None]:
# Element-wise mean of the three plate-1 runs for each measurement table.
# Rows or columns missing from any run come out as NaN because the frames are aligned on their labels.
bulk_fluo1 = bulk_fluo11.add(bulk_fluo12).add(bulk_fluo13).div(3)
fluo1 = fluo11.add(fluo12).add(fluo13).div(3)
od1 = od11.add(od12).add(od13).div(3)
fluo_half1 = fluo_half11.add(fluo_half12).add(fluo_half13).div(3)
bulk_fluo_half1 = bulk_fluo_half11.add(bulk_fluo_half12).add(bulk_fluo_half13).div(3)

In [None]:
# Export the plate-1 run averages, dropping every row that contains a missing value.
bulk_fluo1.dropna().to_csv('datasets/bulk_fluo_plate_1_triplicate.csv')
fluo1.dropna().to_csv('datasets/fluo_plate_1_triplicate.csv')
od1.dropna().to_csv('datasets/od_plate_1_triplicate.csv')
bulk_fluo_half1.dropna().to_csv('datasets/bulk_fluo_half_plate_1_triplicate.csv')
fluo_half1.dropna().to_csv('datasets/fluo_half_plate_1_triplicate.csv')

In [None]:
# Export the first plate-1 run on its own, again dropping rows with missing values.
bulk_fluo11.dropna().to_csv('datasets/bulk_fluo_plate_1_single.csv')
fluo11.dropna().to_csv('datasets/fluo_plate_1_single.csv')
od11.dropna().to_csv('datasets/od_plate_1_single.csv')
bulk_fluo_half11.dropna().to_csv('datasets/bulk_fluo_half_plate_1_single.csv')
fluo_half11.dropna().to_csv('datasets/fluo_half_plate_1_single.csv')

### Data from plate reader 2

In [None]:
# Long-form map for plate 2, then load its three runs and print their shapes.
# NOTE(review): read_plate_data as defined in this notebook expects a DataFrame, not a name string — confirm
# which loader was in scope when this cell was last run.
plate2 = cleanse_plate(pd.read_csv('datasets/plate2_map.csv'))
df21 = read_plate_data('and_gate_21') # up to 20h
print(df21.shape)
df22 = read_plate_data('and_gate_22') # up to 16h
print(df22.shape)
df23 = read_plate_data('and_gate_23') # up to 16h
print(df23.shape)

In [None]:
# Attach plate-2 code names, then short names, to the wells in the row order of run 21.
# NOTE(review): naming_map is not defined in the visible part of the notebook — confirm where it is loaded.
merged = pd.merge(df21['Well'], plate2, on='Well', how='left')
merged = pd.merge(merged, naming_map, on='code_name', how='left')
name2 = merged['short_name']

In [None]:
# Split each plate-2 run into its measurement tables; the last two arguments are generate_data's h and m.
# NOTE(review): five tables are unpacked, but the generate_data defined in this notebook returns three — confirm.
fluo21, od21, bulk_fluo21, fluo_half21, bulk_fluo_half21 = generate_data(df21, plate2, name2, 21, 2)
fluo22, od22, bulk_fluo22, fluo_half22, bulk_fluo_half22 = generate_data(df22, plate2, name2, 22, 0)
fluo23, od23, bulk_fluo23, fluo_half23, bulk_fluo_half23 = generate_data(df23, plate2, name2, 22, 1)

In [None]:
# Overlay the three plate-2 runs of the bulk_fluo_half tables on a 20 x 5 panel grid.
plot_all([bulk_fluo_half21, bulk_fluo_half22, bulk_fluo_half23], 20, 5)

In [None]:
# Same grid for the OD tables of the three plate-2 runs.
plot_all([od21, od22, od23], 20, 5)

In [None]:
# Element-wise mean of the three plate-2 runs for each measurement table.
# Rows or columns missing from any run come out as NaN because the frames are aligned on their labels.
bulk_fluo2 = bulk_fluo21.add(bulk_fluo22).add(bulk_fluo23).div(3)
fluo2 = fluo21.add(fluo22).add(fluo23).div(3)
od2 = od21.add(od22).add(od23).div(3)
fluo_half2 = fluo_half21.add(fluo_half22).add(fluo_half23).div(3)
bulk_fluo_half2 = bulk_fluo_half21.add(bulk_fluo_half22).add(bulk_fluo_half23).div(3)

In [None]:
# Export the plate-2 run averages.
# NOTE(review): unlike the plate-1 exports, rows with missing values are not dropped here — confirm this is intended.
bulk_fluo2.to_csv('datasets/bulk_fluo_plate_2_triplicate.csv')
fluo2.to_csv('datasets/fluo_plate_2_triplicate.csv')
fluo_half2.to_csv('datasets/fluo_half_plate_2_triplicate.csv')
bulk_fluo_half2.to_csv('datasets/bulk_fluo_half_plate_2_triplicate.csv')
od2.to_csv('datasets/od_plate_2_triplicate.csv')

In [None]:
# Export the first plate-2 run on its own.
bulk_fluo21.to_csv('datasets/bulk_fluo_plate_2_single.csv')
fluo21.to_csv('datasets/fluo_plate_2_single.csv')
fluo_half21.to_csv('datasets/fluo_half_plate_2_single.csv')
bulk_fluo_half21.to_csv('datasets/bulk_fluo_half_plate_2_single.csv')
od21.to_csv('datasets/od_plate_2_single.csv')

### Induction Matrix

In [None]:
# Load the induction-matrix run and print its shape.
# NOTE(review): read_plate_data as defined in this notebook expects a DataFrame, not a name string — confirm.
induction = read_plate_data('induction') # up to 20h
print(induction.shape)

In [None]:
# Each entry describes one 3 x 3 block of wells holding a single construct:
# (row letters, first plate column, construct code).
_INDUCTION_BLOCKS = [
    ('ABC', 1, 'A18'), ('DEF', 1, 'A195'),
    ('ABC', 4, 'A29'), ('DEF', 4, 'A259'),
    ('ABC', 7, 'A76'), ('DEF', 7, 'A267'),
    ('ABC', 10, 'A109'), ('DEF', 10, 'A294'),
]
# Well name -> construct code for the induction-matrix plate.
induction_plate = {
    '{}{:02d}'.format(row, first_col + offset): code
    for rows, first_col, code in _INDUCTION_BLOCKS
    for row in rows
    for offset in range(3)
}
# The ninth construct is laid out differently: G01-G06 plus H01-H03.
induction_plate.update(
    (well, 'A323')
    for well in ('G01', 'G02', 'G03', 'G04', 'G05', 'G06', 'H01', 'H02', 'H03')
)
# Row letter -> position within its block (0, 1, 2 repeating).
# Row G needs to be adjusted manually later for the wells that do not follow this pattern.
induction_row = {letter: position % 3 for position, letter in enumerate('ABCDEFGH')}
# Two-digit plate column -> position within its block (0, 1, 2 repeating).
induction_col = {'{:02d}'.format(number): (number - 1) % 3 for number in range(1, 13)}

In [None]:
# Annotate every well of the induction run with its construct code and its two inducer levels.
# .copy() makes `index` an independent frame, so the column assignments below do not write into
# a slice of `induction` (which triggers SettingWithCopyWarning).
index = induction.iloc[:,:3].copy()
index['code_name'] = index['Well'].map(induction_plate)
col_idx = index['Well'].str[1:]   # two-digit plate column, e.g. '04'
row_idx = index['Well'].str[:1]   # row letter, e.g. 'G'
index['ind1_lvl'] = row_idx.map(induction_row)
index['ind2_lvl'] = col_idx.map(induction_col)
# Manual correction for row G: wells G04-G06 are level 2, not the level 0 the row map assigns.
index.loc[index['Well'].isin(['G04', 'G05', 'G06']), 'ind1_lvl'] = 2
# Wells without a construct in the plate map are labelled as controls instead of being dropped.
index.loc[index['code_name'].isnull(), 'code_name'] = 'control'
# NOTE(review): naming_map is not defined in the visible part of the notebook — confirm where it is loaded.
name = pd.merge(index[['code_name', 'ind1_lvl', 'ind2_lvl']], naming_map[['code_name', 'short_name']], on='code_name', how='left')
name

In [None]:
# Inspect the wells mapped to construct A323 (the rows affected by the manual level correction).
index[index['code_name']=='A323']

In [None]:
def parse_minutes(x):
    """Convert a time label such as '1 h 20 min' or '2 h ' into total minutes.

    The first space-separated token is the hour count; the third token, when
    present and non-empty, is the minute count. Anything from the first '.'
    onwards is ignored, so a '.1' / '.2' suffix on a repeated label is tolerated.

    Returns
    -------
    int
        hours * 60 + minutes.
    """
    spl = x.split('.')[0].split(' ')
    hours = int(spl[0]) * 60
    # A label like '3 h' has no third token at all; treat it as zero minutes.
    mins = int(spl[2]) if len(spl) > 2 and spl[2] != '' else 0
    return hours + mins

def transpose_data(df_raw):
    """Return a new frame equal to ``df_raw`` re-indexed by its 'short_name' column.

    Despite the name, this variant does not transpose or parse time labels;
    it only moves 'short_name' into the index. The input frame is not modified.
    """
    return df_raw.set_index('short_name')

def generate_data_induce(df, h=24, labels=None):
    """Split an induction-run table into fluorescence, OD and their product.

    Parameters
    ----------
    df : DataFrame
        Reader table; measurement columns start at position 3. The first
        ``h * 3`` of them form the first block, the rest the second block.
    h : int
        Sets the width of the first block (``h * 3`` columns).
        NOTE(review): the other generate_data in this notebook uses
        ``h * 3 + 1`` columns per block — confirm the width intended here.
    labels : DataFrame or Series, optional
        Annotation columns placed in front of every output table. Defaults to
        the notebook-level ``name`` table, which keeps existing
        ``generate_data_induce(df)`` calls working.

    Returns
    -------
    tuple
        ``(fluo, od, bulk_fluo)``, each with the annotation columns first.
        ``bulk_fluo`` is ``fluo * od``, matched on column labels.
    """
    if labels is None:
        labels = name  # notebook global built in the annotation cell

    start_idx = 3
    mid_idx = h * 3 + start_idx
    fluo = df.iloc[:, start_idx:mid_idx].astype(float)
    od = df.iloc[:, mid_idx:].astype(float)
    # DataFrame multiplication aligns on column labels, so both blocks must share the same labels
    bulk_fluo = fluo * od

    fluo, od, bulk_fluo = (
        pd.concat([labels, table], axis=1) for table in (fluo, od, bulk_fluo)
    )
    return fluo, od, bulk_fluo

# Split the induction run with the default h=24 and display the fluo * od table.
# This rebinds the notebook-level `fluo` and `od` names.
fluo, od, bulk_fluo = generate_data_induce(induction)
bulk_fluo

In [None]:
# Inspect the rows belonging to one gate.
bulk_fluo[bulk_fluo['short_name']=='e11x32STPhoRadA']

In [None]:
# Export the three induction tables; the row index is not written.
bulk_fluo.to_csv('datasets/bulk_fluo_induction.csv', index=False)
fluo.to_csv('datasets/fluo_induction.csv', index=False)
od.to_csv('datasets/od_induction.csv', index=False)

### XOR Gate Top 10

In [None]:
def read_plate_map(df):
    """Split a plate-map table into its first two columns and a long Well -> code_name table.

    Parameters
    ----------
    df : DataFrame
        Plate map with a 'Group' column (or already indexed by 'Group').
        After 'Group', the first two columns are returned as-is; the remaining
        column labels must be integer-like plate columns.

    Returns
    -------
    tuple
        ``(con, xor)``: ``con`` is the first two columns with 'Group' as a
        column; ``xor`` has columns 'code_name' and 'Well', one row per cell
        of the remaining columns.
    """
    # Use the table that was passed in rather than re-reading a hard-coded file;
    # accept it either with 'Group' as a regular column or already as the index.
    data = df.set_index('Group') if 'Group' in df.columns else df
    con = data.iloc[:,:2].reset_index()
    xor = data.iloc[:,2:].reset_index().melt(id_vars=['Group'])
    column_tag = xor['variable'].apply(lambda x: "{:02d}".format(int(x)))
    xor['Well'] = xor['Group'] + column_tag
    xor = xor.drop(['Group', 'variable'], axis=1).rename(columns={'value':'code_name'})
    return con, xor

# Load the XOR plate map and split it into the two tables read_plate_map returns.
con, xor = read_plate_map(pd.read_csv('datasets/xor_map.csv'))

In [None]:
def read_plate_data(df):
    """Promote the first row of a raw reader table to the header, keeping rows with missing values.

    Parameters
    ----------
    df : DataFrame
        Raw table whose first row holds the real column labels.

    Returns
    -------
    DataFrame
        New frame with the first row used as column labels, that row removed,
        and a fresh 0..n-1 index. The input frame is left unmodified.
    """
    table = df.copy()  # work on a copy so the caller's raw table stays intact
    table.columns = table.iloc[0]
    return table.drop(table.index[0]).reset_index(drop=True)

# Load the XOR reader export, promote its first row to the header and preview it.
data = read_plate_data(pd.read_csv('datasets/xor_gate.csv'))
data.head()

In [None]:
# Label each reader row with its plate-map code; here the code itself serves as the short name.
merged = pd.merge(data['Well'], xor, on='Well', how='left')
#merged = pd.merge(merged, naming_map, on='code_name', how='left')
merged.rename(columns={'code_name': 'short_name'}, inplace=True)
# NOTE(review): dropping NaNs and renumbering keeps names aligned with `data` rows only if unmapped wells come last.
name = merged['short_name'].dropna().reset_index(drop=True)

In [None]:
# Split the XOR run (h=24) into three time-indexed tables; `xor` is passed as the unused `plate` argument.
fluox, odx, fluo_halfx = generate_data(data, xor, name, 24)

In [None]:
# For each of the two constructs, take columns 0, 4, 8 and 12 of the columns carrying its label and
# rename them '<construct>_00', '_01', '_10', '_11' (the position written as two binary digits).
# The same selection is applied to the fluorescence and the OD tables.
cols = ['ECF20/33', 'ECF11/15']
fluo_xor = pd.DataFrame()
od_xor = pd.DataFrame()
for c in cols:
    temp = pd.concat([fluox[c].iloc[:,i] for i in range(0, 16, 4)], axis=1)
    temp.columns = [c + '_' + "{:02b}".format(int(i)) for i in range(4)]
    fluo_xor = pd.concat([fluo_xor, temp], axis=1)
    
    temp = pd.concat([odx[c].iloc[:,i] for i in range(0, 16, 4)], axis=1)
    temp.columns = [c + '_' + "{:02b}".format(int(i)) for i in range(4)]
    od_xor = pd.concat([od_xor, temp], axis=1)

In [None]:
# Export the XOR tables.
# NOTE(review): fluo_xor comes straight from the fluorescence table but is written to a 'bulk_fluo' file — confirm.
fluo_xor.to_csv('datasets/bulk_fluo_xor.csv')
od_xor.to_csv('datasets/od_xor.csv')

### First-round Data

In [None]:
# Load the first-round export, promote its first row to the header and drop rows with missing values.
df = pd.read_csv('datasets/raw.csv')
df.columns = df.iloc[0]
df.drop(df.index[0], inplace=True)
df.dropna(inplace=True)
print(df.shape)
df.head()

In [None]:
# ECF label for 'Sample X1' .. 'Sample X21', in sample order.
_ECF_BY_SAMPLE = [
    'e15', 'e22', 'e32', 'e33', 'e34', 'e41', 'e42',
    'e15', 'e22', 'e38',
    'e16', 'e33',
    'e15', 'e16', 'e17', 'e20', 'e22', 'e26', 'e32', 'e33', 'e34',
]
map_ecf = {
    'Sample X{}'.format(number): ecf
    for number, ecf in enumerate(_ECF_BY_SAMPLE, start=1)
}

# Intein label for the same samples, given as (label, number of consecutive samples).
_INTEIN_RUNS = [('SspGyrB', 7), ('TerThyXS2', 3), ('TerThyXS1', 2), ('STPhoRadA', 9)]
_INTEIN_BY_SAMPLE = [label for label, count in _INTEIN_RUNS for _ in range(count)]
map_int = {
    'Sample X{}'.format(number): intein
    for number, intein in enumerate(_INTEIN_BY_SAMPLE, start=1)
}

In [None]:
# Annotate each row with its ECF / intein labels and with the inducer state implied by its plate group,
# then move the four annotation columns to the front and drop the original identifier columns.
df['ECF'] = df['Content'].map(map_ecf)
df['Intein'] = df['Content'].map(map_int)
# (groups, Ara, Cuma): two plate groups share each of the four inducer combinations
induction_by_group = [
    (['A', 'E'], 0, 0),
    (['B', 'F'], 1, 0),
    (['C', 'G'], 0, 1),
    (['D', 'H'], 1, 1),
]
for groups, ara, cuma in induction_by_group:
    in_groups = df['Group'].isin(groups)
    df.loc[in_groups, 'Ara'] = ara
    df.loc[in_groups, 'Cuma'] = cuma
annotation_cols = ['ECF', 'Intein', 'Ara', 'Cuma']
df = pd.concat(
    [df[annotation_cols], df.drop(annotation_cols + ['Well', 'Content', 'Group'], axis=1)],
    axis=1,
)
df.head()

In [None]:
# First 77 columns: the four annotation columns followed by the first 73 measurement columns.
cols = list(range(0,77))
fluo = df.iloc[:, cols]
#fluo.to_csv('datasets/fluoOD-all.csv', index=False)
fluo.head()

In [None]:
# The four annotation columns plus measurement columns 77-149 (the next 73 columns).
cols = list(range(0,4)) + list(range(77,150))
od = df.iloc[:, cols]
#od.to_csv('datasets/OD-all.csv', index=False)
od.head()

In [None]:
def parse_minutes(x):
    """Convert a time label such as '1 h 20 min' or '2 h ' into total minutes.

    The first space-separated token is the hour count; the third token, when
    present and non-empty, is the minute count. Anything from the first '.'
    onwards is ignored, so a '.1' / '.2' suffix on a repeated label is tolerated.

    Returns
    -------
    int
        hours * 60 + minutes.
    """
    spl = x.split('.')[0].split(' ')
    hours = int(spl[0]) * 60
    # A label like '3 h' has no third token at all; treat it as zero minutes.
    mins = int(spl[2]) if len(spl) > 2 and spl[2] != '' else 0
    return hours + mins

# Rows measured with Ara = 1 and Cuma = 0, reshaped to one row per time point and one column per ECF+intein.
# .copy() detaches the selection from `fluo`, so adding a column below does not write into a slice
# (which triggers SettingWithCopyWarning).
fluo11 = fluo[(fluo['Ara']==1) & (fluo['Cuma']==0)].copy()
fluo11['index'] = fluo11['ECF'] + fluo11['Intein']
fluo11 = fluo11.set_index('index').drop(['ECF', 'Intein', 'Ara', 'Cuma'], axis=1)
fluo11 = fluo11.transpose().reset_index()

# After reset_index the time labels are looked up in the column labelled 0.
fluo11['time'] = fluo11[0].apply(parse_minutes)
fluo11 = fluo11.set_index('time')
fluo11 = fluo11.drop(0, axis=1)
fluo11

In [None]:
# Same reshaping for the OD columns: rows with Ara = 1 and Cuma = 0, one row per time point.
# .copy() detaches the selection from `od`, so adding a column below does not write into a slice
# (which triggers SettingWithCopyWarning).
od11 = od[(od['Ara']==1) & (od['Cuma']==0)].copy()
od11['index'] = od11['ECF'] + od11['Intein']
od11 = od11.set_index('index').drop(['ECF', 'Intein', 'Ara', 'Cuma'], axis=1)
od11 = od11.transpose().reset_index()

# After reset_index the time labels are looked up in the column labelled 0.
od11['time'] = od11[0].apply(parse_minutes)
od11 = od11.set_index('time')
od11 = od11.drop(0, axis=1)
od11

In [None]:
# Convert the fluorescence columns to float one at a time, reporting any column that cannot be converted.
for col in fluo11.columns:
    try:
        fluo11[col] = fluo11[col].astype(float)
    except (ValueError, TypeError):
        # Only conversion failures are tolerated; the column is left unchanged and its name is printed.
        print(col)

In [None]:
# Convert every OD column to float; unlike the fluorescence cell, a failed conversion raises here.
for col in od11.columns:
    od11[col] = od11[col].astype(float)

In [None]:
# Export fluorescence, OD and their element-wise product.
# NOTE(review): the frames are named *11 but hold the Ara = 1 / Cuma = 0 selection, which matches the '-10' file names.
fluo11.to_csv('datasets/fluo-10.csv')
od11.to_csv('datasets/od-10.csv')
(fluo11 * od11).to_csv('datasets/bulk-fluo-10.csv')

### Sequence to Function

In [None]:
# Read every GenBank file in datasets/sequences/ into a two-column table (file name without its last
# three characters, sequence text).
# NOTE(review): `os` and `SeqIO` are not among the imports shown at the top of the notebook — confirm they
# are imported before this cell on a fresh kernel.
filenames = sorted(os.listdir('datasets/sequences/'))
buffer = {}
for filename in tqdm(filenames):
    gb_file = "datasets/sequences/" + filename
    # the context manager closes each file once its records have been read
    with open(gb_file, "r") as handle:
        for gb_record in SeqIO.parse(handle, "genbank"):
            # if a file holds several records, the last one wins
            buffer[filename[:-3]] = str(gb_record.seq)
df = pd.DataFrame.from_dict(buffer, orient='index').reset_index()
df.columns = ['full_name', 'sequence']

In [None]:
# Join each sequence to its construct record and to the single-run bulk fluorescence of both plates.
constructs = pd.read_csv('datasets/constructs.csv')
# regex=False strips the literal '*' character; as a regular expression a lone '*' is not a valid pattern,
# and the default for `regex` differs between pandas versions.
constructs['full_name'] = constructs['full_name'].str.replace("*", "", regex=False)
df2 = pd.merge(df, constructs, on="full_name", how="left")
df2 = df2.dropna()
df2 = df2[['id', 'short_name', 'full_name', 'sequence']]
fluo1 = pd.read_csv('datasets/bulk_fluo_plate_1_single.csv', index_col='time')
fluo2 = pd.read_csv('datasets/bulk_fluo_plate_2_single.csv', index_col='time')
fluo = pd.concat([fluo1, fluo2], axis=1)
# One row per construct, one column per time point; the 1220-minute column is dropped.
data = fluo.T.reset_index().drop(1220, axis=1)
data.rename(columns={'index': 'short_name'}, inplace=True)
#data.columns = ['short_name', 'fluo_20h']
# Constructs without a complete fluorescence row are removed by the final dropna().
df_final = pd.merge(df2, data, on='short_name', how='left').dropna()
df_final

In [None]:
# Sanity check: count of missing values per column in the final table.
df_final.isnull().sum()

In [None]:
# Export the sequence-to-function table (the row index is written as the first column).
df_final.to_csv('datasets/sequence_data.csv')