In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [2]:
def parse_minutes(x):
    """Convert a time label such as ``'1 h 20 min'`` into whole minutes.

    Everything after the first ``'.'`` is discarded before parsing
    (presumably the ``.1``/``.2`` suffix pandas appends to repeated column
    names -- confirm against the raw export). The remainder is split on
    single spaces; the first token is the hour count and the third token,
    when present and non-empty, is the minute count.

    Parameters
    ----------
    x : str
        Time label, e.g. ``'1 h 20 min'``, ``'1 h '`` or ``'1 h'``.

    Returns
    -------
    int
        ``hours * 60 + minutes``. Minutes default to 0 when the label has no
        minutes token.

    Raises
    ------
    ValueError
        If the hour or minute token is not an integer literal.
    """
    tokens = x.split('.')[0].split(' ')
    hours = int(tokens[0])
    # Whole-hour labels may stop right after the unit ('1 h') or keep a
    # trailing space ('1 h '); both mean zero minutes.
    has_minutes = len(tokens) > 2 and tokens[2] != ''
    minutes = int(tokens[2]) if has_minutes else 0
    return hours * 60 + minutes

def read_map(plate_map, ctrl='H', num_inducers=1, induction_lvl=12, ctrl_replicate=2):
    """Flatten a wide plate layout into one row per well.

    Parameters
    ----------
    plate_map : pandas.DataFrame
        Wide layout with a ``group`` column and one column per plate column
        number; ``group`` plus the zero-padded column number forms the well
        id (e.g. ``'A01'``).
    ctrl : str
        ``group`` value whose wells are treated as controls.
    num_inducers : int
        Accepted for interface compatibility; not used by this function.
    induction_lvl : int
        Modulus applied to the zero-based column number of non-control wells.
    ctrl_replicate : int
        Modulus applied to the zero-based column number of control wells.

    Returns
    -------
    pandas.DataFrame
        Columns ``Well``, ``value`` and ``suffix``; rows with any missing
        entry are dropped.
    """
    wells = plate_map.melt(id_vars=['group'])
    wells['variable'] = wells['variable'].astype(int)
    wells['Well'] = wells['group'] + wells['variable'].apply('{:02d}'.format)

    zero_based_col = wells['variable'] - 1
    in_ctrl_group = wells['group'].isin([ctrl])
    # Non-control wells are numbered modulo induction_lvl, control wells
    # modulo ctrl_replicate.
    wells.loc[~in_ctrl_group, 'suffix'] = zero_based_col % induction_lvl
    wells.loc[in_ctrl_group, 'suffix'] = zero_based_col % ctrl_replicate
    return wells[['Well', 'value', 'suffix']].dropna()

def generate_metadata(well, plate_map):
    """Attach a human-readable ``short_name`` to every mapped well.

    Parameters
    ----------
    well : pandas.Series
        Well identifiers, named ``'Well'`` so it can be merged on that key.
    plate_map : pandas.DataFrame
        One row per well with columns ``Well``, ``value`` (sample code) and
        ``suffix`` (numeric index appended to the name).

    Returns
    -------
    pandas.DataFrame
        Rows of ``well`` that have a plate-map entry and a known sample code,
        in the original order, with an added ``short_name`` column of the
        form ``'<sample name>_<suffix>'``. The index is reset to 0..n-1 so it
        lines up positionally with any frame filtered to the same wells.
    """
    sample_map = {
        'P62': 'pBAD-rJ/Ara',
        'P63': 'araC-pBAD-rJ/Ara',
        'P64': 'pCin-rJ/OHC14',
        'P65': 'pCymRC-rJ/Cuma',
        'P66': 'cymRC-pCymRC-rJ/Cuma',
        'P66.1': 'cymRC-pCymRC-rJ/Cuma.1',
        'P67': 'pLux2-rJ/AHL',
        'P68': 'pLuxB-rJ/AHL',
        'P69': 'pPhlF-rJ/DAPG',
        'P70': 'pSalTTC-rJ/Sal',
        'P71': 'pVanCC-rJ/Van',
        'P72': 'rhaS-pRhaB-rJ/Rha',
        'P73': 'araC-pBAD/Ara',
        'E720': 'e11x33',
        'E721': 'e15x33',
        'E722': 'e16x33',
        'E723': 'e20x33',
        'E724': 'e32x33',
        'E725': 'e34x33',
        'E726': 'e38x33',
        'E727': 'e41x33',
        'E728': 'e42x33',
        # This key used to be a second 'G726', which the later 'G726' entry
        # silently overwrote. 'G720' follows the E720..E728 / G721..G728
        # numbering -- confirm against the strain list.
        'G720': 'e11x32STPhoRadA',
        'G721': 'e15x32NpuSspS2',
        'G722': 'e16x33NrdA2',
        'G723': 'e20x32gp411',
        'G724': 'e32x30SspGyrB',
        'G725': 'e34x30MjaKlbA',
        'G726': 'e38x32gp418',
        'G727': 'e41x32NrdJ1',
        'G728': 'e42x32STIMPDH1',
        'A65': 'e16x30SspGyrB',
        'A191': 'e32x33NrdJ1',
        'BK': 'blank-kan',
        'BA': 'blank-amp',
        '3K3-N': 'negative-control-kan',
        '4A3-N': 'negative-control-amp',
        '4A3-P': 'positive-control-amp'
    }
    # Keep only wells that appear in the plate map.
    df = (
        pd.merge(well, plate_map, on='Well', how='left')
        .dropna(subset=['value'])
        .reset_index(drop=True)
    )
    # Unknown sample codes map to NaN, which propagates through the string
    # concatenation and is removed by the dropna below.
    df['short_name'] = df['value'].map(sample_map) + '_' + df['suffix'].astype(int).astype(str)
    # Reset the index after dropping unmapped samples: callers concatenate
    # short_name by index against a freshly re-indexed data frame, so gaps
    # here would attach names to the wrong wells.
    return df.dropna().reset_index(drop=True)


def transpose_data(df, col):
    """Turn a wells-by-timepoints frame into a timepoints-by-wells frame.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per well; ``col`` holds the label for each row and every
        other column is named with a time label understood by
        ``parse_minutes``. The frame is not modified.
    col : str
        Name of the column whose values become the output column labels.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``time`` (minutes, as returned by ``parse_minutes``) with
        one column per value of ``col``.
    """
    # set_index without inplace=True so the caller's frame is left untouched.
    flipped = df.set_index(col).transpose().reset_index()
    # After reset_index the former column labels (time strings) sit in 'index'.
    flipped['time'] = flipped['index'].apply(parse_minutes)
    return flipped.set_index('time').drop('index', axis=1)

def generate_data(df, name, h=24, m=0, num_data=4, start_idx=3, col='short_name'):
    """Split a wide raw table into consecutive, equally wide measurement blocks.

    Starting at column ``start_idx``, ``num_data`` adjacent column ranges of
    width ``h * 3 + 1 + int(m / 20)`` are taken from ``df`` (presumably one
    column per 20-minute read over ``h`` hours ``m`` minutes, including
    t = 0 -- confirm against the reader settings). Each range is cast to
    float, labelled with ``name`` and passed through ``transpose_data``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table, one row per well.
    name : pandas.Series
        Labels concatenated column-wise (by index) in front of each block;
        its name must equal ``col``.
    h, m : int
        Hours and minutes used to derive the block width.
    num_data : int
        Number of blocks to extract.
    start_idx : int
        Position of the first column of the first block.
    col : str
        Label column name forwarded to ``transpose_data``.

    Returns
    -------
    list of pandas.DataFrame
        One transposed frame per block, in column order.
    """
    block_width = h * 3 + 1 + int(m / 20)
    edges = [start_idx + k * block_width for k in range(num_data + 1)]

    frames = []
    for left, right in zip(edges[:-1], edges[1:]):
        block = df.iloc[:, left:right].astype(float)
        labelled = pd.concat([name, block], axis=1)
        frames.append(transpose_data(labelled, col))
    return frames

In [3]:
# Experiment to process; every path below is resolved under datasets/experiment/<folder>/.
folder = '031-new-promoters-characterization-rep3'

# Plate layout -> one row per well (Well, value, suffix).
plate_layout = pd.read_csv('datasets/experiment/{}/plate_map.csv'.format(folder))
plate_map = read_map(plate_layout)

# Raw export; the first line of the file is skipped.
raw_data = pd.read_csv(
    'datasets/experiment/{}/raw.csv'.format(folder),
    skiprows=[0],
)

# Keep only wells that received a short_name, re-indexed from 0.
metadata = generate_metadata(raw_data['Well'], plate_map)
is_mapped_well = raw_data['Well'].isin(metadata['Well'])
data = raw_data[is_mapped_well].reset_index(drop=True)

# Eight blocks, each sized for h=18, m=40 (see generate_data for the width formula).
datas = generate_data(
    data,
    metadata['short_name'],
    h=18,
    m=40,
    num_data=8,
)

In [5]:
# Preview the first rows of the second block in `datas`
# (the block written to fluos.csv by the export cell).
datas[1].head()

short_name,araC-pBAD/Ara_0,araC-pBAD/Ara_1,araC-pBAD/Ara_2,araC-pBAD/Ara_3,araC-pBAD/Ara_4,araC-pBAD/Ara_5,araC-pBAD/Ara_6,araC-pBAD/Ara_7,araC-pBAD/Ara_8,araC-pBAD/Ara_9,araC-pBAD/Ara_10,araC-pBAD/Ara_11,pBAD-rJ/Ara_0,pBAD-rJ/Ara_1,pBAD-rJ/Ara_2,pBAD-rJ/Ara_3,pBAD-rJ/Ara_4,pBAD-rJ/Ara_5,pBAD-rJ/Ara_6,pBAD-rJ/Ara_7,pBAD-rJ/Ara_8,pBAD-rJ/Ara_9,pBAD-rJ/Ara_10,pBAD-rJ/Ara_11,araC-pBAD-rJ/Ara_0,araC-pBAD-rJ/Ara_1,araC-pBAD-rJ/Ara_2,araC-pBAD-rJ/Ara_3,araC-pBAD-rJ/Ara_4,araC-pBAD-rJ/Ara_5,araC-pBAD-rJ/Ara_6,araC-pBAD-rJ/Ara_7,araC-pBAD-rJ/Ara_8,araC-pBAD-rJ/Ara_9,araC-pBAD-rJ/Ara_10,araC-pBAD-rJ/Ara_11,cymRC-pCymRC-rJ/Cuma_0,cymRC-pCymRC-rJ/Cuma_1,cymRC-pCymRC-rJ/Cuma_2,cymRC-pCymRC-rJ/Cuma_3,cymRC-pCymRC-rJ/Cuma_4,cymRC-pCymRC-rJ/Cuma_5,cymRC-pCymRC-rJ/Cuma_6,cymRC-pCymRC-rJ/Cuma_7,cymRC-pCymRC-rJ/Cuma_8,cymRC-pCymRC-rJ/Cuma_9,cymRC-pCymRC-rJ/Cuma_10,cymRC-pCymRC-rJ/Cuma_11,pCymRC-rJ/Cuma_0,pCymRC-rJ/Cuma_1,pCymRC-rJ/Cuma_2,pCymRC-rJ/Cuma_3,pCymRC-rJ/Cuma_4,pCymRC-rJ/Cuma_5,pCymRC-rJ/Cuma_6,pCymRC-rJ/Cuma_7,pCymRC-rJ/Cuma_8,pCymRC-rJ/Cuma_9,pCymRC-rJ/Cuma_10,pCymRC-rJ/Cuma_11,pLux2-rJ/AHL_0,pLux2-rJ/AHL_1,pLux2-rJ/AHL_2,pLux2-rJ/AHL_3,pLux2-rJ/AHL_4,pLux2-rJ/AHL_5,pLux2-rJ/AHL_6,pLux2-rJ/AHL_7,pLux2-rJ/AHL_8,pLux2-rJ/AHL_9,pLux2-rJ/AHL_10,pLux2-rJ/AHL_11,cymRC-pCymRC-rJ/Cuma.1_0,cymRC-pCymRC-rJ/Cuma.1_1,cymRC-pCymRC-rJ/Cuma.1_2,cymRC-pCymRC-rJ/Cuma.1_3,cymRC-pCymRC-rJ/Cuma.1_4,cymRC-pCymRC-rJ/Cuma.1_5,cymRC-pCymRC-rJ/Cuma.1_6,cymRC-pCymRC-rJ/Cuma.1_7,cymRC-pCymRC-rJ/Cuma.1_8,cymRC-pCymRC-rJ/Cuma.1_9,cymRC-pCymRC-rJ/Cuma.1_10,cymRC-pCymRC-rJ/Cuma.1_11,blank-kan_0,blank-kan_1,negative-control-kan_0,negative-control-kan_1,blank-amp_0,blank-amp_1,negative-control-amp_0,negative-control-amp_1,positive-control-amp_0,positive-control-amp_1
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1
0,22.0,35.0,19.0,-2.0,21.0,6.0,5.0,6.0,-1.0,4.0,27.0,5.0,-155.0,47.0,72.0,86.0,65.0,96.0,80.0,73.0,92.0,80.0,100.0,85.0,80.0,109.0,109.0,83.0,75.0,91.0,96.0,87.0,85.0,90.0,82.0,68.0,51.0,64.0,78.0,82.0,70.0,53.0,58.0,60.0,55.0,47.0,59.0,81.0,67.0,70.0,57.0,69.0,37.0,50.0,59.0,66.0,57.0,78.0,46.0,51.0,42.0,67.0,73.0,44.0,58.0,58.0,69.0,66.0,47.0,53.0,61.0,52.0,9.0,32.0,67.0,27.0,50.0,38.0,13.0,32.0,54.0,31.0,41.0,49.0,,,77.0,44.0,,,44.0,108.0,1042.0,829.0
20,29.0,32.0,42.0,37.0,16.0,47.0,14.0,15.0,7.0,6.0,31.0,17.0,-162.0,37.0,74.0,70.0,72.0,80.0,64.0,41.0,61.0,76.0,96.0,73.0,63.0,65.0,95.0,69.0,56.0,73.0,73.0,67.0,66.0,81.0,95.0,73.0,52.0,40.0,67.0,67.0,40.0,48.0,57.0,55.0,65.0,45.0,62.0,60.0,64.0,80.0,73.0,56.0,53.0,62.0,79.0,110.0,98.0,111.0,100.0,120.0,75.0,54.0,64.0,49.0,61.0,55.0,52.0,43.0,39.0,44.0,66.0,57.0,59.0,37.0,37.0,41.0,34.0,21.0,29.0,21.0,42.0,48.0,61.0,51.0,,,67.0,28.0,,,67.0,96.0,942.0,870.0
40,30.0,30.0,35.0,22.0,20.0,24.0,-3.0,15.0,1.0,4.0,9.0,16.0,-151.0,44.0,65.0,69.0,56.0,61.0,62.0,48.0,44.0,73.0,65.0,63.0,68.0,59.0,48.0,67.0,58.0,63.0,67.0,43.0,54.0,58.0,68.0,54.0,72.0,18.0,63.0,45.0,32.0,53.0,55.0,42.0,86.0,104.0,121.0,115.0,41.0,57.0,42.0,35.0,37.0,121.0,254.0,388.0,390.0,451.0,467.0,494.0,49.0,73.0,57.0,40.0,49.0,40.0,86.0,82.0,93.0,82.0,111.0,97.0,40.0,42.0,39.0,30.0,26.0,4.0,17.0,41.0,68.0,120.0,129.0,114.0,,,56.0,27.0,,,51.0,93.0,1268.0,1187.0
60,43.5,33.5,33.5,20.5,17.5,20.5,23.5,26.5,32.5,36.5,45.5,48.5,-145.5,66.5,67.5,67.5,76.5,79.5,66.5,47.5,58.5,62.5,86.5,77.5,57.5,77.5,84.5,65.5,53.5,85.5,63.5,63.5,61.5,70.5,77.5,69.5,79.5,41.5,68.5,56.5,53.5,57.5,55.5,69.5,142.5,178.5,221.5,223.5,61.5,46.5,47.5,35.5,55.5,138.5,436.5,778.5,819.5,1014.5,1041.5,1056.5,62.5,66.5,65.5,58.5,46.5,80.5,95.5,172.5,182.5,221.5,226.5,233.5,37.5,34.5,53.5,19.5,38.5,18.5,22.5,48.5,145.5,194.5,203.5,257.5,,,80.5,52.5,,,69.5,78.5,1709.5,1657.5
80,44.0,42.0,28.0,16.0,18.0,15.0,25.0,21.0,22.0,21.0,40.0,32.0,-145.0,58.0,79.0,56.0,60.0,70.0,83.0,39.0,65.0,55.0,52.0,78.0,69.0,68.0,82.0,68.0,51.0,72.0,68.0,61.0,76.0,53.0,73.0,70.0,61.0,24.0,49.0,60.0,35.0,60.0,59.0,92.0,161.0,236.0,258.0,321.0,63.0,37.0,67.0,51.0,48.0,142.0,449.0,985.0,1379.0,1787.0,1848.0,1920.0,63.0,53.0,59.0,50.0,42.0,87.0,161.0,286.0,356.0,375.0,407.0,426.0,60.0,38.0,46.0,21.0,22.0,6.0,23.0,49.0,156.0,267.0,303.0,351.0,,,77.0,50.0,,,78.5,80.5,2096.5,2112.5


In [18]:
# Subtract the per-timepoint mean of the two blank-kan wells from one well
# of block 5 and show the first rows of the result.
blank_kan_mean = datas[5][['blank-kan_0', 'blank-kan_1']].mean(axis=1)
(datas[5]['araC-pBAD/Ara_0'] - blank_kan_mean).head()

time
0     22.0
20    29.0
40    30.0
60    43.5
80    44.0
dtype: float64

In [28]:
# Write each block of `datas` to its own CSV; the position in this list is
# the position in `datas`.
output_templates = [
    'datasets/experiment/{}/ods.csv',
    'datasets/experiment/{}/fluos.csv',
    'datasets/experiment/{}/fluos-lower-1.csv',
    'datasets/experiment/{}/fluos-lower-2.csv',
    'datasets/experiment/{}/raw-ods.csv',
    'datasets/experiment/{}/raw-fluos.csv',
    'datasets/experiment/{}/raw-fluos-lower-1.csv',
    'datasets/experiment/{}/raw-fluos-lower-2.csv',
]
for position, template in enumerate(output_templates):
    # Index explicitly so a short `datas` list still raises IndexError.
    datas[position].to_csv(template.format(folder))