In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [24]:
def parse_minutes(x):
    """Convert a time-point label such as ``'1 h 20 min'`` into minutes.

    Everything from the first ``'.'`` onwards is discarded before parsing
    (presumably the ``.N`` suffix pandas appends to duplicated column
    headers -- confirm against the raw export).

    The first whitespace-separated token is read as whole hours and the
    third token, when present, as whole minutes. Labels that carry only the
    hour part (``'1 h'`` or ``'1 h '``) are treated as zero minutes.

    Parameters
    ----------
    x : str
        Label of the form ``'<hours> h'`` or ``'<hours> h <minutes> min'``.

    Returns
    -------
    int
        Total elapsed minutes.

    Raises
    ------
    ValueError
        If the label is empty or the hour/minute tokens are not integers.
    """
    label = x.split('.')[0]
    # split() without an argument collapses repeated spaces and ignores
    # leading/trailing whitespace, so '1 h', '1 h ' and '1 h  20 min' all parse.
    tokens = label.split()
    if not tokens:
        raise ValueError('cannot parse time label: {!r}'.format(x))
    hours = int(tokens[0])
    minutes = int(tokens[2]) if len(tokens) > 2 else 0
    return hours * 60 + minutes

def read_map(plate_map, ctrl='H', num_inducers=1, induction_lvl=12, ctrl_replicate=2):
    """Reshape a wide plate map into one row per well.

    The input must have a ``'group'`` column; every other column label must
    be convertible to ``int``. After melting, each row gets:

    * ``Well``   -- ``group`` followed by the column number, zero-padded to
      two digits (e.g. ``'A01'``);
    * ``value``  -- the cell content from the wide map;
    * ``suffix`` -- ``(column - 1) % induction_lvl`` for ordinary rows and
      ``(column - 1) % ctrl_replicate`` for rows whose group equals ``ctrl``.

    Parameters
    ----------
    plate_map : pandas.DataFrame
        Wide plate layout with a ``'group'`` column.
    ctrl : str
        Group label of the control row.
    num_inducers : int
        Accepted for interface compatibility; not used by this function.
    induction_lvl : int
        Modulus applied to the zero-based column number of non-control rows.
    ctrl_replicate : int
        Modulus applied to the zero-based column number of control rows.

    Returns
    -------
    pandas.DataFrame
        Columns ``['Well', 'value', 'suffix']`` with rows containing any
        missing entry removed.
    """
    long_map = plate_map.melt(id_vars=['group'])
    long_map['variable'] = long_map['variable'].astype(int)

    # Well id = group label + two-digit column number.
    column_label = long_map['variable'].map('{:02d}'.format)
    long_map['Well'] = long_map['group'] + column_label

    is_ctrl = long_map['group'].isin([ctrl])
    zero_based = long_map['variable'] - 1
    long_map.loc[~is_ctrl, 'suffix'] = zero_based % induction_lvl
    long_map.loc[is_ctrl, 'suffix'] = zero_based % ctrl_replicate

    return long_map[['Well', 'value', 'suffix']].dropna()

def generate_metadata(well, plate_map):
    """Attach a readable ``short_name`` to every well that appears in the plate map.

    ``well`` is left-merged with ``plate_map`` on ``'Well'``. Wells without a
    ``value`` are dropped, and each remaining row gets
    ``short_name = sample_map[value] + '_' + int(suffix)``.

    Rows whose ``value`` has no entry in the built-in ``sample_map`` end up
    with a missing ``short_name`` and are removed by the final ``dropna``.

    Parameters
    ----------
    well : pandas.Series
        Well identifiers; the series must be named ``'Well'`` so it can be
        merged on that key.
    plate_map : pandas.DataFrame
        Table with ``'Well'``, ``'value'`` and ``'suffix'`` columns.

    Returns
    -------
    pandas.DataFrame
        Merged rows with an added ``'short_name'`` column and no missing
        entries.
    """
    sample_map = {
        'P62': 'pBAD-rJ/Ara',
        'P63': 'araC-pBAD-rJ/Ara',
        'P64': 'pCin-rJ/OHC14',
        'P65': 'pCymRC-rJ/Cuma',
        'P66': 'cymRC-pCymRC-rJ/Cuma',
        'P66.1': 'cymRC-pCymRC-rJ/Cuma.1',
        'P67': 'pLux2-rJ/AHL',
        'P68': 'pLuxB-rJ/AHL',
        'P69': 'pPhlF-rJ/DAPG',
        'P70': 'pSalTTC-rJ/Sal',
        'P71': 'pVanCC-rJ/Van',
        'P72': 'rhaS-pRhaB-rJ/Rha',
        'P73': 'araC-pBAD/Ara',
        'E720': 'e11x33',
        'E721': 'e15x33',
        'E722': 'e16x33',
        'E723': 'e20x33',
        'E724': 'e32x33',
        'E725': 'e34x33',
        'E726': 'e38x33',
        'E727': 'e41x33',
        'E728': 'e42x33',
        # The e11 entry is keyed 'G720' so the G series mirrors E720-E728.
        # Keying it 'G726' would collide with the e38 entry below, and a
        # duplicate dict key silently keeps only the last value.
        # NOTE(review): confirm 'G720' against the strain list.
        'G720': 'e11x32STPhoRadA',
        'G721': 'e15x32NpuSspS2',
        'G722': 'e16x33NrdA2',
        'G723': 'e20x32gp411',
        'G724': 'e32x30SspGyrB',
        'G725': 'e34x30MjaKlbA',
        'G726': 'e38x32gp418',
        'G727': 'e41x32NrdJ1',
        'G728': 'e42x32STIMPDH1',
        'A65': 'e16x30SspGyrB',
        'A191': 'e32x33NrdJ1',
        'BK': 'blank-kan',
        'BA': 'blank-amp',
        '3K3-N': 'negative-control-kan',
        '4A3-N': 'negative-control-amp',
        '4A3-P': 'positive-control-amp'
    }
    merged = pd.merge(well, plate_map, on='Well', how='left')
    # Wells absent from the plate map have no 'value' after the left merge.
    df = merged.dropna(subset=['value']).reset_index(drop=True)
    df['short_name'] = df['value'].map(sample_map) + '_' + df['suffix'].astype(int).astype(str)
    # Drops rows whose value was not found in sample_map (short_name is NaN).
    return df.dropna()


def transpose_data(df, col):
    """Transpose a wide table so rows are time points and columns are samples.

    ``col`` becomes the index and the frame is transposed, so each original
    column label turns into a row. Those labels are converted to minutes
    with ``parse_minutes`` and used as the new ``'time'`` index.

    The input frame is left unmodified.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one identifier column ``col``; all other column labels
        must be strings that ``parse_minutes`` accepts.
    col : str
        Name of the identifier column whose values become the output
        column labels.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by ``'time'`` (minutes) with one column per value of
        ``col``.
    """
    # Non-inplace set_index: the caller's DataFrame must not be altered.
    wide = df.set_index(col).transpose().reset_index()
    # After reset_index the former column labels live in the 'index' column.
    wide['time'] = wide['index'].apply(parse_minutes)
    return wide.set_index('time').drop('index', axis=1)

def generate_data(df, name, h=24, m=0, num_data=4, start_idx=3, col='short_name'):
    """Cut ``df`` into consecutive column blocks and transpose each one.

    Starting at column position ``start_idx``, ``num_data`` adjacent blocks
    are taken. Every block is ``h * 3 + 1 + int(m / 20)`` columns wide
    (presumably one reading every 20 minutes over ``h`` hours and ``m``
    minutes, start point included -- confirm against the plate-reader
    protocol). Each block is cast to float, prefixed with ``name`` and
    passed to ``transpose_data``.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table whose measurement columns start at ``start_idx``.
    name : pandas.Series
        Sample labels, concatenated column-wise in front of each block; its
        name must equal ``col``.
    h, m : int
        Run length in hours and minutes, used to size each block.
    num_data : int
        Number of consecutive blocks to extract.
    start_idx : int
        Column position of the first block.
    col : str
        Identifier column handed to ``transpose_data``.

    Returns
    -------
    list of pandas.DataFrame
        One transposed table per block, in column order.
    """
    block_width = h * 3 + 1 + int(m/20)
    tables = []
    lo = start_idx
    for _ in range(num_data):
        hi = lo + block_width
        block = df.iloc[:, lo:hi].astype(float)
        labelled = pd.concat([name, block], axis=1)
        tables.append(transpose_data(labelled, col))
        # The next block starts where this one ended.
        lo = hi
    return tables

In [27]:
# Experiment folder that holds the plate map and the raw export.
folder = '031-new-promoters-characterization-rep3'

# Plate layout -> one row per well (Well, value, suffix).
plate_map_csv = 'datasets/experiment/{}/plate_map.csv'.format(folder)
plate_map = read_map(pd.read_csv(plate_map_csv))

# Line 0 of the raw export is skipped, so the following line supplies the header.
raw_csv = 'datasets/experiment/{}/raw.csv'.format(folder)
raw_data = pd.read_csv(raw_csv, skiprows=[0])

metadata = generate_metadata(raw_data['Well'], plate_map)

# Keep only the wells that received a short_name in the metadata.
in_metadata = raw_data['Well'].isin(metadata['Well'])
data = raw_data[in_metadata].reset_index(drop=True)

# Eight consecutive column blocks, each sized for h=18, m=40.
datas = generate_data(data, metadata['short_name'], h=18, m=40, num_data=8)

In [28]:
# Write each table in `datas` to its own CSV inside the experiment folder.
# The file stems are listed in the same order as the tables in `datas`.
output_stems = [
    'ods',
    'fluos',
    'fluos-lower-1',
    'fluos-lower-2',
    'raw-ods',
    'raw-fluos',
    'raw-fluos-lower-1',
    'raw-fluos-lower-2',
]
for position, stem in enumerate(output_stems):
    datas[position].to_csv('datasets/experiment/{}/{}.csv'.format(folder, stem))