In [1]:
import os
from Bio import SeqIO
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [3]:
# Load the construct lookup table (columns: id, short_name, full_name) and display it.
constructs = pd.read_csv('datasets/constructs.csv')
constructs

Unnamed: 0,id,short_name,full_name
0,A1,e11x32M86,3K3-P11*-gfp-B15-P(ara)-sECF11*-SP1-P(Cym)-32-...
1,A2,e11x30STNpuSspS2,3K3-P11*-gfp-B15-P(ara)-sECF11*-SP2-P(Cym)-30-...
2,A3,e11x32NpuSspS2,3K3-P11*-gfp-B15-P(ara)-sECF11*-SP2-P(Cym)-32-...
3,A4,e11x30NpuSspS1,3K3-P11*-gfp-B15-P(ara)-sECF11*-SP2-P(Cym)-30-...
4,A5,e11x32gp411,3K3-P11*-gfp-B15-P(ara)-sECF11*-SP3-P(Cym)-32-...
...,...,...,...
360,3K-E422,gfp_med,
361,E422,gfp_low,
362,C+,positive_control,
363,C-,negative_control,


In [4]:
def parse_minutes(x):
    """Convert a plate-reader time label into a total number of minutes.

    The label is split on single spaces: the first token is the hour count
    and the third token, when present and non-empty, is the minute count
    (e.g. ``'1 h 20 min'`` -> 80, ``'2 h '`` -> 120).

    A label with no minute token at all (e.g. ``'3 h'``) counts as zero
    minutes instead of raising ``IndexError``.
    """
    spl = x.split(' ')
    hours = int(spl[0]) * 60
    # Check the length first: a label without a minute field has fewer than
    # three tokens, so spl[2] would not exist.
    mins = int(spl[2]) if len(spl) > 2 and spl[2] != '' else 0
    return hours + mins

def read_plate_data(df):
    """Promote the first row to column headers and keep only complete rows.

    The first row of ``df`` becomes the column labels, that row is removed,
    every row containing a missing value is dropped, and the row index is
    renumbered from 0.

    The caller's frame is left untouched; a new DataFrame is returned.
    """
    header = df.iloc[0]
    # Work on a copy so that re-running a cell on the same raw frame is safe.
    table = df.iloc[1:].copy()
    table.columns = header
    return table.dropna().reset_index(drop=True)

def transpose_data(df):
    """Turn a samples-by-timepoints table into a timepoints-by-samples table.

    ``df`` must have a ``short_name`` column; every other column label is
    passed to ``parse_minutes``. The result has one column per sample and is
    indexed by ``time`` in minutes.

    The caller's frame is not modified.
    """
    # Non-inplace set_index: the original in-place call altered the argument.
    df = df.set_index('short_name')
    df = df.transpose().reset_index()
    # After the transpose the former column labels sit in the 'index' column.
    df['time'] = df['index'].apply(parse_minutes)
    return df.set_index('time').drop('index', axis=1)

def generate_data(df, plate, name, h=20, m=0):
    """Cut a plate-reader table into three time-indexed measurement tables.

    Columns from position 3 onward are split into three consecutive blocks.
    The first two blocks are each ``h * 3 + 1 - m`` columns wide; the third
    takes whatever columns remain. Each block is cast to float, labelled with
    ``name`` and passed through ``transpose_data``.

    ``plate`` is accepted but not used.
    NOTE(review): the factor 3 presumably means three reads per hour and ``m``
    a number of missing reads -- confirm against the instrument export.

    Returns the tuple ``(fluo, od, fluo_half)``.
    """
    first = 3
    width = h * 3 + 1 - m
    bounds = [
        (first, first + width),
        (first + width, first + 2 * width),
        (first + 2 * width, None),
    ]
    fluo, od, fluo_half = (
        transpose_data(pd.concat([name, df.iloc[:, lo:hi].astype(float)], axis=1))
        for lo, hi in bounds
    )
    return fluo, od, fluo_half

### Marionette Strain

In [5]:
def read_plate_map(data):
    """Split a plate-map table (indexed by ``group``) into two tables.

    Returns ``(con, mar)``:
    * ``con`` -- the first two columns, with ``group`` restored as a column.
    * ``mar`` -- the remaining columns melted to long form with an ``id``
      column (the cell value) and a ``well`` column built from the group
      letter plus the zero-padded column label (e.g. ``A01``).
    """
    controls = data.iloc[:, :2].reset_index()

    long = data.iloc[:, 2:].reset_index().melt(id_vars=['group'])
    padded = long['variable'].apply(lambda label: "{:02d}".format(int(label)))
    long['variable'] = padded
    long['well'] = long['group'] + long['variable']
    samples = long.drop(['group', 'variable'], axis=1).rename(columns={'value': 'id'})
    return controls, samples

# Read the plate map with 'group' as the index and split it into the two tables returned by read_plate_map.
con, mar = read_plate_map(pd.read_csv('datasets/mario_map.csv', index_col=['group']))

In [6]:
def read_plate_data(df):
    """Promote the first row of ``df`` to column headers.

    The first row becomes the column labels, that row is removed and the row
    index is renumbered from 0. Rows with missing values are kept.

    The caller's frame is left untouched; a new DataFrame is returned.
    """
    header = df.iloc[0]
    # Copy first so the raw frame passed in is never altered.
    table = df.iloc[1:].copy()
    table.columns = header
    return table.reset_index(drop=True)

# Load the raw plate-reader export; its first row holds the real column headers.
data = read_plate_data(pd.read_csv('datasets/marrionette.csv'))

In [7]:
# Attach a construct id to every measured well, then look up the construct names.
merged = pd.merge(data['well'], mar, on='well', how='left')
merged = pd.merge(merged, constructs, on='id', how='left')
# NOTE(review): dropna() followed by reset_index() renumbers the remaining names. If a well
# in the middle of the plate had no name, later names would no longer line up row-for-row
# with `data` -- confirm that no such wells exist.
name = merged['short_name'].dropna().reset_index(drop=True) #just to make sure there is no null and indexing is correct

In [34]:
# Inspect the well -> id -> construct-name join.
merged

Unnamed: 0,well,id,short_name,full_name
0,A01,A18,e11x32STPhoRadA,3K3-P11*-gfp-B15-P(ara)-sECF11*-SP11-P(Cym)-32...
1,A02,A29,e15x32NpuSspS2,3K3-P15*-gfp-B15-P(ara)-sECF15*-SP2-P(Cym)-32-...
2,A03,A76,e16x33NrdA2,3K3-P16*-gfp-B15-P(ara)-sECF16*-SP31-P(Cym)-33...
3,A04,A109,e20x32gp411,3K3-P20*-gfp-B15-P(ara)-sECF20*-SP3-P(Cym)-32-...
4,A05,A195,e32x30SspGyrB,3K3-P32*-gfp-B15-P(ara)-sECF32*-SP8-P(Cym)-30-...
...,...,...,...,...
91,H08,A294,e41x32NrdJ1,3K3-P41*-gfp-B15-P(ara)-sECF41*-SP5-P(Cym)-32-...
92,H09,A323,e42x32STIMPDH1,3K3-P42*-gfp-B15-P(ara)-sECF42*-SP6-P(Cym)-32-...
93,H10,C-,negative_control,
94,H11,C+,positive_control,


In [8]:
# Split the export into its three measurement tables, using h=24.
mar_fluo, mar_od, mar_fluo_half = generate_data(data, mar, name, 24)
# Distinct sample names, in order of first appearance; read as a global by reformat_df.
gates = name.unique().tolist()

In [9]:
def reformat_df(data, gate_names=None):
    """Split each gate's columns into two relabelled tables.

    For every gate, ``data[gate]`` must select eight columns. The first four
    go into the first returned table and the remaining four into the second.
    In both tables the four columns are relabelled ``<gate>_00``,
    ``<gate>_10``, ``<gate>_01`` and ``<gate>_11``.

    Parameters
    ----------
    data : DataFrame
        Table whose column labels are gate names (repeated per condition).
    gate_names : iterable of str, optional
        Gates to extract. Defaults to the notebook-level ``gates`` list, which
        preserves the original call signature ``reformat_df(data)``.

    Returns
    -------
    (top10_wrapper, mario_wrapper) : tuple of DataFrame
        Both are empty DataFrames when there are no gates.
    """
    if gate_names is None:
        gate_names = gates

    suffixes = ('_00', '_10', '_01', '_11')
    top10_parts, mario_parts = [], []
    for gate in gate_names:
        block = data[gate]
        labels = [gate + suffix for suffix in suffixes]

        # Copy each slice before relabelling so `data` is never affected.
        top10 = block.iloc[:, :4].copy()
        top10.columns = labels
        top10_parts.append(top10)

        mario = block.iloc[:, 4:].copy()
        mario.columns = labels
        mario_parts.append(mario)

    # Concatenate once: growing a frame inside the loop copies it on every
    # iteration and starts from an empty frame with an untyped index.
    top10_wrapper = pd.concat(top10_parts, axis=1) if top10_parts else pd.DataFrame()
    mario_wrapper = pd.concat(mario_parts, axis=1) if mario_parts else pd.DataFrame()
    return top10_wrapper, mario_wrapper

In [10]:
# Split the fluorescence table into the two per-gate tables returned by reformat_df.
top10_fluo, mario_fluo = reformat_df(mar_fluo)
#top10_od, mario_od = reformat_df(mar_od)

# Disabled exports, kept for reference.
# NOTE(review): the first target file is named '..._fluo_half' although mario_fluo is built from mar_fluo -- confirm before re-enabling.
#mario_fluo.reset_index().to_csv('datasets/marionette_fluo_half.csv', index=False)
#mario_od.reset_index().to_csv('datasets/marionette_od.csv', index=False)

### Induction Matrix

In [3]:
# Accumulators for per-plate tables; a later cell appends one fluorescence and one OD table to them.
# NOTE(review): the recorded len(fluos) is 5 although only plate 5 is loaded in the visible code, so
# the loading cells were presumably re-run by hand once per plate -- confirm.
fluos = []
ods = []

In [4]:
# Sample names for the induction-matrix plates; gates[8] is passed to read_plate_data for plate 5.
gates = ['e11x32STPhoRadA', 'e15x32NpuSspS2', 'e16x33NrdA2', 'e20x32gp411', 'e32x30SspGyrB', 'e34x30MjaKlbA',
         'e38x32gp418', 'e41x32NrdJ1', 'e42x32STIMPDH1', 'positive_control', 'negative_control', 'blank']

In [5]:
def read_plate_map(raw_data):
    """Convert a wide induction plate map into one row per well.

    ``raw_data`` has a ``group`` column (plate row letter) plus one column per
    plate column number. The result has the columns:

    * ``id``   -- the cell value from the map,
    * ``well`` -- row letter plus zero-padded column number (e.g. ``A01``),
    * ``ara``  -- 0..5 for rows A..F, missing for any other row,
    * ``cuma`` -- 0..5 for columns 1..6 and again for columns 7..12.
    """
    long = raw_data.melt(id_vars=['group']).rename(columns={'value': 'id'})
    long['variable'] = long['variable'].astype(int)
    long['well'] = long['group'] + long['variable'].map("{:02d}".format)

    # Row letters become their character codes: 'A' is 65 ... 'F' is 70.
    long['group'] = long['group'].map(ord)
    for level, row_code in enumerate(range(65, 71)):
        long.loc[long['group'] == row_code, 'ara'] = level

    # Plate columns k and k + 6 share the same level.
    for level in range(6):
        long.loc[long['variable'].isin([level + 1, level + 7]), 'cuma'] = level

    return long[['id', 'well', 'ara', 'cuma']]

# Long-form induction plate map: one row per well with id, well, ara and cuma columns.
plate_map = read_plate_map(pd.read_csv('datasets/induction_matrix/induction_map.csv'))

In [23]:
def read_plate_data(df, plate_map, gate, plate_number):
    """Label every well of an induction-matrix plate and return its readings.

    Parameters
    ----------
    df : DataFrame
        Raw plate export whose first row holds the column headers; it must
        contain a ``well`` column. The frame is not modified.
    plate_map : DataFrame
        Table with ``id``, ``well``, ``ara`` and ``cuma`` columns.
    gate : str
        Name given to wells whose id is ``'E1'``.
    plate_number : int
        Suffix for wells that have no ``ara`` level.

    Returns
    -------
    (short_name, data) : (Series, DataFrame)
        ``short_name`` is ``<name>_<cuma><ara>`` for wells with an ``ara``
        level and ``<name>_<plate_number>`` otherwise. It is missing for ids
        absent from the sample map. ``data`` is the merged table without the
        ``id``, ``short_name``, ``cuma`` and ``ara`` columns.
    """
    sample_map = {
        'E1': gate,
        #'E2': gate[1],
        'P_3K3': 'positive_control_3K3',
        'P_4A3': 'positive_control_4AE',
        'N_3K3': 'negative_control_3K3',
        'N_4A3': 'negative_control_4AE',
        'B': 'blank'
    }
    # Derive the control ids from the map itself so the two cannot drift apart
    # (the previous hand-written list spelled two ids differently from the map).
    control_ids = [sample_id for sample_id in sample_map if sample_id != 'E1']

    # Promote the first row to headers on a copy, leaving the caller's frame intact.
    header = df.iloc[0]
    table = df.iloc[1:].copy()
    table.columns = header
    table = table.reset_index(drop=True)

    # Wells without an id in the plate map are discarded.
    table = (pd.merge(table, plate_map, on='well', how='left')
               .dropna(subset=['id'])
               .reset_index(drop=True))

    # Controls carry no cuma level. np.nan is used because the np.NaN alias
    # no longer exists in NumPy 2.
    table.loc[table['id'].isin(control_ids), 'cuma'] = np.nan
    table['short_name'] = table['id'].map(sample_map)

    # 9 is the sentinel for "no level"; real levels are single digits 0-5.
    table[['cuma', 'ara']] = table[['cuma', 'ara']].fillna(9).astype(int).astype(str)
    has_ara = table['ara'] != '9'
    table.loc[has_ara, 'short_name'] = table['short_name'] + '_' + table['cuma'] + table['ara']
    table.loc[~has_ara, 'short_name'] = table['short_name'] + '_' + str(plate_number)

    sel = ['id', 'short_name', 'cuma', 'ara']
    return table['short_name'], table.drop(sel, axis=1)

# Load plate 5 and label its 'E1' wells with gates[8]; plate file, gate index and plate number are hard-coded here.
name, data = read_plate_data(pd.read_csv('datasets/induction_matrix/plate5.csv'), plate_map, gates[8], 5)

In [24]:
# Wells whose id had no entry in the sample map have a missing name; give them a placeholder label.
name = name.fillna('unknown_samples')

In [7]:
def transpose_data(df):
    """Return ``df`` transposed to one row per timepoint, indexed by minutes.

    ``df`` needs a ``short_name`` column, which supplies the output column
    labels. All other column labels are converted with ``parse_minutes`` and
    become the ``time`` index.

    The input frame is not modified.
    """
    # set_index without inplace: the argument must stay usable by the caller.
    by_sample = df.set_index('short_name')
    by_time = by_sample.transpose().reset_index()
    # reset_index() places the former column labels in a column called 'index'.
    by_time['time'] = by_time['index'].apply(parse_minutes)
    return by_time.set_index('time').drop('index', axis=1)

def generate_data_simple(df, name, h=20, m=0):
    """Split a plate table into ``(fluo, od, fluo_half)`` time-indexed tables.

    Starting at column position 3, the first ``h * 3 + 1 - m`` columns form
    the first table, the next ``h * 3 + 1 - m`` columns the second, and all
    remaining columns the third. Every block is converted to float, joined
    column-wise with ``name`` and reshaped by ``transpose_data``.

    NOTE(review): ``h`` looks like a duration in hours sampled three times per
    hour, and ``m`` a count of missing reads -- confirm.
    """
    block_width = h * 3 + 1 - m
    od_start = 3 + block_width
    half_start = od_start + block_width

    def labelled(block):
        # Cast, attach the sample names, then pivot to time-major layout.
        return transpose_data(pd.concat([name, block.astype(float)], axis=1))

    return (
        labelled(df.iloc[:, 3:od_start]),
        labelled(df.iloc[:, od_start:half_start]),
        labelled(df.iloc[:, half_start:]),
    )

In [25]:
# Split the current plate into its three measurement tables, using h=24.
im_fluo, im_od, im_fluo_half = generate_data_simple(data, name, 24)

In [26]:
# Inspect the fluorescence table (one column per well label, indexed by time in minutes).
im_fluo

short_name,e42x32STIMPDH1_00,e42x32STIMPDH1_10,e42x32STIMPDH1_20,e42x32STIMPDH1_30,e42x32STIMPDH1_40,e42x32STIMPDH1_50,unknown_samples,unknown_samples,unknown_samples,e42x32STIMPDH1_01,e42x32STIMPDH1_11,e42x32STIMPDH1_21,e42x32STIMPDH1_31,e42x32STIMPDH1_41,e42x32STIMPDH1_51,unknown_samples,unknown_samples,unknown_samples,e42x32STIMPDH1_02,e42x32STIMPDH1_12,e42x32STIMPDH1_22,e42x32STIMPDH1_32,e42x32STIMPDH1_42,e42x32STIMPDH1_52,unknown_samples,unknown_samples,unknown_samples,e42x32STIMPDH1_03,e42x32STIMPDH1_13,e42x32STIMPDH1_23,e42x32STIMPDH1_33,e42x32STIMPDH1_43,e42x32STIMPDH1_53,unknown_samples,unknown_samples,unknown_samples,e42x32STIMPDH1_04,e42x32STIMPDH1_14,e42x32STIMPDH1_24,e42x32STIMPDH1_34,e42x32STIMPDH1_44,e42x32STIMPDH1_54,unknown_samples,unknown_samples,unknown_samples,e42x32STIMPDH1_05,e42x32STIMPDH1_15,e42x32STIMPDH1_25,e42x32STIMPDH1_35,e42x32STIMPDH1_45,e42x32STIMPDH1_55,unknown_samples,unknown_samples,unknown_samples,negative_control_3K3_5,negative_control_3K3_5,negative_control_3K3_5,positive_control_3K3_5,positive_control_3K3_5,positive_control_3K3_5,negative_control_4AE_5,negative_control_4AE_5,negative_control_4AE_5,positive_control_4AE_5,positive_control_4AE_5,positive_control_4AE_5,blank_5,blank_5,blank_5,blank_5,blank_5,blank_5
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1
0,37.000,37.000,67.000,63.000,64.000,94.000,116.000,71.000,,24.000,74.000,72.000,70.000,96.000,99.000,80.000,108.000,,57.000,71.000,86.000,75.000,101.000,101.000,124.000,103.000,,28.000,75.000,86.000,84.000,113.000,121.000,125.000,91.000,,84.000,81.000,62.000,89.000,94.000,106.000,115.000,94.000,,22.000,38.000,94.000,66.000,64.000,85.000,68.000,84.000,,42.000,62.000,69.000,5234.000,5420.000,5881.000,75.333,95.333,78.333,1263.333,871.333,912.333,,,,,,
20,35.333,34.333,61.333,69.333,60.333,71.333,77.333,63.333,,26.333,57.333,76.333,42.333,86.333,80.333,58.333,80.333,,37.333,67.333,64.333,85.333,83.333,94.333,97.333,81.333,,27.333,50.333,92.333,108.333,102.333,104.333,105.333,80.333,,55.333,66.333,51.333,81.333,105.333,93.333,115.333,94.333,,44.333,58.333,77.333,99.333,47.333,62.333,73.333,94.333,,91.333,81.333,107.333,4258.333,4153.333,4171.333,82.667,81.667,61.667,773.667,763.667,765.667,,,,,,
40,39.333,35.333,75.333,57.333,52.333,81.333,63.333,90.333,,34.333,52.333,42.333,63.333,72.333,70.333,63.333,65.333,,54.333,61.333,79.333,69.333,85.333,75.333,80.333,93.333,,38.333,40.333,69.333,76.333,88.333,71.333,97.333,61.333,,94.333,49.333,54.333,73.333,89.333,67.333,68.333,74.333,,24.333,62.333,60.333,71.333,52.333,44.333,79.333,61.333,,67.333,63.333,95.333,4564.333,4358.333,4402.333,88.667,74.667,69.667,1033.667,972.667,1025.667,,,,,,
60,60.333,36.333,82.333,58.333,62.333,75.333,75.333,79.333,,41.333,49.333,60.333,73.333,72.333,106.333,70.333,77.333,,38.333,53.333,77.333,71.333,97.333,85.333,87.333,81.333,,48.333,73.333,87.333,78.333,86.333,126.333,68.333,74.333,,62.333,51.333,57.333,76.333,84.333,57.333,78.333,68.333,,69.333,56.333,54.333,61.333,91.333,95.333,63.333,57.333,,94.333,102.333,108.333,4839.333,4662.333,4709.333,78.000,93.000,52.000,1368.000,1304.000,1393.000,,,,,,
80,53.000,53.000,68.000,63.000,78.000,74.000,87.000,79.000,,25.000,61.000,66.000,68.000,86.000,92.000,83.000,73.000,,77.000,74.000,93.000,82.000,71.000,83.000,92.000,83.000,,57.000,62.000,90.000,113.000,115.000,115.000,78.000,71.000,,65.000,75.000,63.000,90.000,84.000,77.000,101.000,65.000,,52.000,64.000,106.000,89.000,66.000,71.000,73.000,78.000,,77.000,103.000,103.000,5319.000,5108.000,5140.000,103.000,89.000,109.000,1802.000,1729.000,1768.000,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1360,4058.333,3942.333,4135.333,4265.333,4080.333,3860.333,3686.333,3550.333,,4038.333,6386.333,9120.333,8352.333,7359.333,7007.333,7124.333,7674.333,,4060.333,8268.333,21409.333,21330.333,19358.333,20682.333,22340.333,25277.333,,4067.333,7841.333,29916.333,35240.333,35316.333,38905.333,43718.333,47811.333,,4073.333,6672.333,33560.333,43607.333,43127.333,50855.333,59208.333,63689.333,,3995.333,6115.333,34286.333,49304.333,53749.333,57291.333,69811.333,73298.333,,3852.333,3661.333,3653.333,146708.333,145890.333,144766.333,3989.000,4001.000,3922.000,44430.000,58158.000,59290.000,,,,,,
1380,4042.000,3977.000,4173.000,4276.000,4156.000,3891.000,3722.000,3553.000,,4059.000,6470.000,9177.000,8443.000,7402.000,6966.000,7157.000,7617.000,,4108.000,8262.000,21239.000,21300.000,19458.000,20635.000,22332.000,25274.000,,4174.000,7855.000,30142.000,35102.000,35246.000,38748.000,43772.000,47712.000,,4094.000,6737.000,33442.000,43324.000,43022.000,50644.000,58720.000,63378.000,,4060.000,6151.000,34418.000,49122.000,53557.000,56698.000,68951.000,73046.000,,3891.000,3638.000,3717.000,146979.000,145881.000,144266.000,4010.000,3955.000,3963.000,44653.000,59016.000,62068.000,,,,,,
1400,4091.667,3989.667,4205.667,4291.667,4207.667,3916.667,3728.667,3598.667,,4091.667,6510.667,9200.667,8442.667,7401.667,7007.667,7136.667,7721.667,,4105.667,8315.667,21266.667,21286.667,19411.667,20504.667,22248.667,25147.667,,4194.667,7851.667,30017.667,35080.667,35092.667,38616.667,43546.667,47765.667,,4151.667,6787.667,33542.667,43429.667,42625.667,50260.667,58483.667,63011.667,,4098.667,6176.667,34279.667,49072.667,53254.667,56573.667,69084.667,72700.667,,3945.667,3690.667,3761.667,146531.667,146030.667,144139.667,4026.667,4019.667,4043.667,44636.667,66508.667,59605.667,,,,,,
1420,4137.000,4037.000,4234.000,4345.000,4235.000,3961.000,3805.000,3654.000,,4155.000,6491.000,9247.000,8437.000,7441.000,7036.000,7162.000,7682.000,,4180.000,8350.000,21281.000,21335.000,19329.000,20518.000,22185.000,25111.000,,4203.000,7882.000,30039.000,35018.000,35086.000,38643.000,43516.000,47645.000,,4199.000,6818.000,33477.000,43515.000,42744.000,50164.000,58225.000,62898.000,,4156.000,6211.000,34108.000,48803.000,53215.000,56475.000,68891.000,72709.000,,3972.000,3760.000,3784.000,147101.000,146154.000,144443.000,4109.000,4120.000,4040.000,44816.000,60889.000,61878.000,,,,,,


In [27]:
# Add the current plate's tables to the accumulators.
# NOTE: this cell is not idempotent; re-running it appends the same plate a second time.
fluos.append(im_fluo)
ods.append(im_od)

In [28]:
# Number of plate tables accumulated so far.
len(fluos)

5

In [29]:
# Place all accumulated plate tables side by side, aligned on their time index.
fluos2 = pd.concat(fluos, axis=1)
ods2 = pd.concat(ods, axis=1)

In [30]:
# Save the combined induction-matrix tables; the time index is written as the first column.
fluos2.to_csv('datasets/induction_matrix/induction_fluo.csv')
ods2.to_csv('datasets/induction_matrix/induction_od.csv')

### ALL GATES

### Data from plate reader 1

In [5]:
def cleanse_plate(plate):
    """Convert a wide plate layout into a two-column ``Well`` / ``code_name`` table.

    ``plate`` holds the row letter in ``'Unnamed: 0'`` and one column per plate
    column number. Each cell value is cut at its first ``'.'`` and returned as
    ``code_name``; ``Well`` is the row letter followed by the zero-padded
    column number (e.g. ``A01``).
    """
    long = plate.melt(id_vars=['Unnamed: 0'])
    # Keep only the text in front of the first dot.
    long['value'] = [code.split('.')[0] for code in long['value']]
    column_number = long['variable'].map(lambda label: f"{int(label):02d}")
    long['variable'] = long['Unnamed: 0'] + column_number
    tidy = long.drop(columns='Unnamed: 0').reset_index(drop=True)
    return tidy.rename(columns={'variable': 'Well', 'value': 'code_name'})

# Well -> construct code lookup for plate 1.
plate1 = cleanse_plate(pd.read_csv('datasets/plate1_map.csv'))
plate1

Unnamed: 0,Well,code_name
0,A01,A5
1,B01,A37
2,C01,A59
3,D01,A79
4,E01,A95
...,...,...
91,D12,A94
92,E12,A105
93,F12,A121
94,G12,A137


In [6]:
# NOTE(review): the recorded output of this cell is "AttributeError: 'str' object has no
# attribute 'iloc'". The read_plate_data definitions visible in this notebook expect a
# DataFrame, so these dataset-name arguments presumably target an older version of the
# helper -- confirm the intended file paths before re-running.
df11 = read_plate_data('and_gate_11') # up to 20h
print(df11.shape)
df12 = read_plate_data('and_gate_12') # up to 16h
print(df12.shape)
df13 = read_plate_data('and_gate_13') # up to 16h
print(df13.shape)

AttributeError: 'str' object has no attribute 'iloc'

In [None]:
# Map every measured well to its construct code, then to its short name.
# NOTE(review): `naming_map` is not defined anywhere in the visible notebook -- confirm where it comes from.
merged = pd.merge(df11['Well'], plate1, on='Well', how='left')
merged = pd.merge(merged, naming_map, on='code_name', how='left')
name1 = merged['short_name']

In [None]:
# One call per replicate run; the last argument is h (20 for the first run, 16 for the other two).
# NOTE(review): five values are unpacked, but generate_data as defined earlier in this notebook
# returns three tables -- this cell presumably targets an older version of the helper.
fluo11, od11, bulk_fluo11, fluo_half11, bulk_fluo_half11 = generate_data(df11, plate1, name1, 20)
fluo12, od12, bulk_fluo12, fluo_half12, bulk_fluo_half12 = generate_data(df12, plate1, name1, 16)
fluo13, od13, bulk_fluo13, fluo_half13, bulk_fluo_half13 = generate_data(df13, plate1, name1, 16)

In [None]:
def plot_all(data, num_row, num_col):
    """Draw one small panel per sample, overlaying every table in ``data``.

    Parameters
    ----------
    data : list of DataFrame
        Tables indexed by time in minutes. Column ``i`` of every table is
        drawn in panel ``i``; titles come from the first table's columns.
    num_row, num_col : int
        Grid shape. Panels beyond the number of columns in ``data[0]`` are
        hidden.

    The x axis is shown in hours (index / 60). Nothing is returned.
    """
    # squeeze=False keeps `axs` a 2-D array even for a 1x1 grid, so ravel()
    # below is always valid.
    f, axs = plt.subplots(num_row, num_col, sharex=True, sharey=False,
                          figsize=(14, num_row*2), squeeze=False)
    axr = axs.ravel()
    # Wrap the array itself (not the enumerate object) so tqdm knows the total.
    for i, ax in enumerate(tqdm(axr)):
        if i < data[0].shape[1]:
            for d in data:
                ax.plot(d.index/60, d.iloc[:, i])
            ax.set_title(data[0].columns[i])
            ax.set_xlabel('Time (h)')
        else:
            ax.set_visible(False)
    plt.tight_layout()
    sns.despine()
    
# Overlay the three replicate runs of plate 1 on a 20 x 5 grid (bulk fluorescence, "half" tables).
plot_all([bulk_fluo_half11, bulk_fluo_half12, bulk_fluo_half13], 20, 5)

In [None]:
# Same overlay for the OD tables of the three plate-1 runs.
plot_all([od11, od12, od13], 20, 5)

In [None]:
# Element-wise mean of the three replicate runs.
# NOTE(review): frame addition aligns on the time index, so timepoints missing from any run
# become NaN; the runs are annotated as 20 h / 16 h / 16 h, so later timepoints are
# presumably NaN here -- confirm.
bulk_fluo1 = (bulk_fluo11 + bulk_fluo12 + bulk_fluo13) / 3
fluo1 = (fluo11 + fluo12 + fluo13) / 3
od1 = (od11 + od12 + od13) / 3
fluo_half1 = (fluo_half11 + fluo_half12 + fluo_half13) / 3
bulk_fluo_half1 = (bulk_fluo_half11 + bulk_fluo_half12 + bulk_fluo_half13) / 3

In [None]:
# Save the replicate means for plate 1; rows containing any missing value are dropped first.
bulk_fluo1.dropna().to_csv('datasets/bulk_fluo_plate_1_triplicate.csv')
fluo1.dropna().to_csv('datasets/fluo_plate_1_triplicate.csv')
od1.dropna().to_csv('datasets/od_plate_1_triplicate.csv')
bulk_fluo_half1.dropna().to_csv('datasets/bulk_fluo_half_plate_1_triplicate.csv')
fluo_half1.dropna().to_csv('datasets/fluo_half_plate_1_triplicate.csv')

In [None]:
# Save the first plate-1 run on its own; rows containing any missing value are dropped first.
bulk_fluo11.dropna().to_csv('datasets/bulk_fluo_plate_1_single.csv')
fluo11.dropna().to_csv('datasets/fluo_plate_1_single.csv')
od11.dropna().to_csv('datasets/od_plate_1_single.csv')
bulk_fluo_half11.dropna().to_csv('datasets/bulk_fluo_half_plate_1_single.csv')
fluo_half11.dropna().to_csv('datasets/fluo_half_plate_1_single.csv')

### Data from plate reader 2

In [None]:
# Well -> construct code lookup for plate 2, followed by its three replicate runs.
# NOTE(review): read_plate_data is called with dataset names, but the definitions visible in
# this notebook expect a DataFrame -- confirm the intended loader.
plate2 = cleanse_plate(pd.read_csv('datasets/plate2_map.csv'))
df21 = read_plate_data('and_gate_21') # up to 20h
print(df21.shape)
df22 = read_plate_data('and_gate_22') # up to 16h
print(df22.shape)
df23 = read_plate_data('and_gate_23') # up to 16h
print(df23.shape)

In [None]:
# Map every measured well of plate 2 to its construct code, then to its short name.
# NOTE(review): `naming_map` is not defined anywhere in the visible notebook -- confirm where it comes from.
merged = pd.merge(df21['Well'], plate2, on='Well', how='left')
merged = pd.merge(merged, naming_map, on='code_name', how='left')
name2 = merged['short_name']

In [None]:
# One call per replicate run; the last two arguments are h and m, which differ per run.
# NOTE(review): five values are unpacked, but generate_data as defined earlier in this notebook
# returns three tables -- this cell presumably targets an older version of the helper.
fluo21, od21, bulk_fluo21, fluo_half21, bulk_fluo_half21 = generate_data(df21, plate2, name2, 21, 2)
fluo22, od22, bulk_fluo22, fluo_half22, bulk_fluo_half22 = generate_data(df22, plate2, name2, 22, 0)
fluo23, od23, bulk_fluo23, fluo_half23, bulk_fluo_half23 = generate_data(df23, plate2, name2, 22, 1)

In [None]:
# Overlay the three replicate runs of plate 2 on a 20 x 5 grid (bulk fluorescence, "half" tables).
plot_all([bulk_fluo_half21, bulk_fluo_half22, bulk_fluo_half23], 20, 5)

In [None]:
# Same overlay for the OD tables of the three plate-2 runs.
plot_all([od21, od22, od23], 20, 5)

In [None]:
# Element-wise mean of the three plate-2 runs; addition aligns on the time index, so a
# timepoint absent from any run yields NaN.
bulk_fluo2 = (bulk_fluo21 + bulk_fluo22 + bulk_fluo23) / 3
fluo2 = (fluo21 + fluo22 + fluo23) / 3
od2 = (od21 + od22 + od23) / 3
fluo_half2 = (fluo_half21 + fluo_half22 + fluo_half23) / 3
bulk_fluo_half2 = (bulk_fluo_half21 + bulk_fluo_half22 + bulk_fluo_half23) / 3

In [None]:
# Save the replicate means for plate 2.
# NOTE(review): unlike the plate-1 export, rows with missing values are not dropped here -- confirm this is intended.
bulk_fluo2.to_csv('datasets/bulk_fluo_plate_2_triplicate.csv')
fluo2.to_csv('datasets/fluo_plate_2_triplicate.csv')
fluo_half2.to_csv('datasets/fluo_half_plate_2_triplicate.csv')
bulk_fluo_half2.to_csv('datasets/bulk_fluo_half_plate_2_triplicate.csv')
od2.to_csv('datasets/od_plate_2_triplicate.csv')

In [None]:
# Save the first plate-2 run on its own.
bulk_fluo21.to_csv('datasets/bulk_fluo_plate_2_single.csv')
fluo21.to_csv('datasets/fluo_plate_2_single.csv')
fluo_half21.to_csv('datasets/fluo_half_plate_2_single.csv')
bulk_fluo_half21.to_csv('datasets/bulk_fluo_half_plate_2_single.csv')
od21.to_csv('datasets/od_plate_2_single.csv')

### Induction Matrix

In [None]:
# NOTE(review): read_plate_data is given a dataset name here, whereas the definitions visible
# in this notebook expect a DataFrame -- confirm the intended loader and file path.
induction = read_plate_data('induction') # up to 20h
print(induction.shape)

In [None]:
# Well -> construct code. Each construct occupies a 3 x 3 block of wells; wells not listed
# here are labelled 'control' by a later cell.
induction_plate = {
    'A01': 'A18', 'A02': 'A18', 'A03': 'A18', 
    'B01': 'A18', 'B02': 'A18', 'B03': 'A18', 
    'C01': 'A18', 'C02': 'A18', 'C03': 'A18', 
    'D01': 'A195', 'D02': 'A195', 'D03': 'A195', 
    'E01': 'A195', 'E02': 'A195', 'E03': 'A195', 
    'F01': 'A195', 'F02': 'A195', 'F03': 'A195', 
    'A04': 'A29', 'A05': 'A29', 'A06': 'A29', 
    'B04': 'A29', 'B05': 'A29', 'B06': 'A29', 
    'C04': 'A29', 'C05': 'A29', 'C06': 'A29', 
    'D04': 'A259', 'D05': 'A259', 'D06': 'A259', 
    'E04': 'A259', 'E05': 'A259', 'E06': 'A259', 
    'F04': 'A259', 'F05': 'A259', 'F06': 'A259', 
    'A07': 'A76', 'A08': 'A76', 'A09': 'A76', 
    'B07': 'A76', 'B08': 'A76', 'B09': 'A76', 
    'C07': 'A76', 'C08': 'A76', 'C09': 'A76', 
    'D07': 'A267', 'D08': 'A267', 'D09': 'A267', 
    'E07': 'A267', 'E08': 'A267', 'E09': 'A267', 
    'F07': 'A267', 'F08': 'A267', 'F09': 'A267', 
    'A10': 'A109', 'A11': 'A109', 'A12': 'A109', 
    'B10': 'A109', 'B11': 'A109', 'B12': 'A109', 
    'C10': 'A109', 'C11': 'A109', 'C12': 'A109', 
    'D10': 'A294', 'D11': 'A294', 'D12': 'A294', 
    'E10': 'A294', 'E11': 'A294', 'E12': 'A294', 
    'F10': 'A294', 'F11': 'A294', 'F12': 'A294', 
    'G01': 'A323', 'G02': 'A323', 'G03': 'A323', 
    'G04': 'A323', 'G05': 'A323', 'G06': 'A323', 
    'H01': 'A323', 'H02': 'A323', 'H03': 'A323', 
}
# Plate row letter -> first inducer level (0-2) within a construct's block.
induction_row = {
    'A': 0, 'B': 1, 'C': 2, 'D': 0, 'E': 1, 'F': 2,
    'G': 0, 'H': 1 # row G is adjusted manually later: wells G04-G06 are reassigned to level 2
}
# Plate column number -> second inducer level (0-2) within a construct's block.
induction_col = {
    '01': 0, '02': 1, '03': 2, '04': 0, '05': 1, '06': 2, 
    '07': 0, '08': 1, '09': 2, '10': 0, '11': 1, '12': 2 
}

In [None]:
# Build the per-well annotation table: construct code plus the two inducer levels.
# .copy() makes `index` an independent frame, so the column assignments below neither
# touch `induction` nor trigger SettingWithCopyWarning.
index = induction.iloc[:,:3].copy()
index['code_name'] = index['Well'].map(induction_plate)
col_idx = index['Well'].str[1:]
row_idx = index['Well'].str[:1]
index['ind1_lvl'] = row_idx.map(induction_row)
index['ind2_lvl'] = col_idx.map(induction_col)
# Wells G04-G06 are the third level of the A323 block, which the row map alone cannot express.
index.loc[index['Well'].isin(['G04', 'G05', 'G06']), 'ind1_lvl'] = 2
#index = index.dropna()
# Wells that are absent from induction_plate are treated as controls.
index.loc[index['code_name'].isnull(), 'code_name'] = 'control'
name = pd.merge(index[['code_name', 'ind1_lvl', 'ind2_lvl']], naming_map[['code_name', 'short_name']], on='code_name', how='left')
name

In [None]:
# Check the rows for construct A323, whose wells span rows G and H.
index[index['code_name']=='A323']

In [None]:
def parse_minutes(x):
    """Return the number of minutes encoded in a time label.

    The label is split on single spaces. Token 0 is the hour count and
    token 2, if it exists and is not empty, is the minute count:
    ``'1 h 20 min'`` -> 80, ``'2 h '`` -> 120, ``'3 h'`` -> 180.
    """
    spl = x.split(' ')
    hours = int(spl[0]) * 60
    # Labels without a minute field have fewer than three tokens; treat them
    # as zero minutes rather than raising IndexError.
    mins = int(spl[2]) if len(spl) > 2 and spl[2] != '' else 0
    return hours + mins

def transpose_data(df_raw):
    """Return a copy of ``df_raw`` indexed by its ``short_name`` column.

    Despite the name, this variant does not transpose the table or parse time
    labels; it only moves ``short_name`` into the index. ``df_raw`` itself is
    left unchanged.
    """
    return df_raw.copy().set_index('short_name')

def generate_data_induce(df, h=24, names=None):
    """Split an induction plate table into fluorescence, OD and their product.

    Parameters
    ----------
    df : DataFrame
        Plate table whose measurements start at column position 3. The first
        ``h * 3`` measurement columns are fluorescence; the rest are OD.
    h : int
        Sets the width of the fluorescence block (``h * 3`` columns).
    names : DataFrame or Series, optional
        Per-well annotation placed in front of each table. Defaults to the
        notebook-level ``name``; passing it explicitly avoids depending on
        whichever cell last rebound that global.

    Returns
    -------
    (fluo, od, bulk_fluo) : tuple of DataFrame
        ``bulk_fluo`` is ``fluo * od``, which pandas aligns on column labels,
        so the two blocks need matching labels for a meaningful product.
    """
    if names is None:
        names = name

    start_idx = 3
    mid_idx = h * 3 + start_idx
    fluo = df.iloc[:, start_idx:mid_idx].astype(float)
    od = df.iloc[:, mid_idx:].astype(float)
    bulk_fluo = fluo * od

    fluo, od, bulk_fluo = (
        pd.concat([names, part], axis=1) for part in (fluo, od, bulk_fluo)
    )
    return fluo, od, bulk_fluo

# Split the induction plate with the default h=24 and display the product table.
fluo, od, bulk_fluo = generate_data_induce(induction)
bulk_fluo

In [None]:
# Spot-check the rows of a single gate.
bulk_fluo[bulk_fluo['short_name']=='e11x32STPhoRadA']

In [None]:
# Save the induction tables without the row index (the annotation columns identify each well).
bulk_fluo.to_csv('datasets/bulk_fluo_induction.csv', index=False)
fluo.to_csv('datasets/fluo_induction.csv', index=False)
od.to_csv('datasets/od_induction.csv', index=False)

### XOR Gate Top 10

In [None]:
def read_plate_map(df):
    """Split an XOR plate-map table into two tables.

    ``df`` is the plate map with the row letter either in a ``Group`` column
    or already set as the index. The function now uses the frame it is given
    instead of ignoring it and re-reading a hard-coded CSV.

    Returns ``(con, xor)``:
    * ``con`` -- ``Group`` plus the first two data columns.
    * ``xor`` -- the remaining columns in long form, with ``code_name`` (the
      cell value) and ``Well`` (row letter plus zero-padded column label).
    """
    # Accept both layouts: 'Group' as an ordinary column or as the index.
    data = df.set_index('Group') if 'Group' in df.columns else df
    con = data.iloc[:,:2].reset_index()
    xor = data.iloc[:,2:].reset_index().melt(id_vars=['Group'])
    xor['variable'] = xor['variable'].apply(lambda x: "{:02d}".format(int(x)))
    xor['Well'] = xor['Group'] + xor['variable']
    xor = xor.drop(['Group', 'variable'], axis=1).rename(columns={'value':'code_name'})
    return con, xor

# Load the XOR plate map and split it into the two tables returned by read_plate_map.
con, xor = read_plate_map(pd.read_csv('datasets/xor_map.csv'))

In [None]:
def read_plate_data(df):
    """Use the first row of ``df`` as column headers and drop that row.

    Rows with missing values are kept. The result is a new frame with its row
    index renumbered from 0; the frame passed in is not modified.
    """
    header = df.iloc[0]
    # Copy before relabelling so the caller's raw frame stays as loaded.
    table = df.iloc[1:].copy()
    table.columns = header
    return table.reset_index(drop=True)

# Load the XOR plate-reader export; its first row holds the real column headers.
data = read_plate_data(pd.read_csv('datasets/xor_gate.csv'))
data.head()

In [None]:
# Attach the plate-map entry to every measured well; for this plate the code name itself is used as the sample name.
merged = pd.merge(data['Well'], xor, on='Well', how='left')
#merged = pd.merge(merged, naming_map, on='code_name', how='left')
merged.rename(columns={'code_name': 'short_name'}, inplace=True)
# NOTE(review): dropping nulls and renumbering shifts later names up by one position for every
# unnamed well, which would misalign them with the rows of `data` -- confirm there are none.
name = merged['short_name'].dropna().reset_index(drop=True)

In [None]:
# Split the XOR export into its three measurement tables, using h=24.
fluox, odx, fluo_halfx = generate_data(data, xor, name, 24)

In [None]:
# For each XOR construct keep every fourth column (positions 0, 4, 8, 12) and label the four
# kept columns with a two-bit binary suffix: _00, _01, _10, _11.
cols = ['ECF20/33', 'ECF11/15']
fluo_xor = pd.DataFrame()
od_xor = pd.DataFrame()
for c in cols:
    # Fluorescence columns for this construct.
    temp = pd.concat([fluox[c].iloc[:,i] for i in range(0, 16, 4)], axis=1)
    temp.columns = [c + '_' + "{:02b}".format(int(i)) for i in range(4)]
    fluo_xor = pd.concat([fluo_xor, temp], axis=1)
    
    # OD columns for this construct, selected and labelled the same way.
    temp = pd.concat([odx[c].iloc[:,i] for i in range(0, 16, 4)], axis=1)
    temp.columns = [c + '_' + "{:02b}".format(int(i)) for i in range(4)]
    od_xor = pd.concat([od_xor, temp], axis=1)

In [None]:
# Save the XOR tables.
# NOTE(review): the first file is named 'bulk_fluo' but fluo_xor is built from fluox without any OD scaling -- confirm the intended name.
fluo_xor.to_csv('datasets/bulk_fluo_xor.csv')
od_xor.to_csv('datasets/od_xor.csv')

### First-round Data

In [None]:
# Load the first-round export, promote its first row to column headers and keep complete rows only.
df = pd.read_csv('datasets/raw.csv')
df.columns = df.iloc[0]
df.drop(df.index[0], inplace=True)
df.dropna(inplace=True)
print(df.shape)
df.head()

In [None]:
# Sample label (matched against the 'Content' column) -> ECF identifier.
map_ecf = {'Sample X1': 'e15',
'Sample X2': 'e22',
'Sample X3': 'e32',
'Sample X4': 'e33',
'Sample X5': 'e34',
'Sample X6': 'e41',
'Sample X7': 'e42',
'Sample X8': 'e15',
'Sample X9': 'e22',
'Sample X10': 'e38',
'Sample X11': 'e16',
'Sample X12': 'e33',
'Sample X13': 'e15',
'Sample X14': 'e16',
'Sample X15': 'e17',
'Sample X16': 'e20',
'Sample X17': 'e22',
'Sample X18': 'e26',
'Sample X19': 'e32',
'Sample X20': 'e33',
'Sample X21': 'e34'}
# Sample label (matched against the 'Content' column) -> intein name.
map_int = {'Sample X1': 'SspGyrB',
'Sample X2': 'SspGyrB',
'Sample X3': 'SspGyrB',
'Sample X4': 'SspGyrB',
'Sample X5': 'SspGyrB',
'Sample X6': 'SspGyrB',
'Sample X7': 'SspGyrB',
'Sample X8': 'TerThyXS2',
'Sample X9': 'TerThyXS2',
'Sample X10': 'TerThyXS2',
'Sample X11': 'TerThyXS1',
'Sample X12': 'TerThyXS1',
'Sample X13': 'STPhoRadA',
'Sample X14': 'STPhoRadA',
'Sample X15': 'STPhoRadA',
'Sample X16': 'STPhoRadA',
'Sample X17': 'STPhoRadA',
'Sample X18': 'STPhoRadA',
'Sample X19': 'STPhoRadA',
'Sample X20': 'STPhoRadA',
'Sample X21': 'STPhoRadA'}

In [None]:
# Annotate every well with its ECF and intein, looked up from the sample label.
df['ECF'] = df['Content'].map(map_ecf)
df['Intein'] = df['Content'].map(map_int)
# Plate rows encode the inducer state: A/E = (Ara 0, Cuma 0), B/F = (1, 0), C/G = (0, 1), D/H = (1, 1).
df.loc[df['Group'].isin(['A', 'E']), 'Ara'] = 0
df.loc[df['Group'].isin(['A', 'E']), 'Cuma'] = 0
df.loc[df['Group'].isin(['B', 'F']), 'Ara'] = 1
df.loc[df['Group'].isin(['B', 'F']), 'Cuma'] = 0
df.loc[df['Group'].isin(['C', 'G']), 'Ara'] = 0
df.loc[df['Group'].isin(['C', 'G']), 'Cuma'] = 1
df.loc[df['Group'].isin(['D', 'H']), 'Ara'] = 1
df.loc[df['Group'].isin(['D', 'H']), 'Cuma'] = 1
# Move the four annotation columns to the front and drop the raw identifier columns.
df = pd.concat([df[['ECF', 'Intein', 'Ara', 'Cuma']],
                 df.drop(['ECF', 'Intein', 'Ara', 'Cuma', 'Well', 'Content', 'Group'], axis=1)],
                 axis=1)
df.head()

In [None]:
# First 77 columns: the 4 annotation columns followed by 73 measurement columns, used here as the fluorescence table.
cols = list(range(0,77))
fluo = df.iloc[:, cols]
#fluo.to_csv('datasets/fluoOD-all.csv', index=False)
fluo.head()

In [None]:
# The 4 annotation columns plus column positions 77-149, used here as the OD table.
cols = list(range(0,4)) + list(range(77,150))
od = df.iloc[:, cols]
#od.to_csv('datasets/OD-all.csv', index=False)
od.head()

In [None]:
def parse_minutes(x):
    """Parse a time label such as ``'1 h 20 min'`` into minutes (here 80).

    The label is split on single spaces; the first token gives the hours and
    the third token, when present and non-empty, gives the minutes. A label
    without any minute token (``'3 h'``) yields whole hours only.
    """
    spl = x.split(' ')
    hours = int(spl[0]) * 60
    # Guard against short labels: spl[2] does not exist for an input like '3 h'.
    mins = int(spl[2]) if len(spl) > 2 and spl[2] != '' else 0
    return hours + mins

# Select the wells with Ara == 1 and Cuma == 0 and reshape them to time-major layout.
# NOTE(review): the variable is called fluo11 although the filter (and the later file name
# 'fluo-10.csv') corresponds to the 1/0 state -- confirm which state is intended.
# .copy() gives an independent frame, so the edits below do not act on a slice of `fluo`.
fluo11 = fluo[(fluo['Ara']==1) & (fluo['Cuma']==0)].copy()
fluo11['index'] = fluo11['ECF'] + fluo11['Intein']
fluo11 = fluo11.set_index('index').drop(['ECF', 'Intein', 'Ara', 'Cuma'], axis=1)
fluo11 = fluo11.transpose().reset_index()

# After reset_index() the former column labels (time strings) are in column 0.
fluo11['time'] = fluo11[0].apply(parse_minutes)
fluo11 = fluo11.set_index('time')
fluo11 = fluo11.drop(0, axis=1)
fluo11

In [None]:
# Same selection and reshape for the OD table (wells with Ara == 1 and Cuma == 0).
# .copy() gives an independent frame, so the edits below do not act on a slice of `od`.
od11 = od[(od['Ara']==1) & (od['Cuma']==0)].copy()
od11['index'] = od11['ECF'] + od11['Intein']
od11 = od11.set_index('index').drop(['ECF', 'Intein', 'Ara', 'Cuma'], axis=1)
od11 = od11.transpose().reset_index()

# After reset_index() the former column labels (time strings) are in column 0.
od11['time'] = od11[0].apply(parse_minutes)
od11 = od11.set_index('time')
od11 = od11.drop(0, axis=1)
od11

In [None]:
# Convert every sample column to float, reporting the columns that cannot be converted.
for col in fluo11.columns:
    try:
        fluo11[col] = fluo11[col].astype(float)
    except (ValueError, TypeError):
        # Only conversion failures are expected here; anything else (including
        # KeyboardInterrupt) should surface rather than be silently printed.
        print(col)

In [None]:
# Convert every OD column to float; unlike the fluorescence loop, a failed conversion raises here.
for col in od11.columns:
    od11[col] = od11[col].astype(float)

In [None]:
# Save fluorescence, OD and their element-wise product (aligned on time index and column labels).
fluo11.to_csv('datasets/fluo-10.csv')
od11.to_csv('datasets/od-10.csv')
(fluo11 * od11).to_csv('datasets/bulk-fluo-10.csv')

### Sequence to Function

In [2]:
# Read every GenBank file in the sequences folder into a name -> sequence table.
filenames = sorted(os.listdir('datasets/sequences/'))
buffer = {}
for filename in tqdm(filenames):
    gb_file = "datasets/sequences/" + filename
    # Open inside a context manager so each handle is closed as soon as the file
    # has been parsed (the original left all of them open).
    with open(gb_file, "r") as handle:
        for gb_record in SeqIO.parse(handle, "genbank"):
            # The key is the file name minus its last three characters
            # (presumably the '.gb' extension -- confirm). If a file holds
            # several records, the last one wins.
            buffer[filename[:-3]] = str(gb_record.seq)
df = pd.DataFrame.from_dict(buffer, orient='index').reset_index()
df.columns = ['full_name', 'sequence']

100%|██████████| 355/355 [00:01<00:00, 299.54it/s]


In [13]:
constructs = pd.read_csv('datasets/constructs.csv')
# Remove the literal '*' characters so the names match the sequence file names.
# regex=False is explicit because '*' alone is not a valid regular expression and the
# default of `regex` differs between pandas versions.
constructs['full_name'] = constructs['full_name'].str.replace("*", "", regex=False)
# Keep only the sequences that have a matching construct entry.
df2 = pd.merge(df, constructs, on="full_name", how="left")
df2 = df2.dropna()
df2 = df2[['id', 'short_name', 'full_name', 'sequence']]
fluo1 = pd.read_csv('datasets/bulk_fluo_plate_1_single.csv', index_col='time')
fluo2 = pd.read_csv('datasets/bulk_fluo_plate_2_single.csv', index_col='time')
fluo = pd.concat([fluo1, fluo2], axis=1)
# One row per sample with one column per timepoint; the column for time 1220 is dropped.
data = fluo.T.reset_index().drop(1220, axis=1)
data.rename(columns={'index': 'short_name'}, inplace=True)
#data.columns = ['short_name', 'fluo_20h']
# Join sequences with their fluorescence time series; samples lacking either are dropped.
df_final = pd.merge(df2, data, on='short_name', how='left').dropna()
df_final

  constructs['full_name'] = constructs['full_name'].str.replace("*", "")


Unnamed: 0,id,short_name,full_name,sequence,0,20,40,60,80,100,120,140,160,180,200,220,240,260,280,300,320,340,360,380,400,420,440,460,480,500,520,540,560,580,600,620,640,660,680,700,720,740,760,780,800,820,840,860,880,900,920,940,960,980,1000,1020,1040,1060,1080,1100,1120,1140,1160,1180,1200
3,A18,e11x32STPhoRadA,3K3-P11-gfp-B15-P(ara)-sECF11-SP11-P(Cym)-32-(...,TGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTA...,36.20450,-123.98541,-385.857160,-14.89200,3.82501,36.29976,39.46566,28.59230,67.85850,749.06865,2942.10049,6863.80695,11021.14318,16089.68025,22241.14200,29534.11230,38314.78605,49202.17155,59555.58368,61088.68145,62059.03209,64442.41440,67196.02244,70129.93043,73928.80947,77564.50499,82043.73947,85646.97729,89723.85500,93543.08068,97331.30756,100810.89216,103986.58300,106829.20620,108622.16640,110419.21514,111831.98865,113745.84936,114921.83844,116100.53323,116717.60250,117561.54698,118486.65873,119346.34712,119884.19075,119964.83323,121134.38625,121480.66612,122688.45860,122670.17410,124115.81230,124686.27831,125398.10328,126375.57600,127123.82523,127476.36564,127894.02930,128569.76450,128820.78768,128710.99332,130043.56617
8,A20,e11x32CthTer,3K3-P11-gfp-B15-P(ara)-sECF11-SP20-P(Cym)-32-s...,TGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTA...,51.52824,95.13588,118.499997,74.05932,50.82480,52.64940,59.61240,0.65512,54.82233,66.33195,151.60090,684.20970,1316.32160,2068.52935,3202.74828,4710.76221,6785.05600,9090.26140,10290.86160,9736.09182,10035.53655,10143.41400,10543.65148,10740.25224,10937.40800,11134.43520,11427.46448,11682.94050,11915.52648,12171.71538,12350.62737,12350.47840,12321.67200,12373.26314,12402.85670,12346.81446,12229.75104,12222.93624,12171.02982,12316.52715,12174.34145,12099.07110,12097.40847,12234.64840,12032.65388,12093.48928,12068.09307,12015.96475,12115.35360,12149.84628,12295.98666,12178.69755,12274.97172,12319.89560,12288.54405,12352.45500,12331.22751,12476.21264,12395.23875,12598.14375,12587.09244
11,A5,e11x32gp411,3K3-P11-gfp-B15-P(ara)-sECF11-SP3-P(Cym)-32-sg...,TGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTA...,-7.95690,-171.07916,-249.729480,16.87728,45.30954,30.79230,79.61772,60.88710,65.83080,291.53670,2152.41516,5120.52805,8786.86000,12921.87360,17844.94215,23332.33972,30056.49241,37710.21940,46762.43781,57480.48417,64585.66530,65248.70129,66003.65772,67650.19720,71332.08320,74048.04000,78639.91971,83247.16400,88693.54368,94841.99100,100280.87764,107331.32146,112134.19200,116777.70920,121694.87901,123891.18140,127191.03408,128935.33804,131723.63145,135518.79240,138493.00465,143246.59232,146306.74110,148192.74918,150926.32571,150817.49936,156097.17054,157081.07820,156762.96516,163127.07928,161485.78236,166272.36240,164888.33519,169559.42370,172330.50614,172859.41938,173275.76822,178951.20912,178986.39836,177490.23384,180732.79836
12,A23,e11x32NrdA2,3K3-P11-gfp-B15-P(ara)-sECF11-SP31-P(Cym)-32-s...,TGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTA...,-69.45120,-86.64228,-125.931280,-11.25367,16.56139,13.21575,27.64211,21.98376,43.82400,1452.51838,5883.48155,12803.70130,21272.06772,31481.67870,43703.09129,57868.70760,74315.92278,94729.98744,117415.14980,129090.25404,130911.31918,132368.17644,137820.43577,144681.91872,152750.75196,161360.34489,170053.23544,179274.08742,188083.83100,197337.13000,205729.81342,213732.37340,218902.91556,224463.90964,229802.55396,233871.90792,238832.32331,242905.81796,245958.38226,249963.16025,251381.46069,253194.49937,253268.14902,253404.31744,253358.43116,253301.09157,253117.42500,253170.63792,253173.21990,253145.68980,253084.60244,252975.20364,252938.36835,252970.74102,252927.29363,252881.36649,252688.37308,252843.08984,252666.28668,252778.69654,252783.57335
13,A24,e11x33NrdA2,3K3-P11-gfp-B15-P(ara)-sECF11-SP31-P(Cym)-33-s...,TGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTA...,115.31576,131.82396,104.071429,85.78220,66.43464,48.31920,62.65038,44.93046,81.15289,1792.20945,5914.98016,10934.76560,17820.27776,26858.11456,37730.41196,51065.27846,66882.85384,81737.64467,84534.49845,83814.59475,86803.38176,89099.51346,90938.85840,93135.15950,94458.35256,95625.74045,96727.68990,97278.06808,97496.60968,97563.64788,98046.62545,98099.49214,98037.30442,98405.69913,98358.19035,98411.24160,98595.02952,98237.23455,98284.84992,98576.29248,98086.70625,98461.03488,98661.50805,99173.21724,98595.97818,99221.58864,99053.08456,99274.45434,99877.55085,100253.29476,100888.69582,101326.23956,102117.00512,102324.26483,102340.48782,102578.34174,102370.30875,102223.50741,102493.76912,102919.98339,102875.23865
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
330,A337,e42x30MjaKlbA,3K3-P42-gfp-B15-P(ara)-sECF42-SP33-P(Cym)-30-s...,TGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTA...,-19.16065,10.53087,16.990200,5.80437,-25.72899,-21.88352,6.98372,-2.88192,-4.27750,-9.34032,8.09251,-14.09589,-29.57556,18.83947,-9.59040,-46.80984,-47.53571,-73.90506,-51.56372,2.00166,85.92325,225.82566,344.22503,358.60437,383.81356,411.44700,422.98256,560.93700,504.10360,542.79168,602.74633,556.91575,634.50456,637.49560,688.38885,656.24796,648.79088,686.11308,662.11226,675.31140,652.21224,665.23821,694.28964,775.68078,750.73782,705.56024,778.51872,704.19258,861.05495,812.56260,842.61240,828.31006,908.08788,907.29096,882.58130,929.03451,966.89060,966.49692,999.07205,993.79410,1007.72637
334,A320,e42x32NrdJ1,3K3-P42-gfp-B15-P(ara)-sECF42-SP5-P(Cym)-32-sN...,TGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTA...,-8.66188,-11.24354,21.791000,4.98080,-25.51616,9.12659,15.40165,13.46352,8.38706,-7.02620,11.38612,129.67080,602.07680,1267.80455,1823.65344,2610.13446,3811.76095,4965.92560,6233.91648,6971.39240,8402.95824,10405.28388,12683.45232,14781.30462,16723.57320,18202.97451,19424.71188,20566.21056,21195.80319,21608.98344,21943.49630,22134.17073,22283.09748,22561.30854,22539.99678,22551.74514,22672.31148,22543.53192,22661.97552,22579.27896,22599.03562,22410.39654,22603.27878,22606.48896,22617.93400,22756.68150,22604.31615,22610.27333,22695.85628,22817.24592,22769.12366,22734.31680,22900.30420,22942.88553,22820.47911,22950.20322,22885.13385,22943.61252,22874.55694,22970.78576,22950.04501
335,A321,e42x33NrdJ1,3K3-P42-gfp-B15-P(ara)-sECF42-SP5-P(Cym)-33-sN...,TGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTA...,-107.62488,-47.11187,-48.626370,-153.59331,-48.22355,-45.04311,8.31372,-2.71860,0.53130,-7.67340,181.21885,945.52601,2310.05478,3846.87420,5597.70948,7972.69284,10204.39679,11899.66490,12491.57332,13134.58151,13785.22090,14543.67684,15169.27707,15720.60720,16243.52379,16580.34447,16979.84334,17175.87160,17045.08917,17073.03282,17185.58160,17159.80278,17167.08546,17133.85806,17072.94615,17057.84080,17105.13532,17160.03182,17287.65444,17247.15610,17213.04320,17260.36263,17261.43159,17338.55184,17209.02806,17351.98160,17272.67976,17356.89384,17449.12650,17421.87885,17376.48640,17545.91432,17416.16043,17596.32480,17405.77036,17390.99544,17460.11605,17616.64975,17501.69520,17640.01798,17507.91446
337,A323,e42x32STIMPDH1,3K3-P42-gfp-B15-P(ara)-sECF42-SP6-P(Cym)-32-(S...,TGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTA...,-63.20652,-0.37449,-25.328100,-20.05260,-39.19010,-40.06893,-38.83100,-2.28975,-7.50312,-45.30336,-19.68384,18.22520,88.07040,441.73636,917.92745,1436.61356,1999.22866,2769.49890,4123.31480,5303.05914,6332.07046,7211.15100,9367.46284,12614.90726,16415.63696,20423.85830,24363.30292,28248.08216,31572.55280,34139.49348,36560.51337,38739.47525,40365.55880,42077.28294,42975.94634,43975.58733,44522.95680,45162.69296,45703.88317,45816.16020,46113.37200,46050.98652,46667.73000,46819.70462,46851.82875,46932.21096,46854.11599,46922.63189,46810.91016,46692.64710,47066.95004,46795.21728,46726.29906,46933.70388,46754.60228,46611.23571,46995.70530,46945.88031,46740.57011,46911.56670,47021.20576


In [14]:
# Count the missing values in each column of the final table.
df_final.isna().sum()

id            0
short_name    0
full_name     0
sequence      0
0             0
             ..
1120          0
1140          0
1160          0
1180          0
1200          0
Length: 65, dtype: int64

In [15]:
# Save the final table to CSV; the row index is written as the first column.
df_final.to_csv(path_or_buf='datasets/sequence_data.csv')