In [136]:
import numpy as np
import pandas as pd
from tqdm import tqdm
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
from itertools import permutations
import seaborn as sns
import matplotlib.pyplot as plt
from Bio.Restriction import BsaI, BsmBI
from Bio import SeqIO
from Bio.Seq import Seq
from primers import primers

#### Reading template plasmids

In [171]:
# Read every record of the template FASTA files as (record id, upper-cased sequence).
fastas = ['promoters', 'gates']
plasmids = pd.DataFrame(
    [(record.id, str(record.seq).upper())
     for stem in fastas
     for record in SeqIO.parse('datasets/jump/{}.fasta'.format(stem), 'fasta')],
    columns=['name', 'sequence'])

# Attach plasmid ids and short names; how='right' keeps every FASTA record even
# when the dictionary has no matching 'full_name'.
dictionary = pd.read_csv('datasets/dictionary.csv')
plasmids = dictionary.merge(plasmids, how='right', left_on='full_name', right_on='name')
plasmids = plasmids[['id', 'short_name', 'name', 'sequence']]
plasmids.head()

Unnamed: 0,id,short_name,name,sequence
0,P73,araC-PBAD-noRJ,3K3-araC-PBAD-30-gfp-B15,TGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTA...
1,P63,araC-PBAD,3K3-araC-PBAD-(rJ)30-gfp-B15,TGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTA...
2,P66,cymR-PCymRC,3K3-cymR-PcymRC-(rJ)30-gfp-B15,TGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTA...
3,P62,PBAD,3K3-PBAD-(rJ)30-gfp-B15,TGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTA...
4,P64,Pcin,3K3-Pcin-(rJ)30-gfp-B15,TGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTA...


In [172]:
# Load the basic parts table and normalise every sequence to upper case.
parts = pd.read_csv('datasets/jump/basic-parts.csv')
parts = parts.assign(sequence=parts['sequence'].str.upper())
parts.head()

Unnamed: 0,name,type,sites,sequence
0,P11,Promoter,P,GCCTCCACACCGCTCGTCACATCCTGTGATCCATTCCCCGCCCATC...
1,E11-PhoRadA-N,CDS,NOC,ATGAGCGATAGTCCGCAGAAACTGGGTCGTAATGAATGGAATGCAT...
2,E11-PhoRadA-C,CDS,NOC,ACAGATGTAACGATTAAAAGAATAATATCCAAAGGAGAACTTGAAT...
3,P20,Promoter,P,GCGCGGATAAAAATTTCATTTGCCCGCGACGGTTTTTCCGCCCATC...
4,E20-gp411-N,CDS,NOC,AATGAAACCGATCCTGATCTGGAACTGCTGAAACGTATTGGTAATA...


In [177]:
# Pair every basic part with each template plasmid whose sequence contains the
# part sequence as an exact substring (only the strand as stored is searched).
# Columns are addressed by label: integer keys on a label-indexed Series rely on
# a deprecated positional fallback.
result = pd.DataFrame(
    [(part['name'], plasmid['id'], plasmid['short_name'])
     for _, part in parts.iterrows()
     for _, plasmid in plasmids.iterrows()
     if part['sequence'] in plasmid['sequence']],
    columns=['name', 'template_id', 'template_name'])
result

Unnamed: 0,name,template_id,template_name
0,P11,A18,e11x32STPhoRadA
1,E11-PhoRadA-N,A18,e11x32STPhoRadA
2,E11-PhoRadA-C,A18,e11x32STPhoRadA
3,P20,A109,e20x32gp411
4,E20-gp411-N,A109,e20x32gp411
5,E20-gp411-C,A109,e20x32gp411
6,P38,A267,e38x32gp418
7,E38-gp418-N,A267,e38x32gp418
8,E38-gp418-C,A267,e38x32gp418
9,P42,A323,e42x32STIMPDH1


List of parts that are not found in template plasmids

In [178]:
# Parts whose sequence was not found in any template plasmid.
# `parts` is the table loaded from basic-parts.csv; sorted for a stable display order.
sorted(set(parts['name']) - set(result['name']))

['J23101']

#### Parts Domestication

In [179]:
# Recognition sites that must not occur inside a part, on either strand.
# Order is [BsaI, rc(BsaI), BsmBI, rc(BsmBI)]; later cells index into this list.
forbidden_sites = [site
                   for enz in (BsaI.site, BsmBI.site)
                   for site in (enz, str(Seq(enz).reverse_complement()))]

In [180]:
# Total number of forbidden-site occurrences in each part sequence.
parts['num_forbidden_sites'] = parts['sequence'].map(
    lambda seq: sum(seq.count(site) for site in forbidden_sites))

# Show the parts that contain at least one forbidden site.
parts.loc[parts['num_forbidden_sites'] > 0]

Unnamed: 0,name,type,sites,sequence,num_forbidden_sites
18,GFPmut3b,CDS,NOC,CGTAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTG...,1


In [181]:
# Keep only the parts that are free of forbidden sites, renumbered from 0.
valid_parts = parts.loc[parts['num_forbidden_sites'].eq(0)].reset_index(drop=True)
valid_parts

Unnamed: 0,name,type,sites,sequence,num_forbidden_sites
0,P11,Promoter,P,GCCTCCACACCGCTCGTCACATCCTGTGATCCATTCCCCGCCCATC...,0
1,E11-PhoRadA-N,CDS,NOC,ATGAGCGATAGTCCGCAGAAACTGGGTCGTAATGAATGGAATGCAT...,0
2,E11-PhoRadA-C,CDS,NOC,ACAGATGTAACGATTAAAAGAATAATATCCAAAGGAGAACTTGAAT...,0
3,P20,Promoter,P,GCGCGGATAAAAATTTCATTTGCCCGCGACGGTTTTTCCGCCCATC...,0
4,E20-gp411-N,CDS,NOC,AATGAAACCGATCCTGATCTGGAACTGCTGAAACGTATTGGTAATA...,0
5,E20-gp411-C,CDS,NOC,CTGAAAAAAATCCTGAAAATCGAGGAACTGGATGAACGCGAACTGA...,0
6,P38,Promoter,P,CAGTACAAAATTTTTTAGATGCGTTGTACAACCCTCACGGGGGTGG...,0
7,E38-gp418-N,CDS,NOC,CCTGCCGCACGTCCGGCACGTATTACCAATCAGCGTGATGGTGCAG...,0
8,E38-gp418-C,CDS,NOC,TGTGAAATCTTTGAAAACGAGATCGACTGGGATGAAATTGCCAGCA...,0
9,P42,Promoter,P,CAGTACAAAATTTTTTAGATGCGTTCGCTGTCGATCCGGCCCGTCG...,0


#### Design Overhangs & Primers

In [182]:
# Look up the two overhangs for each part from its 'sites' code.
# NOTE: this rebinds valid_parts and drops the 'sites' column, so the cell that
# builds valid_parts has to be re-run before this one can be run again.
valid_parts = valid_parts.merge(pd.read_csv('datasets/jump/overhang.csv'), how='left', on='sites')
valid_parts = valid_parts[['name', 'sequence', 'overhang_1', 'overhang_2']]
valid_parts.head()

Unnamed: 0,name,sequence,overhang_1,overhang_2
0,P11,GCCTCCACACCGCTCGTCACATCCTGTGATCCATTCCCCGCCCATC...,GGAG,TACT
1,E11-PhoRadA-N,ATGAGCGATAGTCCGCAGAAACTGGGTCGTAATGAATGGAATGCAT...,AATG,GCTT
2,E11-PhoRadA-C,ACAGATGTAACGATTAAAAGAATAATATCCAAAGGAGAACTTGAAT...,AATG,GCTT
3,P20,GCGCGGATAAAAATTTCATTTGCCCGCGACGGTTTTTCCGCCCATC...,GGAG,TACT
4,E20-gp411-N,AATGAAACCGATCCTGATCTGGAACTGCTGAAACGTATTGGTAATA...,AATG,GCTT


In [183]:
# Flanks added around every part: forbidden_sites[2] + forbidden_sites[0] + 'A'
# in front, and 'T' + forbidden_sites[1] + forbidden_sites[3] behind.
prefix = ''.join([forbidden_sites[2], forbidden_sites[0], 'A'])
suffix = ''.join(['T', forbidden_sites[1], forbidden_sites[3]])

# Full fragment = prefix + overhang_1 + part sequence + overhang_2 + suffix.
valid_parts['bases'] = (prefix + valid_parts['overhang_1'] + valid_parts['sequence']
                        + valid_parts['overhang_2'] + suffix)
fragments = valid_parts[['name', 'bases']]
fragments.head()

Unnamed: 0,name,bases
0,P11,CGTCTCGGTCTCAGGAGGCCTCCACACCGCTCGTCACATCCTGTGA...
1,E11-PhoRadA-N,CGTCTCGGTCTCAAATGATGAGCGATAGTCCGCAGAAACTGGGTCG...
2,E11-PhoRadA-C,CGTCTCGGTCTCAAATGACAGATGTAACGATTAAAAGAATAATATC...
3,P20,CGTCTCGGTCTCAGGAGGCGCGGATAAAAATTTCATTTGCCCGCGA...
4,E20-gp411-N,CGTCTCGGTCTCAAATGAATGAAACCGATCCTGATCTGGAACTGCT...


In [184]:
def design_primers(parts, prefix, suffix, start=1):
    """Design a forward/reverse primer pair for every part.

    Parameters
    ----------
    parts : pandas.DataFrame
        One row per part. The columns ``name``, ``sequence``, ``overhang_1``
        and ``overhang_2`` are read.
    prefix : str
        Bases placed at the 5' end of each forward primer, ahead of ``overhang_1``.
    suffix : str
        Bases that follow ``overhang_2`` on the given strand. Its reverse
        complement, followed by the reverse complement of ``overhang_2``, is
        added to each reverse primer.
    start : int, default 1
        Number given to the first primer; numbering runs on across all primers.

    Returns
    -------
    pandas.DataFrame
        Two rows per successfully designed part (forward, then reverse) with the
        columns ``name, sequence, tm, tm_total, gc, dg, fwd, offtargets, penalty``.
        Parts for which ``primers`` raises are reported on stdout and skipped.
    """
    columns = ['name', 'sequence', 'tm', 'tm_total', 'gc', 'dg', 'fwd', 'offtargets', 'penalty']
    # Loop-invariant: the reverse primer always starts with the reverse-complemented suffix.
    rev_flank = str(Seq(suffix).reverse_complement())

    primers_list = []
    counter = 0
    for i, part in tqdm(parts.iterrows(), total=len(parts)):

        try:
            fp, rp = primers(part['sequence'], add_fwd=prefix + part['overhang_1'],
                             add_rev=rev_flank + str(Seq(part['overhang_2']).reverse_complement()))
        except Exception as err:
            # Best effort: skip parts the designer cannot handle, but say why.
            # Unlike a bare `except:`, this lets KeyboardInterrupt through.
            print('Error at', i, part['name'],
                  ': cannot find any feasible primer design, check your fragments!',
                  '({})'.format(err))
            continue

        # Forward primer first, then reverse; each one takes the next running number.
        for direction, primer in (('F', fp), ('R', rp)):
            primers_list.append(('(P{}J-RM)_{}.{}'.format(str(counter+start).zfill(3), part['name'], direction),
                                 primer.seq, primer.tm, primer.tm_total, primer.gc, primer.dg,
                                 primer.fwd, primer.offtargets, primer.penalty))
            counter += 1

    # Named columns keep the frame well-formed even when no primer was designed.
    return pd.DataFrame(primers_list, columns=columns)
        
# Design primers for every row of valid_parts, with the default numbering start of 1.
df_primers = design_primers(valid_parts, prefix, suffix)

17it [00:06,  3.75it/s]

Error at 15 RBS32 : cannot find any feasible primer design, check your fragments!
Error at 16 RBS33 : cannot find any feasible primer design, check your fragments!


24it [00:08,  2.74it/s]


In [185]:
# Name the primer-table columns; the order matches the tuples built in design_primers.
df_primers.columns = ['name', 'sequence', 'tm', 'tm_total', 'gc', 'dg', 'fwd', 'offtargets', 'penalty']
df_primers

Unnamed: 0,name,sequence,tm,tm_total,gc,dg,fwd,offtargets,penalty
0,(P001J-RM)_P11.F,CGTCTCGGTCTCAGGAGGCCTCCACACCGCTCG,68.5,79.9,0.7,-1.76807,True,0,16.63614
1,(P002J-RM)_P11.R,CGTCTCGGTCTCAAGTAAGAGGTGTTACGATAGATGGGCG,68.2,75.5,0.5,-0.11021,False,0,7.42042
2,(P003J-RM)_E11-PhoRadA-N.F,CGTCTCGGTCTCAAATGATGAGCGATAGTCCGCA,62.8,74.1,0.5,-2.760765,True,0,11.32153
3,(P004J-RM)_E11-PhoRadA-N.R,CGTCTCGGTCTCAAAGCTTATTATCTTCTCAGTACCTCTTCC,62.1,73.0,0.5,0.0,False,0,3.1
4,(P005J-RM)_E11-PhoRadA-C.F,CGTCTCGGTCTCAAATGACAGATGTAACGATTAAAAGAATAATATC,60.6,70.1,0.3,-3.57113,True,0,16.14226
5,(P006J-RM)_E11-PhoRadA-C.R,CGTCTCGGTCTCAAAGCTTATTAGACGCTCGCG,60.5,74.5,0.5,-1.53651,False,0,10.57302
6,(P007J-RM)_P20.F,CGTCTCGGTCTCAGGAGGCGCGGATAAAAATTTCATTTG,63.5,75.3,0.5,-3.634815,True,0,8.76963
7,(P008J-RM)_P20.R,CGTCTCGGTCTCAAGTATGGGTTCAACGATAGATGG,63.0,73.4,0.5,0.0,False,0,4.0
8,(P009J-RM)_E20-gp411-N.F,CGTCTCGGTCTCAAATGAATGAAACCGATCCTGATC,60.0,72.2,0.5,-6.3339,True,0,17.6678
9,(P010J-RM)_E20-gp411-N.R,CGTCTCGGTCTCAAAGCTTATTATTCTTTAACATACAGGCAC,59.6,71.4,0.4,0.0,False,0,5.7


#### High-level Design

In [141]:
# NOTE: this rebinds `parts`, which earlier cells use for the basic-parts table.
parts = pd.read_csv('datasets/jump/parts.csv')


def _pick(part_type, site=None):
    """Rows of `parts` with the given type (and site, when given), without the 'type'/'site' columns."""
    keep = parts['type'] == part_type
    if site is not None:
        keep = keep & (parts['site'] == site)
    return parts[keep].drop(['type', 'site'], axis=1)


sensors = _pick('sensor')
rbs = _pick('rbs', 'R')
rbs_rep = _pick('rbs', 'RN')
ecfs = _pick('cds')
reps = _pick('reporter')
term = _pick('terminator', 'T')
term_rep = _pick('terminator', 'CT')

In [142]:
# Circuit definitions. Each gate is written 'AND(output,input1,input2)':
# P* names come from 'sensors', W* names from 'connections', R0 is the final output.
circuit1 = {
    'name': 'circuit1',
    'sensors': ['P{}'.format(i) for i in range(4)],
    'connections': ['W0', 'W1'],
    'gates': [
        'AND(W0,P0,P1)',
        'AND(W1,P2,P3)',
        'AND(R0,W0,W1)'
    ]
}
circuit2 = {
    'name': 'circuit2',
    'sensors': ['P{}'.format(i) for i in range(3)],
    'connections': ['W0'],
    'gates': [
        'AND(W0,P0,P1)',
        'AND(R0,P2,W0)'
    ]
}
circuit3 = {
    'name': 'circuit3',  # was 'circuit2', a copy-paste slip
    'sensors': ['P{}'.format(i) for i in range(2)],
    'connections': [],
    'gates': [
        'AND(R0,P0,P1)'
    ]
}
circuit2

{'name': 'circuit2',
 'sensors': ['P0', 'P1', 'P2'],
 'connections': ['W0'],
 'gates': ['AND(W0,P0,P1)', 'AND(R0,P2,W0)']}

In [143]:
num_plasmid_sys = 1
circuit = dict(circuit1)  # shallow copy, same as circuit1.copy()

# Two fragments per gate, plus one more when num_plasmid_sys is 1.
num_fragments = 2 * len(circuit['gates'])
if num_plasmid_sys == 1:
    num_fragments += 1
num_fragments

7

In [144]:
# Level-1 vector sets; the number in each name is how many vectors the set holds.
vector1_2 = '1A 1B*'.split()
vector1_3 = '1A 1B 1C*'.split()
# alternative 4-vector set, currently unused: 1A 1B 1C 1D
vector1_4 = '1B 1C 1D0 1E'.split()
vector1_5 = '1A 1B 1C 1D0 1E'.split()
vector1_6 = '1A0 1A1 1B 1C 1D0 1E'.split()
# 7-vector set, currently unused: 1A0 1A1 1B 1C 1D0 1E0 1F
vec_1 = [vector1_2, vector1_3, vector1_4, vector1_5, vector1_6]

# Level-2 vector sets.
vector2_2 = '2A 2B*'.split()
vector2_3 = '2A 2B 2C*'.split()
vector2_4 = '2A 2B 2C 2D'.split()
vector2_5 = '2A 2B 2C 2D0 2E'.split()
vec_2 = [vector2_2, vector2_3, vector2_4, vector2_5]

In [156]:
def generate_arrangement(circuit, vectors, ecf_names=None):
    """Enumerate every assignment of ECFs to the gates of a circuit.

    Parameters
    ----------
    circuit : dict
        Must contain ``'gates'``, a list of strings shaped like
        ``'AND(output,input1,input2)'``.
    vectors : sequence of pairs
        ``vectors[i]`` holds the (N-side, C-side) vectors for gate ``i``; at
        least one pair per gate is needed.
    ecf_names : list of str, optional
        ECF part names to permute, e.g. ``'E11-PhoRadA'``. Defaults to the
        ``part_name`` column of the notebook-level ``ecfs`` table.

    Returns
    -------
    pandas.DataFrame or None
        One row per permutation. Columns ``0..n_gates-1`` hold the ECF given to
        each gate, followed by one column per distinct gate input (in order of
        first appearance) and ``vector_n_i`` / ``vector_c_i`` per gate.
        ``None`` (after printing a message) when there are too few ECFs or
        too few vector pairs.

    Notes
    -----
    A wire input ``W<k>`` is resolved to the promoter of the ECF in gate
    column ``k``, i.e. wire ``k`` is taken to be the output of gate ``k``.
    The promoter name is ``'P'`` plus the ECF name's first ``'-'``-separated
    token with its leading character removed (``'E11-PhoRadA'`` -> ``'P11'``).
    """
    if ecf_names is None:
        ecf_names = ecfs['part_name'].tolist()
    gates = circuit['gates']

    arr = pd.DataFrame(list(permutations(ecf_names, len(gates))))
    if len(arr) == 0:
        # No permutation exists: fewer ECFs than gates.
        print('Not enough number of gates!')
        return None

    # Collect the inputs of every gate (everything after the output name).
    inputs = []
    for gate in gates:
        inputs.extend(gate.split('(')[1].replace(')', '').split(',')[1:])

    for inp in inputs:
        if inp.startswith('P'):
            arr[inp] = inp
        elif inp.startswith('W'):
            # Read the whole wire number so that W10 and above work too.
            gate_idx = int(inp[1:])
            arr[inp] = 'P' + arr[gate_idx].str.split('-').str[0].str[1:]

    if len(vectors) < len(gates):
        print('Not enough vectors!')
        return None

    for i in range(len(gates)):
        arr['vector_n_{}'.format(i)] = vectors[i][0]
        arr['vector_c_{}'.format(i)] = vectors[i][1]
    return arr

# zip() pulls from the same iterator twice per tuple, so consecutive vectors are
# paired: [a, b, c, d, ...] -> [(a, b), (c, d), ...].
vec_iter = iter(vector1_6)
vector_list = list(zip(vec_iter, vec_iter)) #group vectors into a tuple of two vectors
arr = generate_arrangement(circuit1, vector_list)
arr

Unnamed: 0,0,1,2,P0,P1,P2,P3,W0,W1,vector_n_0,vector_c_0,vector_n_1,vector_c_1,vector_n_2,vector_c_2
0,E11-PhoRadA,E20-gp411,E38-gp418,P0,P1,P2,P3,P11,P20,1A0,1A1,1B,1C,1D0,1E
1,E11-PhoRadA,E38-gp418,E20-gp411,P0,P1,P2,P3,P11,P38,1A0,1A1,1B,1C,1D0,1E
2,E20-gp411,E11-PhoRadA,E38-gp418,P0,P1,P2,P3,P20,P11,1A0,1A1,1B,1C,1D0,1E
3,E20-gp411,E38-gp418,E11-PhoRadA,P0,P1,P2,P3,P20,P38,1A0,1A1,1B,1C,1D0,1E
4,E38-gp418,E11-PhoRadA,E20-gp411,P0,P1,P2,P3,P38,P11,1A0,1A1,1B,1C,1D0,1E
5,E38-gp418,E20-gp411,E11-PhoRadA,P0,P1,P2,P3,P38,P20,1A0,1A1,1B,1C,1D0,1E


In [158]:
# Split each arrangement row into one record per gate:
# (N-side promoter, C-side promoter, ECF, N-side vector, C-side vector).
# The slices are positional, so this relies on arr being laid out as
# [one ECF column per gate | two input columns per gate | two vector columns per gate].
num_gates = len(circuit1['gates'])
gates = arr.iloc[:, :num_gates]
inputs = arr.iloc[:, num_gates:num_gates*3]
vectors = arr.iloc[:, num_gates*3:]

component_columns = ['promoter_n', 'promoter_c', 'ecf', 'vector_n', 'vector_c']
per_gate = []
for n in range(num_gates):
    temp = pd.concat([inputs.iloc[:, n*2], inputs.iloc[:, n*2+1], gates.iloc[:, n],
                      vectors.iloc[:, n*2], vectors.iloc[:, n*2+1]], axis=1)
    temp.columns = component_columns
    per_gate.append(temp)

# Concatenate once instead of growing a frame inside the loop.
components = pd.concat(per_gate, axis=0) if per_gate else pd.DataFrame(columns=component_columns)
components = components.drop_duplicates().reset_index(drop=True)
components

Unnamed: 0,promoter_n,promoter_c,ecf,vector_n,vector_c
0,P0,P1,E11-PhoRadA,1A0,1A1
1,P0,P1,E20-gp411,1A0,1A1
2,P0,P1,E38-gp418,1A0,1A1
3,P2,P3,E20-gp411,1B,1C
4,P2,P3,E38-gp418,1B,1C
5,P2,P3,E11-PhoRadA,1B,1C
6,P11,P20,E38-gp418,1D0,1E
7,P11,P38,E20-gp411,1D0,1E
8,P20,P11,E38-gp418,1D0,1E
9,P20,P38,E11-PhoRadA,1D0,1E


In [159]:
# Map the sensor placeholders P0..P3 to sensor parts.
# permute_sensors=True pairs every placeholder with every sensor (cross join);
# False pairs them one-to-one in table order.
permute_sensors = False
sensor_ids = pd.DataFrame(['P0', 'P1', 'P2', 'P3'], columns=['_id'])
if permute_sensors:
    prom = sensor_ids.merge(sensors, how='cross')
else:
    # concat(axis=1) aligns on the index; `sensors` still carries its row labels
    # from the parts table, so renumber it to pair rows by position.
    prom = pd.concat([sensor_ids, sensors.reset_index(drop=True)], axis=1)
prom

Unnamed: 0,_id,part_name
0,P0,PBAD
1,P1,PCin
2,P2,PSalTTC
3,P3,PLuxB


In [160]:
# Resolve sensor placeholders to sensor part names on both sides of each gate.
# After the two merges, part_name_x belongs to promoter_n and part_name_y to promoter_c.
components2 = pd.merge(components, prom, how='left', left_on='promoter_n', right_on='_id')
components2 = pd.merge(components2, prom, how='left', left_on='promoter_c', right_on='_id')

# Drop rows where both sides resolved to the same sensor part. Rows with no match
# on either side (NaN on both) are kept, because NaN != NaN. Work on a real copy
# so the assignments below do not write to a slice.
components2 = components2[components2['part_name_x'] != components2['part_name_y']].copy()

# Promoters with no entry in `prom` keep their own name.
components2['part_name_x'] = components2['part_name_x'].fillna(components2['promoter_n'])
components2['part_name_y'] = components2['part_name_y'].fillna(components2['promoter_c'])

components2 = (
    components2[['part_name_x', 'part_name_y', 'ecf', 'vector_n', 'vector_c']]
    .rename(columns={'part_name_x': 'promoter_n', 'part_name_y': 'promoter_c', 'ecf': 'ecf_n'})
    # ecf_c is derived from the un-suffixed name before ecf_n gets its own suffix.
    .assign(ecf_c=lambda df: df['ecf_n'] + '_C',
            ecf_n=lambda df: df['ecf_n'] + '_N')
    .drop_duplicates()
    .reset_index(drop=True)
)
components2

Unnamed: 0,promoter_n,promoter_c,ecf_n,vector_n,vector_c,ecf_c
0,PBAD,PCin,E11-PhoRadA_N,1A0,1A1,E11-PhoRadA_C
1,PBAD,PCin,E20-gp411_N,1A0,1A1,E20-gp411_C
2,PBAD,PCin,E38-gp418_N,1A0,1A1,E38-gp418_C
3,PSalTTC,PLuxB,E20-gp411_N,1B,1C,E20-gp411_C
4,PSalTTC,PLuxB,E38-gp418_N,1B,1C,E38-gp418_C
5,PSalTTC,PLuxB,E11-PhoRadA_N,1B,1C,E11-PhoRadA_C
6,P11,P20,E38-gp418_N,1D0,1E,E38-gp418_C
7,P11,P38,E20-gp411_N,1D0,1E,E20-gp411_C
8,P20,P11,E38-gp418_N,1D0,1E,E38-gp418_C
9,P20,P38,E11-PhoRadA_N,1D0,1E,E11-PhoRadA_C


In [161]:
# Cross every gate with every RBS and terminator choice for its N and C halves.
# rbs and term both carry a 'part_name' column, so each is renamed to its final
# role before merging; otherwise the fourth merge would create part_name_x /
# part_name_y a second time, which recent pandas rejects.
level_2 = (
    components2
    .merge(rbs.rename(columns={'part_name': 'rbs_n'}), how='cross')
    .merge(term.rename(columns={'part_name': 'terminator_n'}), how='cross')
    .merge(rbs.rename(columns={'part_name': 'rbs_c'}), how='cross')
    .merge(term.rename(columns={'part_name': 'terminator_c'}), how='cross')
    .drop_duplicates()
    .reset_index(drop=True)
)

# Plasmid ids: the first three characters of the ECF name are used as its short tag.
level_2['_id'] = 'pJ2-' + level_2['promoter_n'] + '-' + level_2['promoter_c'] + '-' + level_2['ecf_n'].str[:3]
level_2['_id_n'] = 'pJ1-' + level_2['promoter_n'] + '-' + level_2['ecf_n'].str[:3] + '-N'
level_2['_id_c'] = 'pJ1-' + level_2['promoter_c'] + '-' + level_2['ecf_c'].str[:3] + '-C'
level_2['vector'] = '2A'
level_2 = level_2[['_id', '_id_n', '_id_c', 'vector',
                   'promoter_n', 'rbs_n', 'ecf_n', 'terminator_n', 'vector_n',
                   'promoter_c', 'rbs_c', 'ecf_c', 'terminator_c', 'vector_c']]
level_2

Unnamed: 0,_id,_id_n,_id_c,vector,promoter_n,rbs_n,ecf_n,terminator_n,vector_n,promoter_c,rbs_c,ecf_c,terminator_c,vector_c
0,pJ2-PBAD-PCin-E11,pJ1-PBAD-E11-N,pJ1-PCin-E11-C,2A,PBAD,RBS33,E11-PhoRadA_N,B0015,1A0,PCin,RBS33,E11-PhoRadA_C,B0015,1A1
1,pJ2-PBAD-PCin-E20,pJ1-PBAD-E20-N,pJ1-PCin-E20-C,2A,PBAD,RBS33,E20-gp411_N,B0015,1A0,PCin,RBS33,E20-gp411_C,B0015,1A1
2,pJ2-PBAD-PCin-E38,pJ1-PBAD-E38-N,pJ1-PCin-E38-C,2A,PBAD,RBS33,E38-gp418_N,B0015,1A0,PCin,RBS33,E38-gp418_C,B0015,1A1
3,pJ2-PSalTTC-PLuxB-E20,pJ1-PSalTTC-E20-N,pJ1-PLuxB-E20-C,2A,PSalTTC,RBS33,E20-gp411_N,B0015,1B,PLuxB,RBS33,E20-gp411_C,B0015,1C
4,pJ2-PSalTTC-PLuxB-E38,pJ1-PSalTTC-E38-N,pJ1-PLuxB-E38-C,2A,PSalTTC,RBS33,E38-gp418_N,B0015,1B,PLuxB,RBS33,E38-gp418_C,B0015,1C
5,pJ2-PSalTTC-PLuxB-E11,pJ1-PSalTTC-E11-N,pJ1-PLuxB-E11-C,2A,PSalTTC,RBS33,E11-PhoRadA_N,B0015,1B,PLuxB,RBS33,E11-PhoRadA_C,B0015,1C
6,pJ2-P11-P20-E38,pJ1-P11-E38-N,pJ1-P20-E38-C,2A,P11,RBS33,E38-gp418_N,B0015,1D0,P20,RBS33,E38-gp418_C,B0015,1E
7,pJ2-P11-P38-E20,pJ1-P11-E20-N,pJ1-P38-E20-C,2A,P11,RBS33,E20-gp411_N,B0015,1D0,P38,RBS33,E20-gp411_C,B0015,1E
8,pJ2-P20-P11-E38,pJ1-P20-E38-N,pJ1-P11-E38-C,2A,P20,RBS33,E38-gp418_N,B0015,1D0,P11,RBS33,E38-gp418_C,B0015,1E
9,pJ2-P20-P38-E11,pJ1-P20-E11-N,pJ1-P38-E11-C,2A,P20,RBS33,E11-PhoRadA_N,B0015,1D0,P38,RBS33,E11-PhoRadA_C,B0015,1E


In [162]:
# Unpack each level-2 row into its two level-1 plasmids (N half and C half)
# under a shared column layout.
level_1_columns = ['_id', 'promoter', 'rbs', 'cds', 'terminator', 'vector']

n_terms = level_2[['_id_n', 'promoter_n', 'rbs_n', 'ecf_n', 'terminator_n', 'vector_n']].copy()
n_terms.columns = level_1_columns
c_terms = level_2[['_id_c', 'promoter_c', 'rbs_c', 'ecf_c', 'terminator_c', 'vector_c']].copy()
c_terms.columns = level_1_columns

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows the same way.
level_1 = pd.concat([n_terms, c_terms]).drop_duplicates().reset_index(drop=True)
level_1

Unnamed: 0,_id,promoter,rbs,cds,terminator,vector
0,pJ1-PBAD-E11-N,PBAD,RBS33,E11-PhoRadA_N,B0015,1A0
1,pJ1-PBAD-E20-N,PBAD,RBS33,E20-gp411_N,B0015,1A0
2,pJ1-PBAD-E38-N,PBAD,RBS33,E38-gp418_N,B0015,1A0
3,pJ1-PSalTTC-E20-N,PSalTTC,RBS33,E20-gp411_N,B0015,1B
4,pJ1-PSalTTC-E38-N,PSalTTC,RBS33,E38-gp418_N,B0015,1B
5,pJ1-PSalTTC-E11-N,PSalTTC,RBS33,E11-PhoRadA_N,B0015,1B
6,pJ1-P11-E38-N,P11,RBS33,E38-gp418_N,B0015,1D0
7,pJ1-P11-E20-N,P11,RBS33,E20-gp411_N,B0015,1D0
8,pJ1-P20-E38-N,P20,RBS33,E38-gp418_N,B0015,1D0
9,pJ1-P20-E11-N,P20,RBS33,E11-PhoRadA_N,B0015,1D0


In [163]:
# Output promoters: 'P' plus the ECF name's first '-'-separated token without its
# leading character (e.g. 'E11-...' -> 'P11').
output_prom = pd.DataFrame('P' + ecfs['part_name'].str.split('-', expand=True)[0].str[1:])

# Every output promoter x reporter RBS x reporter x reporter terminator.
output_gate = output_prom.merge(rbs_rep, how='cross').merge(reps, how='cross').merge(term_rep, how='cross')
output_gate.columns = ['promoter', 'rbs', 'cds', 'terminator']
output_gate['_id'] = output_gate['promoter'] + '-' + output_gate['rbs'].str[-2:] + '-' + \
                           output_gate['cds']

# Take a real copy so that adding 'vector' does not write to a slice.
output_gate = output_gate[['_id', 'promoter', 'rbs', 'cds', 'terminator']].copy()
output_gate['vector'] = '1A-p15a'
output_gate

Unnamed: 0,_id,promoter,rbs,cds,terminator,vector
0,P11-34-sGFP,P11,RBS34,sGFP,B0015,1A-p15a
1,P11-34-mCherry,P11,RBS34,mCherry,B0015,1A-p15a
2,P20-34-sGFP,P20,RBS34,sGFP,B0015,1A-p15a
3,P20-34-mCherry,P20,RBS34,mCherry,B0015,1A-p15a
4,P38-34-sGFP,P38,RBS34,sGFP,B0015,1A-p15a
5,P38-34-mCherry,P38,RBS34,mCherry,B0015,1A-p15a
