In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
from tqdm import tqdm
from itertools import permutations
import seaborn as sns
import matplotlib.pyplot as plt
from Bio.Restriction import BsaI, BsmBI
from Bio import SeqIO
from Bio.Seq import Seq
from primers import primers
import primer3 as pr
import warnings
warnings.filterwarnings("ignore")

#### Reading template plasmids

In [2]:
#read every record of the template FASTA files as a (name, sequence) row
fastas = ['promoters', 'gates']
records = []
for fasta in fastas:
    for p in SeqIO.parse('datasets/jump/{}.fasta'.format(fasta), 'fasta'):
        records.append((p.id, str(p.seq)))
plasmids = pd.DataFrame(records, columns=['name', 'sequence'])
plasmids['sequence'] = plasmids['sequence'].str.upper()

#add plasmid-ids: right join keeps every FASTA record, even without a dictionary entry
dictionary = pd.read_csv('datasets/dictionary.csv')
plasmids = dictionary.merge(plasmids, left_on='full_name', right_on='name', how='right')
plasmids = plasmids[['id', 'short_name', 'name', 'sequence']]
plasmids.head()

Unnamed: 0,id,short_name,name,sequence
0,P73,araC-PBAD-noRJ,3K3-araC-PBAD-30-gfp-B15,TGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTA...
1,P63,araC-PBAD,3K3-araC-PBAD-(rJ)30-gfp-B15,TGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTA...
2,P66,cymR-PCymRC,3K3-cymR-PcymRC-(rJ)30-gfp-B15,TGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTA...
3,P62,PBAD,3K3-PBAD-(rJ)30-gfp-B15,TGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTA...
4,P64,Pcin,3K3-Pcin-(rJ)30-gfp-B15,TGCCACCTGACGTCTAAGAAACCATTATTATCATGACATTAACCTA...


In [3]:
#load the basic parts table with upper-cased sequences
parts = pd.read_csv('datasets/jump/basic-parts.csv') \
          .assign(sequence=lambda table: table['sequence'].str.upper())
parts.head()

Unnamed: 0,name,type,sites,sequence
0,P11,Promoter,P,GCCTCCACACCGCTCGTCACATCCTGTGATCCATTCCCCGCCCATC...
1,E11-PhoRadA-N,CDS,NOC,ATGAGCGATAGTCCGCAGAAACTGGGTCGTAATGAATGGAATGCAT...
2,E11-PhoRadA-C,CDS,NOC,ACAGATGTAACGATTAAAAGAATAATATCCAAAGGAGAACTTGAAT...
3,P20,Promoter,P,GCGCGGATAAAAATTTCATTTGCCCGCGACGGTTTTTCCGCCCATC...
4,E20-gp411-N,CDS,NOC,AATGAAACCGATCCTGATCTGGAACTGCTGAAACGTATTGGTAATA...


In [4]:
#For every part, list the template plasmids whose sequence contains the part sequence.
#Columns are read by label: integer positional access on a string-labelled row
#(part[0], plasmid[3]) is deprecated in pandas. The plasmid (id, sequence) pairs are
#built once instead of re-running iterrows() for every part.
plasmid_records = list(zip(plasmids['id'], plasmids['sequence']))
matches = [(part_name, plasmid_id)
           for part_name, part_seq in zip(parts['name'], parts['sequence'])
           for plasmid_id, plasmid_seq in plasmid_records
           if part_seq in plasmid_seq]
#one row per part; template ids joined into a comma-separated string
result = pd.DataFrame(matches, columns=['name', 'template_id']) \
           .groupby('name')['template_id'] \
           .apply(', '.join).reset_index()
result

Unnamed: 0,name,template_id
0,B0015,"P73, P63, P66, P62, P64, P65, P67, P68, P69, P..."
1,E11-PhoRadA-C,A18
2,E11-PhoRadA-N,A18
3,E20-gp411-C,A109
4,E20-gp411-N,A109
5,E38-gp418-C,A267
6,E38-gp418-N,A267
7,E42-IMPDH1-C,A323
8,E42-IMPDH1-N,A323
9,GFPmut3b,"P73, P63, P66, P62, P64, P65, P67, P68, P69, P..."


List of parts that are not found in template plasmids

In [5]:
#Parts with no matching template plasmid. Listed in the order they appear in `parts`
#rather than in set-iteration order, so the result is the same on every run.
found_names = set(result['name'])
missing = [part_name for part_name in parts['name'].drop_duplicates() if part_name not in found_names]
missing

['J23101']

#### Parts Domestication

In [6]:
#recognition sites of BsaI and BsmBI, each followed by its reverse complement:
#[BsaI, rc(BsaI), BsmBI, rc(BsmBI)]
forbidden_sites = []
for enz in (BsaI.site, BsmBI.site):
    forbidden_sites += [enz, str(Seq(enz).reverse_complement())]
#check any forbidden sites
parts['num_forbidden_sites'] = parts['sequence'].apply(
    lambda seq: sum(seq.count(site) for site in forbidden_sites))

In [7]:
#preview the parts table, now including the num_forbidden_sites column
parts.head()

Unnamed: 0,name,type,sites,sequence,num_forbidden_sites
0,P11,Promoter,P,GCCTCCACACCGCTCGTCACATCCTGTGATCCATTCCCCGCCCATC...,0
1,E11-PhoRadA-N,CDS,NOC,ATGAGCGATAGTCCGCAGAAACTGGGTCGTAATGAATGGAATGCAT...,0
2,E11-PhoRadA-C,CDS,NOC,ACAGATGTAACGATTAAAAGAATAATATCCAAAGGAGAACTTGAAT...,0
3,P20,Promoter,P,GCGCGGATAAAAATTTCATTTGCCCGCGACGGTTTTTCCGCCCATC...,0
4,E20-gp411-N,CDS,NOC,AATGAAACCGATCCTGATCTGGAACTGCTGAAACGTATTGGTAATA...,0


In [8]:
#find parts with any forbidden sites
invalid_parts = parts[parts['num_forbidden_sites']>0]
invalid_parts

Unnamed: 0,name,type,sites,sequence,num_forbidden_sites
18,GFPmut3b,CDS,NOC,CGTAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTG...,1


In [9]:
#drop both the parts carrying forbidden sites and the parts without a template
removed = [*invalid_parts['name'], *missing]
keep_mask = ~parts['name'].isin(removed)
valid_parts = parts.loc[keep_mask].reset_index(drop=True)
valid_parts.head()

Unnamed: 0,name,type,sites,sequence,num_forbidden_sites
0,P11,Promoter,P,GCCTCCACACCGCTCGTCACATCCTGTGATCCATTCCCCGCCCATC...,0
1,E11-PhoRadA-N,CDS,NOC,ATGAGCGATAGTCCGCAGAAACTGGGTCGTAATGAATGGAATGCAT...,0
2,E11-PhoRadA-C,CDS,NOC,ACAGATGTAACGATTAAAAGAATAATATCCAAAGGAGAACTTGAAT...,0
3,P20,Promoter,P,GCGCGGATAAAAATTTCATTTGCCCGCGACGGTTTTTCCGCCCATC...,0
4,E20-gp411-N,CDS,NOC,AATGAAACCGATCCTGATCTGGAACTGCTGAAACGTATTGGTAATA...,0


#### Design Overhangs & Primers

In [10]:
#look up the left/right fusion sites of every valid part from its 'sites' code
overhangs = pd.read_csv('datasets/jump/overhang.csv')
fragments = valid_parts.merge(overhangs, on='sites', how='left')
fragments = fragments[['name', 'sequence', 'left_site', 'right_site']]
#forbidden_sites is ordered [BsaI, rc(BsaI), BsmBI, rc(BsmBI)]
bsai_site, bsai_site_rc, bsmbi_site, bsmbi_site_rc = forbidden_sites
prefix = bsmbi_site + bsai_site + 'A'
suffix = 'T' + bsai_site_rc + bsmbi_site_rc
fragments['left_overhang'] = prefix + fragments['left_site']
fragments['right_overhang'] = fragments['right_site'] + suffix
#full fragment = left overhang + part sequence + right overhang
fragments['bases'] = fragments['left_overhang'] + fragments['sequence'] + fragments['right_overhang']
#following benchling convention to start from index 1
fragments['size'] = fragments['bases'].map(len) + 1
#the benchling import file is written by the next cell
fragments.head()

Unnamed: 0,name,sequence,left_site,right_site,left_overhang,right_overhang,bases,size
0,P11,GCCTCCACACCGCTCGTCACATCCTGTGATCCATTCCCCGCCCATC...,GGAG,TACT,CGTCTCGGTCTCAGGAG,TACTTGAGACCGAGACG,CGTCTCGGTCTCAGGAGGCCTCCACACCGCTCGTCACATCCTGTGA...,96
1,E11-PhoRadA-N,ATGAGCGATAGTCCGCAGAAACTGGGTCGTAATGAATGGAATGCAT...,AATG,GCTT,CGTCTCGGTCTCAAATG,GCTTTGAGACCGAGACG,CGTCTCGGTCTCAAATGATGAGCGATAGTCCGCAGAAACTGGGTCG...,767
2,E11-PhoRadA-C,ACAGATGTAACGATTAAAAGAATAATATCCAAAGGAGAACTTGAAT...,AATG,GCTT,CGTCTCGGTCTCAAATG,GCTTTGAGACCGAGACG,CGTCTCGGTCTCAAATGACAGATGTAACGATTAAAAGAATAATATC...,467
3,P20,GCGCGGATAAAAATTTCATTTGCCCGCGACGGTTTTTCCGCCCATC...,GGAG,TACT,CGTCTCGGTCTCAGGAG,TACTTGAGACCGAGACG,CGTCTCGGTCTCAGGAGGCGCGGATAAAAATTTCATTTGCCCGCGA...,95
4,E20-gp411-N,AATGAAACCGATCCTGATCTGGAACTGCTGAAACGTATTGGTAATA...,AATG,GCTT,CGTCTCGGTCTCAAATG,GCTTTGAGACCGAGACG,CGTCTCGGTCTCAAATGAATGAAACCGATCCTGATCTGGAACTGCT...,623


In [12]:
#write fragment names and full sequences to CSV (the DataFrame index is written as the first column)
fragments[['name', 'bases']].to_csv('datasets/jump/fragments-output.csv')

In [None]:
def design_primers(parts, prefix, suffix):
    """Design a forward and a reverse primer for every row of `parts`.

    Parameters
    ----------
    parts : DataFrame with columns 'name', 'sequence', 'left_site' and 'right_site'.
    prefix : str prepended, together with the row's 'left_site', to the forward primer.
    suffix : str whose reverse complement, followed by the reverse complement of the
        row's 'right_site', is prepended to the reverse primer.

    Returns
    -------
    DataFrame with two rows per successfully designed part (forward, then reverse) and
    columns part_name, sequence, tm, tm_total, gc, dg, fwd, offtargets, penalty.

    A part whose design raises is reported on stdout, together with the underlying
    error, and skipped. It never contributes only one primer of the pair.
    """
    columns = ['part_name', 'sequence', 'tm', 'tm_total', 'gc', 'dg', 'fwd', 'offtargets', 'penalty']
    #loop-invariant: the reverse primer carries the suffix as its reverse complement
    suffix_rc = str(Seq(suffix).reverse_complement())

    primers_list = []
    for _, part in tqdm(parts.iterrows(), total=len(parts)):
        try:
            fp, rp = primers(part['sequence'], add_fwd=prefix + part['left_site'],
                             add_rev=suffix_rc + str(Seq(part['right_site']).reverse_complement()))
            #build both rows before storing either, so a failure cannot leave half a pair
            rows = [(part['name'], p.seq, p.tm, p.tm_total, p.gc, p.dg, p.fwd, p.offtargets, p.penalty)
                    for p in (fp, rp)]
        except Exception as err:
            #Exception (not a bare except) so that KeyboardInterrupt still stops the loop
            print('Error on', part['name'], ': cannot find any feasible primer design, check your fragments!',
                  '({}: {})'.format(type(err).__name__, err))
            continue
        primers_list.extend(rows)

    return pd.DataFrame(primers_list, columns=columns)
        
#design primers for every fragment; parts whose design fails are printed and left out
raw_primers = design_primers(fragments, prefix, suffix)

In [None]:
#first primer number
start = 1
#one row per distinct (direction, sequence) so shared primers get a single name
primer_seq = raw_primers[['fwd', 'sequence']].drop_duplicates().reset_index(drop=True)
numbered = ['P' + str(i + start).zfill(3) + 'J' for i in primer_seq.index]
#suffix marks the direction: .F for forward primers, .R for reverse primers
primer_seq['primer_name'] = [base + ('.F' if is_fwd else '.R')
                             for base, is_fwd in zip(numbered, primer_seq['fwd'])]
#attach the design metrics back to the named primers
primer_metrics = raw_primers.drop('part_name', axis=1)
final_primers = primer_seq[['primer_name', 'sequence']] \
                    .merge(primer_metrics, on='sequence', how='left') \
                    .drop_duplicates().reset_index(drop=True)
final_primers

In [None]:
#save the named primers with their design metrics (index column omitted)
final_primers.to_csv('datasets/jump/final_primers.csv', index=False)

### Parts Amplification via Phusion PCR

#### Generate PCR Parameters

In [None]:
def get_annealed_part(a, b):
    """Strip each overhang from its primer sequence.

    Parameters
    ----------
    a : sequence of str (list or pandas Series) holding the full primer sequences.
    b : sequence of str of at least the same length; b[i] is the overhang to remove from a[i].

    Returns
    -------
    list of str: a[i] with every occurrence of b[i] removed, in the order of `a`.

    Elements are paired by position, so a pandas Series works whatever its index labels are.
    """
    seqs, overhangs = list(a), list(b)
    return [seqs[i].replace(overhangs[i], '') for i in range(len(seqs))]

def calculate_tm(seq):
    """Return the melting temperature of `seq` as computed by primer3's `calcTm`.

    The SantaLucia method is used for both the Tm calculation and the salt correction,
    with dna_conc=250 and dntp_conc=10; all other `calcTm` arguments keep the library defaults.
    """
    # NOTE(review): primer3-py appears to document dntp_conc in mM and dna_conc in nM;
    # dntp_conc=10 looks like a stock rather than a final concentration - confirm.
    return pr.calcTm(seq, dna_conc=250, dntp_conc=10,
                      tm_method='santalucia', salt_corrections_method='santalucia')

#One row per (part, primer): attach the primer name/direction to every raw primer row,
#then add the part's full fragment ('bases'), both overhangs and the fragment size.
pcr_primers = pd.merge(pd.merge(primer_seq, raw_primers[['part_name', 'sequence']], on='sequence', how='right'), \
                fragments[['name', 'bases', 'left_overhang', 'right_overhang', 'size']], \
                left_on='part_name', right_on='name', how='left')
#Reverse primers are replaced by their reverse complement so that they read in the same
#orientation as 'right_overhang', which is stripped from them below.
pcr_primers.loc[~pcr_primers['fwd'], 'sequence'] = pcr_primers['sequence']. \
            apply(lambda x: str(Seq(x).reverse_complement()))
#overhang carried by each primer: left overhang for forward, right overhang for reverse
pcr_primers.loc[pcr_primers['fwd'], 'overhang_part'] = pcr_primers['left_overhang']
pcr_primers.loc[~pcr_primers['fwd'], 'overhang_part'] = pcr_primers['right_overhang']
#what remains after removing the overhang is treated as the template-annealing region
pcr_primers['anneal_part'] = get_annealed_part(pcr_primers['sequence'], pcr_primers['overhang_part'])
#Tm of the annealing region and of the whole fragment, rounded to one decimal
pcr_primers['tm'] = np.round(pcr_primers['anneal_part'].apply(calculate_tm), 1)
pcr_primers['tm_product'] = np.round(pcr_primers['bases'].apply(calculate_tm), 1)
#extension time: 60 per 1000 bases of 'size', never below 60 (presumably seconds - confirm)
pcr_primers['ext_time'] = pcr_primers['size'].apply(lambda x: max([60, x/1000 * 60]))
#split by direction; the two halves are joined per part in the next cell
fwd_primers = pcr_primers[pcr_primers['fwd']] \
              [['part_name', 'primer_name', 'tm', 'tm_product', 'size', 'ext_time']]
rev_primers = pcr_primers[~pcr_primers['fwd']] \
              [['part_name', 'primer_name', 'tm', 'tm_product', 'size', 'ext_time']]

In [None]:
#one row per part: forward primer columns get suffix _x, reverse primer columns suffix _y
pcr = pd.merge(fwd_primers, rev_primers, on=['part_name', 'tm_product', 'size', 'ext_time'], how='outer')
#annealing temperature from the higher of the two primer Tm values and the product Tm
# NOTE(review): the published form of this 0.3/0.7/-14.9 formula (Rychlik et al.) is
# usually quoted with the Tm of the *less* stable primer - confirm that max is intended.
pcr['ta'] = 0.3 * np.max([pcr['tm_x'], pcr['tm_y']], axis=0) + 0.7 * pcr['tm_product'] - 14.9
#Keep only the text before the first '_' and drop parentheses. regex=False makes '(' and
#')' literal on every pandas version instead of relying on the default of `regex`.
for primer_col in ('primer_name_x', 'primer_name_y'):
    pcr[primer_col] = pcr[primer_col].str.split('_', expand=True)[0] \
                          .str.replace('(', '', regex=False).str.replace(')', '', regex=False)
#reaction table ordered by annealing temperature
pcr_rxn = pcr[['part_name', 'size', 'primer_name_x', 'primer_name_y', 'ta', 'ext_time']] \
   .sort_values('ta').reset_index(drop=True)
pcr_rxn

In [None]:
#save the PCR reaction table (index column omitted)
pcr_rxn.to_csv('datasets/jump/pcr_rxn.csv', index=False)

#### Master Mix Calculation

In [None]:
#per-reaction volumes; 'Excess' is a total overage added once to the whole mix
phusion_pcr = {
    'H2O': 12.4,
    'HF/GC Buffer': 4,
    'dNTPs': 0.4,
    #'FP': 1,
    #'RP': 1,
    #'DNA': 1,
    'DMSO': 0.6,
    'Phusion': 0.2,
    'Excess': 6
}

num_rxn = len(pcr_rxn)
#'Excess' is removed by name, so the table no longer depends on it being the last key
master_mix = pd.Series(phusion_pcr, name='vol').drop('Excess').to_frame()
master_mix['mix'] = master_mix['vol'] * num_rxn
#Scale factor = (volume for all reactions + overage) / volume of one reaction.
#Column sums are taken explicitly instead of indexing the result of np.sum(DataFrame).
factor = np.round((master_mix['mix'].sum() + phusion_pcr['Excess']) / master_mix['vol'].sum(), 2)
master_mix['mix'] = master_mix['vol'] * factor
master_mix

#### Level 0 Assembly

In [None]:
#per-reaction volumes; 'Excess' is a total overage added once to the whole mix
lvl0_rxn = {
    #'Fragment': 2,
    'UAC': 1,
    'BsmBI': 0.5,
    'T4-DNA-Ligase': 0.25,
    'Buffer': 2,
    'H2O': 14.25,
    'Excess': 6
}

num_rxn = len(pcr_rxn)
#'Excess' is removed by name, so the table no longer depends on it being the last key
master_mix = pd.Series(lvl0_rxn, name='vol').drop('Excess').to_frame()
master_mix['mix'] = master_mix['vol'] * num_rxn
#Scale factor = (volume for all reactions + overage) / volume of one reaction.
#Column sums are taken explicitly instead of indexing the result of np.sum(DataFrame).
factor = np.round((master_mix['mix'].sum() + lvl0_rxn['Excess']) / master_mix['vol'].sum(), 2)
master_mix['mix'] = master_mix['vol'] * factor
master_mix

### JUMP Assembly

#### Circuits Assembly Planning

In [2]:
parts = pd.read_csv('datasets/jump/parts.csv')

def _select_parts(mask, label):
    """Rows of `parts` selected by `mask`, without 'type'/'site', with 'part_name' renamed to `label`."""
    return parts[mask].drop(['type', 'site'], axis=1) \
                      .rename(columns={'part_name': label}).reset_index(drop=True)

is_rbs = parts['type'] == 'rbs'
is_terminator = parts['type'] == 'terminator'

sensors = _select_parts(parts['type'] == 'sensor', 'promoter')
#RBS and terminator parts are split by their 'site' code
rbs = _select_parts(is_rbs & (parts['site'] == 'R'), 'rbs')
rbs_rep = _select_parts(is_rbs & (parts['site'] == 'RN'), 'rbs')
ecfs = _select_parts(parts['type'] == 'cds', 'cds')
reps = _select_parts(parts['type'] == 'reporter', 'cds')
term = _select_parts(is_terminator & (parts['site'] == 'T'), 'terminator')
term_rep = _select_parts(is_terminator & (parts['site'] == 'CT'), 'terminator')

In [3]:
#Each gate is written 'AND(output,input1,input2)'. The visible code treats P* and W*
#inputs specially; W0 and W1 are also the names listed under 'connections'.
circuit1 = {
    'name': 'circuit1',
    'sensors': ['P0', 'P1', 'P2', 'P3'],
    'connections': ['W0', 'W1'],
    'gates': [
        'AND(W0,P0,P1)',
        'AND(W1,P2,P3)',
        'AND(R0,W0,W1)',
    ],
}
circuit2 = {
    'name': 'circuit2',
    'sensors': ['P0', 'P1', 'P2'],
    'connections': ['W0'],
    'gates': [
        'AND(W0,P0,P1)',
        'AND(R0,P2,W0)',
    ],
}
circuit3 = {
    'name': 'circuit3',
    'sensors': ['P0', 'P1'],
    'connections': [],
    'gates': [
        'AND(R0,P0,P1)',
    ],
}

In [4]:
#vector sets keyed by the number of transcription units to assemble
CONFIG = dict(
    single_plasmid=True,  #2-plasmids system if False
    permute_promoter=False,
    odd_vectors={
        2: ['1A', '1B*'],
        3: ['1A', '1B', '1C*'],
        4: ['1A', '1B', '1C', '1D'],
        5: ['1A', '1B', '1C', '1D0', '1E'],
        6: ['1A0', '1A1', '1B', '1C', '1D0', '1E'],
        7: ['1A0', '1A1', '1B', '1C', '1D0', '1E0', '1F'],
    },
    even_vectors={
        2: ['2A', '2B*'],
        3: ['2A', '2B', '2C*'],
        4: ['2A', '2B', '2C', '2D'],
        5: ['2A', '2B', '2C', '2D0', '2E'],
    },
)

#### Gates in Isolation

In [22]:
#single plasmid: three odd vectors; two plasmids: '1A-p15a' followed by the two-vector set
if CONFIG['single_plasmid']:
    selected_vectors = CONFIG['odd_vectors'][3]
else:
    selected_vectors = ['1A-p15a'] + CONFIG['odd_vectors'][2]
selected_vectors

['1A', '1B', '1C*']

In [25]:
#whether to permute promoters or not
if CONFIG['permute_promoter']:
    gates = sensors.merge(sensors, how='cross').merge(ecfs, how='cross').rename(columns={'promoter_x': 'promoter_n', 'promoter_y': 'promoter_c'})
    gates = gates[gates['promoter_n']!=gates['promoter_c']].reset_index(drop=True)
else:
    gates = ecfs.copy()
    gates['promoter_n'] = sensors.iat[0, 0]
    gates['promoter_c'] = sensors.iat[1, 0]

#splitting ECF
gates.rename(columns={'cds': 'ecf_n'}, inplace=True) 
gates['ecf_c'] = gates['ecf_n'] + '_C'
gates['ecf_n'] = gates['ecf_n'] + '_N'

#output promoter
gates['promoter_r'] = pd.DataFrame('P' + gates['ecf_n'].str.split('-', expand=True)[0].str[1:])
reporter = pd.DataFrame(gates['promoter_r']).merge(reps, how='cross') \
           .rename(columns={0: 'promoter_r', 'cds': 'reporter'})

if CONFIG['single_plasmid']:
    gates = pd.merge(reporter, gates, on='promoter_r', how='inner')
    gates = gates[['promoter_r', 'reporter', 'promoter_n', 'promoter_c', 'ecf_n', 'ecf_c']]
else:
    gates = gates[['promoter_n', 'promoter_c', 'ecf_n', 'ecf_c']]
gates['vector'] = '2A'
gates = gates.drop_duplicates().reset_index(drop=True)
gates

Unnamed: 0,promoter_r,reporter,promoter_n,promoter_c,ecf_n,ecf_c,vector
0,P11,sGFP,PBAD,PCin,E11-PhoRadA_N,E11-PhoRadA_C,2A
1,P11,sGFP,PBAD,PSalTTC,E11-PhoRadA_N,E11-PhoRadA_C,2A
2,P11,sGFP,PBAD,PLuxB,E11-PhoRadA_N,E11-PhoRadA_C,2A
3,P11,sGFP,PCin,PBAD,E11-PhoRadA_N,E11-PhoRadA_C,2A
4,P11,sGFP,PCin,PSalTTC,E11-PhoRadA_N,E11-PhoRadA_C,2A
5,P11,sGFP,PCin,PLuxB,E11-PhoRadA_N,E11-PhoRadA_C,2A
6,P11,sGFP,PSalTTC,PBAD,E11-PhoRadA_N,E11-PhoRadA_C,2A
7,P11,sGFP,PSalTTC,PCin,E11-PhoRadA_N,E11-PhoRadA_C,2A
8,P11,sGFP,PSalTTC,PLuxB,E11-PhoRadA_N,E11-PhoRadA_C,2A
9,P11,sGFP,PLuxB,PBAD,E11-PhoRadA_N,E11-PhoRadA_C,2A


In [12]:
#first number used for the level-1 ids
counter = 1
#Reporter units: output promoter + reporter RBS + reporter CDS + reporter terminator.
# NOTE(review): `reporter` is overwritten here, so this cell is meant to run once after
# the cell that builds `reporter`; running it again cross-merges the parts a second time.
reporter = reporter.merge(rbs_rep, how='cross').merge(term_rep, how='cross') \
           .rename(columns={'promoter_r': 'promoter', 'reporter': 'cds'})
reporter['vector'] = selected_vectors[0]
#the N and C parts of each gate go into the second and third selected vector
n_term = gates[['promoter_n', 'ecf_n']].rename(columns={'promoter_n': 'promoter', 'ecf_n': 'cds'})
n_term['vector'] = selected_vectors[1]
c_term = gates[['promoter_c', 'ecf_c']].rename(columns={'promoter_c': 'promoter', 'ecf_c': 'cds'})
c_term['vector'] = selected_vectors[2]

#pd.concat replaces DataFrame.append, which was removed in pandas 2.0
ecf_units = pd.concat([n_term, c_term]).merge(rbs, how='cross').merge(term, how='cross')
level_1 = pd.concat([reporter, ecf_units])
level_1 = level_1[['promoter', 'rbs', 'cds', 'terminator', 'vector']].drop_duplicates().reset_index(drop=True)
#name = promoter - last two characters of the RBS - cds _ vector without its first character
level_1['name'] = level_1['promoter'] + '-' + level_1['rbs'].str[-2:] + '-' + \
                  level_1['cds'] + '_' + level_1['vector'].str[1:]
level_1['_id'] = 'A' +  (level_1.index.astype(int) + counter).astype(str)
level_1['component'] = 'gate'
level_1

Unnamed: 0,promoter,rbs,cds,terminator,vector,name,_id,component
0,P11,RBS34,sGFP,B0015,1A,P11-34-sGFP_A,A1,gate
1,P11,RBS34,mCherry,B0015,1A,P11-34-mCherry_A,A2,gate
2,P20,RBS34,sGFP,B0015,1A,P20-34-sGFP_A,A3,gate
3,P20,RBS34,mCherry,B0015,1A,P20-34-mCherry_A,A4,gate
4,P38,RBS34,sGFP,B0015,1A,P38-34-sGFP_A,A5,gate
5,P38,RBS34,mCherry,B0015,1A,P38-34-mCherry_A,A6,gate
6,PBAD,RBS33,E11-PhoRadA_N,B0015,1B,PBAD-33-E11-PhoRadA_N_B,A7,gate
7,PBAD,RBS33,E20-gp411_N,B0015,1B,PBAD-33-E20-gp411_N_B,A8,gate
8,PBAD,RBS33,E38-gp418_N,B0015,1B,PBAD-33-E38-gp418_N_B,A9,gate
9,PCin,RBS33,E11-PhoRadA_C,B0015,1C*,PCin-33-E11-PhoRadA_C_C*,A10,gate


In [13]:
#first number used for the level-2 ids
counter = 1
level_2 = gates.copy()
#Look up the level-1 id of each unit of a gate by its (promoter, cds) pair:
#reporter -> _id_r (single-plasmid only), N part -> _id_n, C part -> _id_c.
if CONFIG['single_plasmid']:
    level_2 = pd.merge(level_2, level_1[['promoter', 'cds', '_id']], left_on=['promoter_r', 'reporter'], right_on=['promoter', 'cds'], how='left') \
              .rename(columns={'_id': '_id_r'})
level_2 = pd.merge(level_2, level_1[['promoter', 'cds', '_id']], left_on=['promoter_n', 'ecf_n'], right_on=['promoter', 'cds'], how='left') \
          .rename(columns={'_id': '_id_n'})
level_2 = pd.merge(level_2, level_1[['promoter', 'cds', '_id']], left_on=['promoter_c', 'ecf_c'], right_on=['promoter', 'cds'], how='left') \
          .rename(columns={'_id': '_id_c'})
#keep the original gate columns plus the looked-up ids; the helper 'promoter'/'cds'
#columns brought in by the merges are dropped here
cols = gates.columns.tolist() + [col for col in level_2.columns if col.startswith('_id')]
level_2 = level_2[cols].drop_duplicates().reset_index(drop=True)   
#name = 'pJ2-' + N promoter + C promoter + first three characters of the ECF name
level_2['name'] = 'pJ2-' + level_2['promoter_n'] + '-' + level_2['promoter_c'] + '-' + level_2['ecf_n'].str[:3]
level_2['_id'] = 'B' + (level_2.index.astype(int) + counter).astype(str)
level_2['component'] = 'circuit'
level_2

Unnamed: 0,promoter_r,reporter,promoter_n,promoter_c,ecf_n,ecf_c,vector,_id_r,_id_n,_id_c,name,_id,component
0,P11,sGFP,PBAD,PCin,E11-PhoRadA_N,E11-PhoRadA_C,2A,A1,A7,A10,pJ2-PBAD-PCin-E11,B1,circuit
1,P11,mCherry,PBAD,PCin,E11-PhoRadA_N,E11-PhoRadA_C,2A,A2,A7,A10,pJ2-PBAD-PCin-E11,B2,circuit
2,P20,sGFP,PBAD,PCin,E20-gp411_N,E20-gp411_C,2A,A3,A8,A11,pJ2-PBAD-PCin-E20,B3,circuit
3,P20,mCherry,PBAD,PCin,E20-gp411_N,E20-gp411_C,2A,A4,A8,A11,pJ2-PBAD-PCin-E20,B4,circuit
4,P38,sGFP,PBAD,PCin,E38-gp418_N,E38-gp418_C,2A,A5,A9,A12,pJ2-PBAD-PCin-E38,B5,circuit
5,P38,mCherry,PBAD,PCin,E38-gp418_N,E38-gp418_C,2A,A6,A9,A12,pJ2-PBAD-PCin-E38,B6,circuit


#### Gates in Circuit

In [18]:
#same vector tables as for the isolated gates, but with promoter permutation enabled
CONFIG = dict(
    single_plasmid=True,  #2-plasmids system if False
    permute_promoter=True,
    odd_vectors={
        2: ['1A', '1B*'],
        3: ['1A', '1B', '1C*'],
        4: ['1A', '1B', '1C', '1D'],
        5: ['1A', '1B', '1C', '1D0', '1E'],
        6: ['1A0', '1A1', '1B', '1C', '1D0', '1E'],
        7: ['1A0', '1A1', '1B', '1C', '1D0', '1E0', '1F'],
    },
    even_vectors={
        2: ['2A', '2B*'],
        3: ['2A', '2B', '2C*'],
        4: ['2A', '2B', '2C', '2D'],
        5: ['2A', '2B', '2C', '2D0', '2E'],
    },
)

#circuit to plan (shallow copy of the selected design)
circuit_design = circuit3.copy()

In [19]:
#two transcription units per gate, plus one more in the single-plasmid layout
num_tus = len(circuit_design['gates']) * 2
if CONFIG['single_plasmid']:
    num_tus += 1
selected_vectors = CONFIG['odd_vectors'][num_tus]
selected_vectors

['1A', '1B', '1C*']

In [20]:
#one promoter input more than there are gates
num_inputs = len(circuit_design['gates']) + 1
input_labels = ['P{}'.format(n) for n in range(num_inputs)]
if CONFIG['permute_promoter']:
    #every ordered selection of num_inputs distinct sensor promoters
    prom = pd.DataFrame(list(permutations(sensors['promoter'].tolist(), num_inputs)))
else:
    #sensors laid out as columns; keep the first num_inputs of them
    prom = sensors.T.reset_index(drop=True).iloc[:,:num_inputs]
prom.columns = input_labels
#constant 'P<n>*' columns holding the label itself, used later as merge keys
for label in input_labels:
    prom[label + '*'] = label

In [21]:
def generate_arrangement(circuit, vectors, cds_names=None):
    """Enumerate the assignments of CDS parts to the gates of `circuit`.

    Parameters
    ----------
    circuit : dict with a 'gates' list of strings shaped 'AND(output,input1,input2)'.
    vectors : sequence of (n_vector, c_vector) pairs, at least one per gate.
    cds_names : optional list of CDS names to permute; defaults to ecfs['cds'].

    Returns
    -------
    (arr, inputs), or (None, None) after printing a message when there are too few
    CDS names or too few vector pairs.

    arr : DataFrame with one row per ordered selection of CDS names. Integer column k
        holds the CDS assigned to gate k. A gate input starting with 'P' adds a constant
        column '<input>*'. A gate input starting with 'W' adds a column 'P<i>', with i the
        position of that input in `inputs`, holding 'P' + characters 1-2 of the CDS
        assigned to the gate whose number follows the 'W'. 'vector_n_<k>' and
        'vector_c_<k>' hold the vector pair of gate k.
    inputs : flat list of the two input names of every gate, in gate order.
    """
    if cds_names is None:
        cds_names = ecfs['cds'].tolist()
    num_gates = len(circuit['gates'])

    arr = pd.DataFrame(list(permutations(cds_names, num_gates)))
    if len(arr)==0:
        print('Not enough number of gates!')
        #return a pair so that `circuit, inputs = generate_arrangement(...)` still unpacks
        return None, None
    if len(vectors) < num_gates:
        print('Not enough vectors!')
        return None, None

    inputs = []
    for gate in circuit['gates']:
        #drop the output (first argument) and keep the two inputs
        inputs.extend(gate.split('(')[1].replace(')', '').split(',')[1:])

    for i, inp in enumerate(inputs):
        if inp.startswith('P'):
            arr[(inp) + '*'] = inp
        elif inp.startswith('W'):
            #the full number after 'W' is parsed, so wires beyond W9 resolve correctly
            arr['P{}'.format(i)] = 'P' + arr[int(inp[1:])].str[1:3]

    for i in range(num_gates):
        arr['vector_n_{}'.format(i)] = vectors[i][0]
        arr['vector_c_{}'.format(i)] = vectors[i][1]
    return arr, inputs

        
#placeholder columns ('P0*', 'P1*', ...) shared by the arrangement table and `prom`
keys = ['P{}*'.format(n) for n in range(num_inputs)]
#In the single-plasmid layout the first selected vector is left out here (the level-1
#cells assign selected_vectors[0] to the reporter); the rest are consumed two at a time.
vector_iter = iter(selected_vectors[1:]) if CONFIG['single_plasmid'] else iter(selected_vectors)
vector_list = list(zip(vector_iter, vector_iter)) #group vectors into a tuple of two vectors
circuit, inputs = generate_arrangement(circuit_design, vector_list)
#replace the placeholders by the actual promoter names, one row per promoter combination
circuit = pd.merge(circuit, prom, on=keys, how='left').drop(keys, axis=1)
circuit

Unnamed: 0,0,vector_n_0,vector_c_0,P0,P1
0,E11-PhoRadA,1B,1C*,PBAD,PCin
1,E11-PhoRadA,1B,1C*,PBAD,PSalTTC
2,E11-PhoRadA,1B,1C*,PBAD,PLuxB
3,E11-PhoRadA,1B,1C*,PCin,PBAD
4,E11-PhoRadA,1B,1C*,PCin,PSalTTC
5,E11-PhoRadA,1B,1C*,PCin,PLuxB
6,E11-PhoRadA,1B,1C*,PSalTTC,PBAD
7,E11-PhoRadA,1B,1C*,PSalTTC,PCin
8,E11-PhoRadA,1B,1C*,PSalTTC,PLuxB
9,E11-PhoRadA,1B,1C*,PLuxB,PBAD


In [None]:
# NOTE(review): rebinds `gates` (previously the isolated-gate table) to a copy of the
# circuit arrangement, whose columns differ - confirm this cell is still needed.
gates = circuit.copy()

In [None]:
#cross every reporter row with the reporter RBS and terminator parts, then set its vector
# NOTE(review): the same statement appears in other cells; if `reporter` already carries
# 'rbs'/'terminator' columns, this merge presumably yields suffixed duplicates - confirm.
reporter = reporter.merge(rbs_rep, how='cross').merge(term_rep, how='cross') \
           .rename(columns={'promoter_r': 'promoter', 'reporter': 'cds'})
reporter['vector'] = selected_vectors[0]
reporter

In [None]:
# NOTE(review): this cell repeats the level-1 construction of the "Gates in Isolation"
# section. It reads gates['promoter_n'], gates['ecf_n'], ... and `counter`, so it needs
# `gates` to be the isolated-gate table rather than the copy of `circuit` - confirm that
# it is still needed.
reporter = reporter.merge(rbs_rep, how='cross').merge(term_rep, how='cross') \
           .rename(columns={'promoter_r': 'promoter', 'reporter': 'cds'})
reporter['vector'] = selected_vectors[0]
n_term = gates[['promoter_n', 'ecf_n']].rename(columns={'promoter_n': 'promoter', 'ecf_n': 'cds'})
n_term['vector'] = selected_vectors[1]
c_term = gates[['promoter_c', 'ecf_c']].rename(columns={'promoter_c': 'promoter', 'ecf_c': 'cds'})
c_term['vector'] = selected_vectors[2]

#pd.concat replaces DataFrame.append, which was removed in pandas 2.0
ecf_units = pd.concat([n_term, c_term]).merge(rbs, how='cross').merge(term, how='cross')
level_1 = pd.concat([reporter, ecf_units])
level_1 = level_1[['promoter', 'rbs', 'cds', 'terminator', 'vector']].drop_duplicates().reset_index(drop=True)
level_1['name'] = level_1['promoter'] + '-' + level_1['rbs'].str[-2:] + '-' + \
                  level_1['cds'] + '_' + level_1['vector'].str[1:]
level_1['_id'] = 'A' +  (level_1.index.astype(int) + counter).astype(str)
level_1['component'] = 'gate'
level_1

In [None]:
#Reshape the wide arrangement table (one row per circuit) into one row per gate:
#promoter pair, ECF and vector pair.
component_cols = ['promoter_n', 'promoter_c', 'ecf_n', 'vector_n', 'vector_c']
num_gates = len(circuit_design['gates'])
gates = circuit[list(range(num_gates))]
inputs = circuit[['P{}'.format(i) for i in range(num_gates*2)]]
vectors = circuit[[col for col in circuit.columns.tolist() if str(col).startswith('vector')]]
#collect the per-gate slices and concatenate once instead of growing a frame in the loop
per_gate = []
for n in range(num_gates):
    #gate n uses inputs 2n and 2n+1 and the vector columns 2n and 2n+1
    temp = pd.concat([inputs.iloc[:,n*2], inputs.iloc[:,n*2+1], gates.iloc[:,n], vectors.iloc[:,n*2], vectors.iloc[:,n*2+1]], axis=1)
    temp.columns = component_cols
    per_gate.append(temp)
#a circuit without gates yields an empty table with the expected columns
components = pd.concat(per_gate, axis=0) if per_gate else pd.DataFrame(columns=component_cols)
components = components.drop_duplicates().reset_index(drop=True)
components['ecf_c'] = components['ecf_n'] + '_C'
components['ecf_n'] = components['ecf_n'] + '_N'
components

In [None]:
#Output promoter of every ECF: text before the first '-' with its first character
#replaced by 'P'. `ecfs` stores the part names in the 'cds' column (renamed from
#'part_name' when the parts table was split), so that is the column to read.
output_prom = pd.DataFrame('P' + ecfs['cds'].str.split('-', expand=True)[0].str[1:])
#every output promoter with every reporter RBS, reporter CDS and reporter terminator
output_gate = output_prom.merge(rbs_rep, how='cross').merge(reps, how='cross').merge(term_rep, how='cross')
output_gate.columns = ['promoter', 'rbs', 'cds', 'terminator']
output_gate['name'] = output_gate['promoter'] + '-' + output_gate['rbs'].str[-2:] + '-' + \
                           output_gate['cds']
output_gate = output_gate[['name', 'promoter', 'rbs', 'cds', 'terminator']]
#CONFIG has no 'num_plasmids' key; the two-plasmid system is CONFIG['single_plasmid'] == False
if not CONFIG['single_plasmid']:
    output_gate['vector'] = '1A-p15a'
else:
    output_gate['vector'] = selected_vectors[0]
output_gate['name'] = output_gate['name'] + '_' + output_gate['vector'].str[1:]
output_gate['component'] = 'circuit'
output_gate

In [None]:
#Cross every component with the RBS and terminator parts twice: once for the N unit and
#once for the C unit. The resulting columns are renamed purely by position just below,
#so the order of the merges and of the `components` columns matters.
level_2 = components.merge(rbs, how='cross').merge(term, how='cross') \
                .merge(rbs, how='cross').merge(term, how='cross') \
                .drop_duplicates().reset_index(drop=True)
level_2.columns = ['promoter_n', 'promoter_c', 'ecf_n', 'vector_n', 'vector_c', 'ecf_c', \
                   'rbs_n', 'terminator_n', 'rbs_c', 'terminator_c']
level_2 = level_2.drop_duplicates().reset_index(drop=True)
#names: level-2 construct, then its N and C level-1 units (first three ECF characters only)
level_2['name'] = 'pJ2-' + level_2['promoter_n'] + '-' + level_2['promoter_c'] + '-' + level_2['ecf_n'].str[:3]
level_2['name_n'] = 'pJ1-' + level_2['promoter_n'] + '-' + level_2['ecf_n'].str[:3] + '-N' + '_' + level_2['vector_n'].str[1:]
level_2['name_c'] = 'pJ1-' + level_2['promoter_c'] + '-' + level_2['ecf_c'].str[:3] + '-C' + '_' + level_2['vector_c'].str[1:]
level_2['vector'] = '2A'
gate_level_2 = level_2[['name', 'name_n', 'name_c', 'vector',
                   'promoter_n', 'rbs_n', 'ecf_n', 'terminator_n', 'vector_n',
                   'promoter_c', 'rbs_c', 'ecf_c', 'terminator_c', 'vector_c']]
#output promoter key ('P' + characters 1-2 of the ECF name) used to attach the reporter unit
gate_level_2['promoter'] = 'P' + gate_level_2['ecf_n'].str[1:3]
gate_level_2 = pd.merge(gate_level_2, output_gate, on='promoter', how='left')
#after the merge the reporter unit's name is 'name_y' (renamed to 'name_r'); its vector stays in 'vector_y'
gate_level_2.rename(columns={'name_x': 'name', 'name_y': 'name_r', 'vector_x': 'vector'}, inplace=True)
#gate_level_2

In [None]:
#collect the N and C level-1 units of every gate under common column names
unit_cols = ['name', 'promoter', 'rbs', 'cds', 'terminator', 'vector', 'component']
n_terms = gate_level_2[['name_n', 'promoter_n', 'rbs_n', 'ecf_n', 'terminator_n', 'vector_n', 'component']]
n_terms.columns = unit_cols
c_terms = gate_level_2[['name_c', 'promoter_c', 'rbs_c', 'ecf_c', 'terminator_c', 'vector_c', 'component']]
c_terms.columns = unit_cols
#pd.concat replaces DataFrame.append, which was removed in pandas 2.0
level_1 = pd.concat([n_terms, c_terms]).drop_duplicates()
#offset added to the running id number
start = 0
#reporter units first, then the gate units; ids are C001, C002, ...
gate_all_lvl1 = pd.concat([output_gate, level_1]).reset_index(drop=True)
gate_all_lvl1['_id'] = pd.Series(gate_all_lvl1.index).apply(lambda x: 'C' + str(x+1+start).zfill(3))
gate_all_lvl1[['_id', 'name', 'promoter', 'rbs', 'cds', 'terminator', 'vector', 'component']]

In [None]:
#save the level-1 units with their ids (index column omitted)
gate_all_lvl1[['_id', 'name', 'promoter', 'rbs', 'cds', 'terminator', 'vector', 'component']].to_csv('datasets/jump/circuit-level-1.csv', index=False)

In [None]:
# NOTE(review): `arr2` is not defined in any visible cell of this notebook; this cell
# will fail on a fresh kernel unless it is created elsewhere - confirm its origin.
#replace each gate column n by its two part names, '<cds>_N' and '<cds>_C'
for n in range(num_gates):
    arr2['ecf_n_{}'.format(n)] = arr2[n] + '_N'
    arr2['ecf_c_{}'.format(n)] = arr2[n] + '_C'
arr2 = arr2.drop([n for n in range(num_gates)], axis=1)
arr2

In [None]:
# NOTE(review): `start` is assigned but not used in this cell.
start = len(gate_all_lvl1)
gate_level_2_id = arr2.copy()
#first pass: look up the level-1 id of the N and C part of every gate by CDS name
for n in range(num_gates):
    gate_level_2_id = pd.merge(gate_level_2_id, gate_all_lvl1[['_id', 'cds']],
            left_on='ecf_n_{}'.format(n), right_on='cds', how='left') \
            .rename(columns={'_id': '_id_n_{}'.format(n)})
    gate_level_2_id = pd.merge(gate_level_2_id, gate_all_lvl1[['_id', 'cds']],
            left_on='ecf_c_{}'.format(n), right_on='cds', how='left') \
            .rename(columns={'_id': '_id_c_{}'.format(n)})
#only the C ids and the part names are kept here; the N ids are dropped and looked up again below
cols = np.array([['_id_c_{}'.format(n), 'ecf_n_{}'.format(n), 'ecf_c_{}'.format(n)] for n in range(num_gates)]).ravel().tolist()
gate_level_2_id = gate_level_2_id[cols]

#second pass: look up the N ids again
# NOTE(review): the first pass already computed the N ids; presumably the two-pass form
# works around the helper 'cds' columns added by each merge - confirm.
for n in range(num_gates):    
    gate_level_2_id = pd.merge(gate_level_2_id, gate_all_lvl1[['_id', 'cds']],
            left_on='ecf_n_{}'.format(n), right_on='cds', how='left') \
            .rename(columns={'_id': '_id_n_{}'.format(n)})
#final column order per gate: N id, N part, C id, C part
cols = np.array([['_id_n_{}'.format(n), 'ecf_n_{}'.format(n), '_id_c_{}'.format(n), 'ecf_c_{}'.format(n)] for n in range(num_gates)]).ravel().tolist()
gate_level_2_id = gate_level_2_id[cols]
gate_level_2_id

In [None]:
# NOTE(review): `circuit_level_2` is not defined in any visible cell - confirm its origin.
#collect the N and C level-1 units of every circuit under common column names
unit_cols = ['name', 'promoter', 'rbs', 'cds', 'terminator', 'vector']
n_terms = circuit_level_2[['name_n', 'promoter_n', 'rbs_n', 'ecf_n', 'terminator_n', 'vector_n']]
n_terms.columns = unit_cols
c_terms = circuit_level_2[['name_c', 'promoter_c', 'rbs_c', 'ecf_c', 'terminator_c', 'vector_c']]
c_terms.columns = unit_cols
#pd.concat replaces DataFrame.append, which was removed in pandas 2.0
level_1 = pd.concat([n_terms, c_terms]).drop_duplicates().reset_index(drop=True)
level_1['component'] = 'circuit'
level_1

In [None]:
#reporter units followed by the circuit's level-1 units
#(pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
circuit_all_lvl1 = pd.concat([output_gate, level_1]).reset_index(drop=True)
circuit_all_lvl1

In [None]:
#Merge the gate and circuit level-1 units into one table with fresh G-ids.
#Duplicates are detected on the descriptive columns only: gate_all_lvl1 carries an '_id'
#column that circuit_all_lvl1 lacks, so comparing whole rows would never match a unit
#present in both tables (for example the reporter units from output_gate).
unit_cols = ['name', 'promoter', 'rbs', 'cds', 'terminator', 'vector', 'component']
#pd.concat replaces DataFrame.append, which was removed in pandas 2.0
all_lvl1 = pd.concat([gate_all_lvl1, circuit_all_lvl1]) \
             .drop_duplicates(subset=unit_cols).reset_index(drop=True)
all_lvl1['_id'] = pd.Series(all_lvl1.index).apply(lambda x: 'G' + str(x+1).zfill(3))
all_lvl1 = all_lvl1[['_id'] + unit_cols]
all_lvl1

In [None]:
# NOTE(review): `circuit_level_2` is not defined in any visible cell - confirm its origin.
#all level-2 constructs (gates and circuits) with the names of their N and C units
#(pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
all_lvl2 = pd.concat([gate_level_2, circuit_level_2]).drop_duplicates().reset_index(drop=True) \
            [['name', 'name_n', 'name_c', 'vector', 'component']]
#look up the level-1 id of the N unit by name
all_lvl2 = pd.merge(all_lvl2, all_lvl1, left_on='name_n', right_on='name', how='left') \
           [['name_x', '_id', 'name_n', 'name_c', 'vector_x', 'component_x']]
all_lvl2.columns = ['name', '_id_n', 'name_n', 'name_c', 'vector', 'component']
#look up the level-1 id of the C unit by name
all_lvl2 = pd.merge(all_lvl2, all_lvl1, left_on='name_c', right_on='name', how='left') \
            [['name_x', '_id_n', 'name_n', '_id', 'name_c', 'vector_x', 'component_x']]
all_lvl2.columns = ['name', '_id_n', 'name_n', '_id_c', 'name_c', 'vector', 'component']
#level-2 ids continue the G numbering after the last level-1 id
all_lvl2['_id'] = pd.Series(all_lvl2.index).apply(lambda x: 'G' + str(x+len(all_lvl1)+1).zfill(3))
all_lvl2 = all_lvl2[['_id', 'name', '_id_n', 'name_n', '_id_c', 'name_c', 'vector', 'component']]
all_lvl2