In [None]:
import numpy as np
import toml
import pygsheets
import requests
import re
import itertools as it
from tqdm.auto import tqdm
import Bio.Restriction as Restriction
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import paulssonlab.api as api
import paulssonlab.cloning.registry as registry
import paulssonlab.cloning.workflow as workflow
import paulssonlab.cloning.design as design
import paulssonlab.cloning.sequence as sequence
import paulssonlab.cloning.enzyme as enzyme
import paulssonlab.cloning.viennarna as viennarna
import paulssonlab.cloning.thermodynamics as thermodynamics
import paulssonlab.cloning.primers as primers
import paulssonlab.api.geneious as geneious
from paulssonlab.util import grouper

# Setup

In [None]:
# Load the notebook's settings from config.toml; the "geneious" and "registry"
# tables are read by the cells below.
config = toml.load("config.toml")

In [None]:
# Authorize a pygsheets client using the service-account key file credentials.json.
gc = pygsheets.authorize(service_account_file="credentials.json")

In [None]:
# Connect to Geneious, passing every key under config["geneious"] as a keyword argument.
geneious_sessionmaker = geneious.connect(**config["geneious"])

In [None]:
# Build the registry handle from the Sheets client, the configured registry
# folder and the Geneious session maker.
# NOTE(review): geneious_folder="registry" presumably names the Geneious folder
# that holds the registry's documents -- confirm against registry.Registry.
reg = registry.Registry(
    gc,
    config["registry"]["folder"],
    geneious_sessionmaker=geneious_sessionmaker,
    geneious_folder="registry",
)

# Config

In [None]:
# Registry entries used throughout this notebook, looked up by tuple keys.
olib_oligos = reg[("oLIB", "oligos")]
plib_plasmids = reg[("pLIB", "plasmids")]
plib_maps = reg[("pLIB", "maps")]
lib_strains = reg[("LIB", "strains")]
flib_fragments = reg[("fLIB", "fragments")]
# Indexed by part-type name below to obtain golden gate overhangs.
part_types = reg[("fLIB", "fragments", "Part types")]

In [None]:
# Prefix prepended to every part name parsed from the source plasmid descriptions.
name_prefix = "AM19_"
storage_vector_id = "pLIB112"
# Source plasmids pLIB145 through pLIB158 (the range end is exclusive).
source_plasmid_ids = [f"pLIB{i}" for i in range(145, 159)]
# Enzyme used below to digest PCR products / annealed oligos into parts.
part_enzyme = Restriction.BsaI
# Enzyme used below to digest both the storage vector and the PCR product for assembly.
storage_enzyme = Restriction.BsmBI
# Enzyme passed to design.golden_gate_placeholder for the bicistronic placeholder.
bicistronic_enzyme = Restriction.BsmBI
num_random_bases = 6  # random bases to add between enzyme binding site and end of DNA
# Sequences placed directly before and after each CDS when building its part sequence.
cds_flanks = ("a", "taataaggt")  # double stop
# (prefix, suffix) sequences of the storage vector, read from the fragment registry.
storage_flanks = (
    flib_fragments.find({"Name": "JUMP_storage_vector_prefix"})["Sequence"],
    flib_fragments.find({"Name": "JUMP_storage_vector_suffix"})["Sequence"],
)
cds_part_type = "CDS_CD"
cds_overhangs = workflow.overhangs_for(part_types[cds_part_type])
bicistronic_overhangs = workflow.overhangs_for(part_types["3G_BD"])
promoter_part_type = "Promoter_AB"
promoter_overhangs = workflow.overhangs_for(part_types[promoter_part_type])
# The first source plasmid's "Reference" entry is reused for the rows created below.
reference = plib_plasmids[source_plasmid_ids[0]]["Reference"]

# deg tags
# tag_part_type = "ClpXP_tag"
# tag_overhangs = workflow.overhangs_for(part_types[tag_part_type])
# library_reverse_primer = "oLIB46"
# placeholder_enzyme = Restriction.BsaI
# tail_length_aa = 3  # aa
# tail_length = tail_length_aa * 3  # nt
# sspb_length_aa = 6  # aa (part of the SspB binding site that is mutagenized with NNK's; note we keep first two alanines fixed)
# sspb_length = sspb_length_aa * 3  # nt
# head_length = 6  # nt

# primers
# Passed as min_tm to primers.iter_primers below.
tm_binding = 60
# Passed as min_mfe to primers.iter_primers below.
min_mfe = None

In [None]:
# Sequences bounding the promoter region in every source plasmid map. Both are
# passed to sequence.amplicon_location below, the second one reverse-complemented.
promoter_bounds = (
    "CGCTTAACGATCGTTGGCTG",
    "AGCTGTCACCGGATGTGCTTTCCGGTCTGATGAGTCCGTGAGGACGAAACAG",
)

In [None]:
# Default sequences bounding the CDS region in a source plasmid map.
cds_bounds = ("CGCCCGGAAGAGAGTCAATT", "TAATTGGTAACGAAT")
# Per-plasmid replacements for the downstream bound. For these two plasmids the
# region between the override bound and the default downstream bound is searched
# separately below for the auxiliary CDS.
cds_bounds_override = {
    "pAJM.677": (cds_bounds[0], "TAAGATCCTATTCCAGCGGGATTAAAGAGGAGCGATTAAGC"),
    "pAJM.969": (cds_bounds[0], "TAATATTGAAAAAGGAAGAGT"),
}
# Added to the start of each located CDS region before trimming to the ATG.
cds_offset = 13  # ATG should be 13bp (or in one case, 14bp) after cds_bounds[0]

In [None]:
# Placeholder built from the bicistronic enzyme and the 3G_BD overhangs; it is
# appended to the "bicistronic" CDS variants further down.
# NOTE(review): the meaning of the `None` second argument is not visible from
# this notebook -- confirm against design.golden_gate_placeholder.
bicistronic_placeholder = design.golden_gate_placeholder(
    bicistronic_enzyme, None, *bicistronic_overhangs
)

# Marionette parts

## Sequences from plasmid maps

In [None]:
# Parse every source plasmid's description, which is expected to read
#   "<repressor> + [<aux> +] <promoter>-YFP Reporter..."
# and build the lookup tables between registry IDs, original plasmid names and
# the (prefixed) names of the parts each plasmid carries.
ajm_to_repressor = {}
ajm_to_aux = {}  # only plasmids that list an auxiliary gene get an entry
ajm_to_promoter = {}
id_to_ajm = {}
ajm_to_id = {}
for id_ in source_plasmid_ids:
    plasmid = plib_plasmids[id_]
    orig_name = plasmid["Names"]
    description_match = re.match(
        r"^(\S+) \+(?: (\S+) \+)? ?(\S+)-YFP Reporter.*", plasmid["Description"]
    )
    if description_match is None:
        # Name the offending plasmid instead of failing with an opaque
        # "'NoneType' object has no attribute 'groups'".
        raise ValueError(
            f"could not parse description of {id_} ({orig_name}): {plasmid['Description']!r}"
        )
    # Prefix each captured name; the optional aux group stays None when absent.
    repressor, aux, promoter = (
        name_prefix + part if part else part for part in description_match.groups()
    )
    id_to_ajm[id_] = orig_name
    ajm_to_id[orig_name] = id_
    ajm_to_repressor[orig_name] = repressor
    if aux:
        ajm_to_aux[orig_name] = aux
    ajm_to_promoter[orig_name] = promoter

In [None]:
%%time
# Plasmid maps of the source plasmids, indexed by each plasmid's "Names" field
# rather than by its registry ID.
source_plasmid_maps = dict(
    (plib_plasmids[plasmid_id]["Names"], plib_maps[plasmid_id])
    for plasmid_id in source_plasmid_ids
)

In [None]:
%%time
# For every source plasmid, locate the promoter as the amplicon between
# promoter_bounds[0] and the reverse complement of promoter_bounds[1].
promoter_locations = dict(
    (
        ajm,
        np.array(
            sequence.amplicon_location(
                plasmid_map,
                promoter_bounds[0],
                sequence.reverse_complement(promoter_bounds[1]),
            )
        ),
    )
    for ajm, plasmid_map in source_plasmid_maps.items()
)

In [None]:
# Cut each promoter out of its plasmid map by unpacking the located bounds into .slice().
promoter_source_seqs = dict(
    (ajm, source_plasmid_maps[ajm].slice(*bounds))
    for ajm, bounds in promoter_locations.items()
)
# pAJM.474 gets one extra leading G; without it there is an undesired BsaI site at the 5' end.
# TODO: because the base was added to promoter_source_seqs rather than to forward_overhang,
# the primer Tm comes out slightly lower than expected. This was a mistake; adding the base
# to the overhang would be more elegant.
promoter_source_seqs["pAJM.474"] = Seq("G") + promoter_source_seqs["pAJM.474"]
# Per-plasmid notes describing deviations from the source sequence.
extra_promoter_description = {
    "pAJM.474": "Prepended a G to the promoter sequence to avoid creating an undesired BsaI site."
}

In [None]:
%%time
# Locate the CDS region in every source plasmid, then move the region's start
# forward by cds_offset so that it begins near the ATG.
cds_locations = {}
for name, seq in source_plasmid_maps.items():
    # Use the plasmid-specific bounds when present, the defaults otherwise.
    bounds = cds_bounds_override.get(name, cds_bounds)
    raw_location = sequence.amplicon_location(
        seq, bounds[0], sequence.reverse_complement(bounds[1])
    )
    loc = np.array(raw_location)
    loc += np.array([cds_offset, 0])
    cds_locations[name] = loc

In [None]:
# Display the located (offset-adjusted) CDS bounds for inspection.
cds_locations

In [None]:
def trim_cds(label, seq, loc=None):
    """Trim a candidate CDS region to its first ATG and first in-frame stop codon.

    Parameters
    ----------
    label : str
        Name used in the progress messages printed while trimming.
    seq : str
        Lowercase sequence of the candidate region (the comparisons below are
        against lowercase "atg"/"taa"/"tga"/"tag").
    loc : array-like of two ints, optional
        Bounds of ``seq`` within its plasmid map. When omitted, the global
        variable ``loc`` is used, which is what the original two-argument
        version of this function implicitly relied on. New callers should
        pass it explicitly.

    Returns
    -------
    numpy.ndarray
        ``loc`` with its start moved forward to the first "atg" and its end
        moved back so the region stops just before the first in-frame stop
        codon. The end is unchanged when no in-frame stop codon is found.

    Raises
    ------
    ValueError
        If ``seq`` contains no "atg" (raised by ``seq.index``).
    NameError
        If ``loc`` is omitted and no global ``loc`` exists.
    """
    if loc is None:
        # Backward compatibility: existing cells call trim_cds(label, seq) from
        # inside a `for name, loc in ...` loop and expect the loop variable to be used.
        try:
            loc = globals()["loc"]
        except KeyError:
            raise NameError("name 'loc' is not defined") from None
    start = seq.index("atg")
    if start > 0:
        print(f"{label}: trimming CDS start by {start} nt")
    stop = 0
    # Walk the reading frame codon by codon; slicing (rather than grouping
    # characters) means a trailing partial codon is just a short string that
    # never matches a stop codon.
    for pos in range(start, len(seq), 3):
        codon = str(seq[pos : pos + 3])
        if codon in ("taa", "tga", "tag"):
            # Number of nt to drop from the end, counted from the first base
            # of the stop codon (so the stop codon itself is removed).
            stop = len(seq) - pos
            print(
                f"{label}: trimming CDS end by {stop} nt ('{codon}' stop codon found)"
            )
            break
    return np.asarray(loc) + np.array([start, -stop])

In [None]:
# Tighten each CDS region to ATG..stop.
# NOTE: the loop variable must stay named `loc`: trim_cds is called without a
# location argument and picks up the global `loc` bound by this loop.
cds_locations_trimmed = {}
for name, loc in cds_locations.items():
    region = source_plasmid_maps[name].slice(*loc)
    seq = region.seq_lower()
    cds_locations_trimmed[name] = trim_cds(f"{name} ({ajm_to_repressor[name]})", seq)

In [None]:
# Cut the trimmed CDS out of each plasmid map.
cds_source_seqs = dict(
    (ajm, source_plasmid_maps[ajm].slice(*bounds))
    for ajm, bounds in cds_locations_trimmed.items()
)

In [None]:
# Display the length of each trimmed CDS as a quick sanity check.
{name: len(seq) for name, seq in cds_source_seqs.items()}

In [None]:
%%time
# Only the plasmids with overridden CDS bounds are searched for an auxiliary CDS.
# Its region runs from the override's downstream bound to the reverse complement
# of the default downstream bound.
aux_locations = {}
for name, bounds in cds_bounds_override.items():
    seq = source_plasmid_maps[name]
    raw_location = sequence.amplicon_location(
        seq, bounds[1], sequence.reverse_complement(cds_bounds[1])
    )
    loc = np.array(raw_location)
    aux_locations[name] = loc

In [None]:
# Tighten each auxiliary CDS region to ATG..stop.
# NOTE: the loop variable must stay named `loc`: trim_cds is called without a
# location argument and picks up the global `loc` bound by this loop.
aux_locations_trimmed = {}
for name, loc in aux_locations.items():
    region = source_plasmid_maps[name].slice(*loc)
    seq = region.seq_lower()
    aux_locations_trimmed[name] = trim_cds(f"{name} ({ajm_to_aux[name]})", seq)

In [None]:
# Cut the trimmed auxiliary CDS out of each plasmid map.
aux_source_seqs = dict(
    (ajm, source_plasmid_maps[ajm].slice(*bounds))
    for ajm, bounds in aux_locations_trimmed.items()
)

In [None]:
# Display the length of each trimmed auxiliary CDS as a quick sanity check.
{name: len(seq) for name, seq in aux_source_seqs.items()}

## Sequences from paper

In [None]:
cds_paper_seqs = {
    "pAJM.011": "ATGTCCAGATTAGATAAAAGTAAAGTGATTAACA GCGCATTAGAGCTGCTTAATGAGGTCGGAATCGAAGGTTTAACAACCCGTAAACTCGCCCAGAAGCTAGGTGTAGAGCAGCCTACATTGTATTGGCATGTAAAAAATAAGCGGGC TTTGCTCGACGCCTTAGCCATTGAGATGTTAGATAGGCACCATACTCACTTTTGCCCTTTAGAAGGGGAAAGCTGGCAAGATTTTTTACGTAATAACGCTAAAAGTTTTAGATGT GCTTTACTAAGTCATCGCGATGGAGCAAAAGTACATTTAGGTACACGGCCTACAGAAAAACAGTATGAAACTCTCGAAAATCAATTAGCCTTTTTATGCCAACAAGGTTTTTCAC TAGAGAATGCATTATATGCACTCAGCGCTGTGGGGCATTTTACTTTAGGTTGCGTATTGGAAGATCAAGAGCATCAAGTCGCTAAAGAAGAAAGGGAAACACCTACTACTGATAG TATGCCGCCATTATTACGACAAGCTATCGAATTATTTGATCACCAAGGTGCAGAGCCAGCCTTCTTATTCGGCCTTGAATTGATCATATGCGGATTAGAAAAACAACTTAAATGT GAAAGTGGGTCCTGA",
    "pAJM.336": "ATGAAACCAGTAACGTTATACGATGTCGCAGAGTATGCCGGTGTCTCTTATATG ACCGTTTCCCGCGTGGTGAACCAGGCCAGCCACGTTTCTGCGAAAACGCGGGAAAAAGTGGAAGCGGCGATGGTGGAGCTGAATTACATTCCCAACCGCGTGGCACAACAACTGG CGGGCAAACAGTCGTTGCTGATTGGCGTTGCCACCTCCAGTCTGGCCCTGCACGCGCCGTCGCAAATTGTCGCGGCGATTAAATCTCGCGCCGATCAACTGGGTGCCAGCGTGGT GGTGTCGATGGTAGAACGAAGCGGCGTCGAAGCCTGTAAAGCGGCGGTGCACAATCTTCTCGCGCAACGCGTCAGTGGGCTGATCATTAACTATCCGCTGGATGACCAGGATGCC ATTGCTGTGGAAGCTGCCTGCACTAATGTTCCGGCGTTATTTCTTGATGTCTCTGACCAGACACCCATCAACAGTATTATTTACTCCCATGAGGACGGTACGCGACTGGGCGTGG AGCATCTGGTCGCATTGGGTCACCAGCAAATCGCGCTGTTAGCGGGCCCATTAAGTTCTGTCTCGGCGCGTCTGCGTCTGGCTGGCTGGCATAAATATCTCACTCGCAATCAAAT TCAGCCGATAGCGGAACGGGAAGGCGACTGGAGTGCCATGTCCGGTTTTCAACAAACCATGCAAATGCTGAATGAGGGCATCGTTCCCACTGCGATGCTGGTTGCCAACGATCAG ATGGCGCTGGGCGCAATGCGCGCCATTACCGAGTCCGGGCTGCGCGTTGGTGCGGATATCTCGGTAGTGGGATACGACGATACCGAAGATAGCTCATGTTATATCCCGCCGTTAA CCACCATCAAACAGGATTTTCGCCTGCTGGGGCAAACCAGCGTGGACCGCTTGCTGCAACTCTCTCAGGGCCAGGCGGTGAAGGGCAATCAGCTGTTGCCAGTCTCACTGGTGAA AAGAAAAACCACCCTGGCGCCCAATACGCAAACCGCCTCTCCCCGCGCGTTGGCCGATTCATTAATGCAGCTGGCACGACAGGTTTCCCGACTGGAAAGCGGGCAGTGA",
    "pAJM.474": "ATGAAAAACATAAATGCCGACGACACATACAGAATAATTAATAAAATTAAAGCTT GTAGAAGCAATAATGATATTAATCAATGCTTATCTGATATGACTAAAATGGTACATTGTGAATATTATTTACTCGCGATCATTTATCCTCATTCTATGGTTAAATCTGATATTTC AATCCTAGATAATTACCCTAAAAAATGGAGGCAATATTATGATGACGCTAATTTAATAAAATATGATCCTATAGTAGATTATTCTAACTCCAATCATTCACCAATTAATTGGAAT ATATTTGAAAACAATGCTGTAAATAAAAAATCTCCAAATGTAATTAAAGAAGCGAAAACATCAGGTCTTATCACTGGGTTTAGTTTCCCTATTCATACGGCTAACAATGGCTTCG GAATGCTTAGTTTTGCACATTCAGAAAAAGACAACTATATAGATAGTTTATTTTTACATGCGTGTATGAACATACCATTAATTGTTCCTTCTCTAGTTGATAATTATCGAAAAAT AAATATAGCAAATAATAAATCAAACAACGATTTAACCAAAAGAGAAAAAGAATGTTTAGCGTGGGCATGCGAAGGAAAAAGCTCTTGGGATATTTCAAAAATATTAGGTTGCAGT GAGCGTACTGTCACTTTCCATTTAACCAATGCGCAAATGAAACTCAATACAACAAACCGCTGCCAAAGTATTTCTAAAGCAATTTTAACAGGAGCAATTGATTGCCCATACTTTAAAAATTGA",
    "pAJM.657": "ATGAGCCCGAAACGTCGTAC CCAGGCAGAACGTGCAATGGAAACCCAGGGTAAACTGATTGCAGCAGCACTGGGTGTTCTGCGTGAAAAAGGTTATGCAGGTTTTCGTATTGCAGATGTTCCGGGTGCAGCCGGT GTTAGCCGTGGTGCACAGAGCCATCATTTTCCGACCAAACTGGAACTGCTGCTGGCAACCTTTGAATGGCTGTATGAGCAGATTACCGAACGTAGCCGTGCACGTCTGGCAAAAC TGAAACCGGAAGATGATGTTATTCAGCAGATGCTGGATGATGCAGCAGAATTTTTTCTGGATGATGATTTTAGCATCGGCCTGGATCTGATTGTTGCAGCAGATCGTGATCCGGC ACTGCGTGAAGGTATTCAGCGTACCGTTGAACGTAATCGTTTTGTTGTTGAAGATATGTGGCTGGGTGTGCTGGTGAGCCGTGGTCTGAGCCGTGATGATGCCGAAGATATTCTG TGGCTGATTTTTAACAGCGTTCGTGGTCTGGTAGTTCGTAGCCTGTGGCAGAAAGATAAAGAACGTTTTGAACGTGTGCGTAATAGCACCCTGGAAATTGCACGTGAACGTTATG CAAAATTCAAACGTTGA",
    "pAJM.661": "ATGGTTCGTCGTACCAAAGAAGAGGCACAAGAAACCCGTGCACAGATTATTGAAGCAGC AGAACGTGCATTCTATAAACGTGGTGTTGCACGTACCACCCTGGCAGATATTGCAGAACTGGCAGGCGTTACCCGTGGTGCAATTTATTGGCATTTTAACAACAAAGCCGAACTG GTTCAGGCACTGCTGGATAGCCTGCATGAAACCCATGATCATCTGGCACGTGCAAGCGAAAGCGAAGATGAAGTTGATCCGCTGGGTTGTATGCGTAAACTGCTGCTGCAGGTTT TTAATGAACTGGTTCTGGATGCACGTACCCGTCGTATTAATGAAATCCTGCATCACAAATGCGAGTTCACCGATGATATGTGTGAAATTCGTCAGCAGCATCAGAGCGCAGTTCT GGATTGTCATAAAGGTATTACCCTGACACTGGCAAATGTAGTTCGTCGTGGTCAGCTGCCTGGTGAACTGGATGCAGAACGTGCCGCAGTTGCAATGTTTGCCTATGTTGATGGT CTGATTCGTCGTTGGCTGCTGCTGCCGGATAGCGTTGATCTGCTGGGTGATGTTGAAAAATGGGTTGATACCGGTCTGGATATGCTGCGTCTGAGTCCGGCACTGCGTAAATAAT GA",
    "pAJM.677": "ATGGCTGAAGCGCAAAATGATCCCCTGCTGCCGGGATACTCGTTTAATGCC CATCTGGTGGCGGGTTTAACGCCGATTGAGGCCAACGGTTATCTCGATTTTTTTATCGACCGACCGCTGGGAATGAAAGGTTATATTCTCAATCTCACCATTCGCGGTCAGGGGG TGGTGAAAAATCAGGGACGAGAATTTGTTTGCCGACCGGGTGATATTTTGCTGTTCCCGCCAGGAGAGATTCATCACTACGGCCGTCATCCGGAGGCTCGCGAATGGTATCACCA GTGGGTTTACTTTCGTCCGCGCGCCTACTGGCATGAATGGCTTAACTGGCCGTCAATATTTGCCAATACGGGGTTCTTTCGCCCGGATGAAGCGCACCAGCCGCATTTCAGCGAC TTTTTTGGGCAAATCATTAACGCCGGGCAAGGGGAAGGGCGCTATTCGGAGCTGCTGGCGATAAATCTGCTTGAGCAATTGTTACTGCGGCGCATGCTAGCGATTAACGGATCGC TCCATCCACCGATGGATAATCGGGTACGCGAGGCTTGTCAGTACATCAGCGATCACCTGGCAGACAGCAATTTTGATATCGCCAGCGTCGCACAGCATGTTTGCTTGTCGCCGTC GCGTCTGTCACATCTTTTCCGCCAGCAGTTAGGGATTAGCGTCTTAAGCTGGCGCGAGGACCAACGTATCAGCCAGGCGAAGCTGCTTTTGAGCACCACCCGGATGCCTATCGCC ACCGTCGGTCGCAATGTTGGTTTTGACGATCAACTCTATTTCTCGCGGGTATTTAAAAAATGCACCGGGGCCAGCCCGAGCGAGTTCCGTGCCGGTTTGGAAGAAAAAGTGAATG ATGTAGCCGTCAAGTTGTCATGA",
    "pAJM.683": "ATGCCGAAACTGGGTATGCAGAGCATTCGTCGTCGTCAGCTGATTGA TGCAACCCTGGAAGCAATTAATGAAGTTGGTATGCATGATGCAACCATTGCACAGATTGCACGTCGTGCCGGTGTTAGCACCGGTATTATTAGCCATTATTTCCGCGATAAAAAC GGTCTACTGGAAGCAACCATGCGTGATATTACCAGCCAGCTGCGTGATGCAGTTCTGAATCGTCTGCATGCACTGCCGCAGGGTAGCGCAGAACAGCGTCTGCAGGCAATTGTTG GTGGTAATTTTGATGAAACCCAGGTTAGCAGCGCAGCAATGAAAGCATGGCTGGCATTTTGGGCAATCAGCATGCATCAGCCGATGCTGTATCGTCTGCAGCAGGTTAGCAGTCG TCGTCTGCTGAGCAATCTGGTTAGCGAATTTCGTCGTGAACTGCCTCGTGAACAGGCACAAGAGGCAGGTTATGGTCTGGCAGCACTGATTGATGGTCTGTGGCTGCGTGCAGCA CTGAGCGGTAAACCGCTGGATAAAACCCGTGCAAATAGCCTGACCCGTCATTTTATCACCCAGCATCTGCCGACCGATTGA",
    "pAJM.690": "ATGTGGTCGAACATGGA TGACAAGAAAGTGAAAGAGGAGAATATTCTGCACAATTCCACCAACAAGAAGATCATCCGCCACGAAGATTTTGTAGCCGGCATTAGCAAAGGGATGGCGATTCTGGATTCGTTT GGTACAGATCGTCATCGCCTCAATATCACCATGGCCGCAGAGAAAACCGGTATGACACGTGCAGCAGCTCGTCGCCACCTGCTTACTCTGGAGTATCTGGGCTATCTGGAAAGTG ACGGCCACTACTTCTACTTAACTCCCAAAATCCTGAAATTCAGTGGTTCATATTTGGGTGGTGCTCAATTGCCGAAAATTTCCCAACCACTGTTGAACTTGCTTACGACCCAGAC CAGCCTGATTTACAGCGTGATGGTGTTGGATGGCTATGAAGCCATTACCATTGCGCGTTCTGCCGCTCATCAGCAAACCGACCGCGTTAACCCGTATGGTTTACATCTCGGGAAT CGCTTACCAGCGCATACAACGTCAGCGGGCAAAATCCTGTTAGCGTATTTGGATGACCATGCCCAGCAAGAGTGGCTCAATCAGTACCCTCTGCAACGGCTCACGAAATACACGT ATACCAACCACATCGACTTTCTGCGCCTTTTGAGTGAAATCAAGGAACAGGGTTGGTGCTATAGTTCGGAAGAACACGAACTGGGAGTACACGCCCTTGCGGTTCCGATTTACGG ACAGCAGTCTCGCGTCGTAGCGGCACTGAACATTGTCAGCCCGACAATGCGGACCACGAAAGAATACCTGATTCAGCATATTCTGCCGTTACTGCAAGAAACTGCGCGTGAATTG CGCAATATCCTGTAATGA",
    "pAJM.771": "ATGGAACTGCGTGACCTGGATTTAAACCTGCTGGTGGTGTTCAACCAGTTGCTGGTCGACAGACGCGTCTCTGTCACTGCGGAGAACCTGGGCCTGACCCAGCCTGCCG TGAGCAATGCGCTGAAACGCCTGCGCACCTCGCTACAGGACCCACTCTTCGTGCGCACACATCAGGGAATGGAACCCACACCCTATGCCGCGCATCTGGCCGAGCACGTCACTTC GGCCATGCACGCACTGCGCAACGCCCTACAGCACCATGAAAGCTTCGATCCGCTGACCAGCGAGCGTACCTTCACCCTGGCCATGACCGACATTGGCGAGATCTACTTCATGCCG CGGCTGATGGATGCGCTGGCTCACCAGGCCCCCAATTGCGTGATCAGTACGGTGCGCGACAGTTCGATGAGCCTGATGCAGGCCTTGCAGAACGGAACCGTGGACTTGGCCGTGG GCCTGCTTCCCAATCTGCAAACTGGCTTCTTTCAGCGCCGGCTGCTCCAGAATCACTACGTGTGCCTATGTCGCAAGGACCATCCAGTCACCCGCGAACCCCTGACTCTGGAGCG CTTCTGTTCCTACGGCCACGTGCGTGTCATCGCCGCTGGCACCGGCCACGGCGAGGTGGACACGTACATGACACGGGTCGGCATCCGGCGCGACATCCGTCTGGAAGTGCCGCAC TTCGCCGCCGTTGGCCACATCCTCCAGCGCACCGATCTGCTCGCCACTGTGCCGATATGTTTAGCCGACTGCTGCGTAGAGCCCTTCGGCCTAAGCGCCTTGCCGCACCCAGTCG TCTTGCCTGAAATAGCCATCAACATGTTCTGGCATGCGAAGTACCACAAGGACCTAGCCAATATTTGGTTGCGGCAACTGATGTTTGACCTGTTTACGGATTGA",
    "pAJM.773": "ATGGACATGCCTCGTATTAAACCGGGTCAGCGTGTTATGATGGCACTGCGTAAAATGAT TGCAAGCGGTGAAATCAAAAGTGGTGAACGTATTGCAGAAATTCCGACCGCAGCAGCACTGGGTGTTAGCCGTATGCCGGTTCGTATCGCACTGCGTTCACTGGAACAAGAAGGT CTGGTTGTTCGTCTGGGTGCACGTGGTTATGCAGCCCGTGGTGTTAGCAGCGATCAGATTCGTGATGCAATTGAAGTTCGTGGTGTTCTGGAAGGTTTTGCAGCACGTCGTCTGG CAGAACGTGGTATGACCGCAGAAACCCATGCACGTTTTGTTGTACTGATTGCAGAAGGTGAAGCACTGTTTGCAGCCGGTCGCCTGAATGGTGAAGATCTGGATCGTTATGCCGC ATATAATCAGGCATTTCATGATACCCTGGTTAGCGCAGCAGGTAATGGTGCAGTTGAAAGCGCACTGGCACGTAATGGTTTTGAACCGTTTGCAGCAGCCGGTGCACTGGCCCTG GATCTGATGGACCTGTCTGCCGAATATGAACATCTGCTGGCAGCACATCGTCAGCATCAGGCAGTTCTGGATGCAGTTAGCTGTGGTGATGCCGAAGGTGCAGAACGTATTATGC GTGATCATGCACTGGCAGCAATTCGTAATGCAAAAGTTTTTGAAGCAGCAGCAAGCGCAGGCGCACCGCTGGGTGCAGCATGGTCAATTCGTGCAGATTGA",
    "pAJM.847": "ATGGCACGTACCCCGAGCCGTAGCAGCATTGGTAGCCTGCGTAGTCCGCATACCCATAAAGCAATTCTGACCAGCACCATTGAAATCCTGAAAGAATGTGGTTATAGCGGTCTGAGCATTGAAAGCGTGGCACGTCGCGCCGGTGCAGGCAAACCGACCATTTATCGTTGGTGGACCAACAAAGCAGCACTGATTGCCGAAGTGTATGAAAATGAAATCGAACAGGTACGTAAATTTCCGGATTTGGGTAGCTTTAAAGCCGATCTGGATTTTCTGCTGCATAATCTGTGGAAAGTTTGGCGTGAAACCATTTGTGGTGAAGCATTTCGTTGTGTTATTGCAGAAGCACAGTTGGACCCTGTAACCCTGACCCAACTGAAAGATCAGTTTATGGAACGTCGTCGTGAGATACCGAAAAAACTGGTTGAAGATGCCATTAGCAATGGTGAACTGCCGAAAGATATCAATCGTGAACTGCTGCTGGATATGATTTTTGGTTTTTGTTGGTATCGCCTGCTGACCGAACAGTTGACCGTTGAACAGGATATTGAAGAATTTACCTTCCTGCTGATTAATGGTGTTTGTCCGGGTACACAGTGTTGA",
    "pAJM.884": "ATGCCGCTGACCGACACCCCGCCGTCTGTTCCGCAG AAACCGCGTCGTGGTCGTCCGCGTGGTGCTCCGGACGCTTCTCTTGCTCACCAGTCTCTGATCCGTGCTGGTCTGGAACACCTGACCGAAAAAGGTTACTCTTCGGTTGGTGTTG ACGAAATCCTGAAAGCTGCTCGTGTTCCGAAAGGTTCTTTCTACCACTACTTCCGTAACAAAGCTGACTTCGGTCTGGCTCTGATCAAAGCTTACGACACCTACTTCGCTCGTCT CCTCGACCAGGCGTTCCTGGACGGTTCGCTGGCTCCGCTGGCTCGTCTGCGTCTGTTCACCCGTATGGCTGAAGAAGGTATGGCTCGTCACGGTTTCCGTCGTGGTTGCCTGGTT GGTAACCTGGGTCAGGAAATGGGCGCTCTGCCGGACGACTTCCGTGCTGCTCTGATCGGTGTTCTGGAAACCTGGCAACATCGTACCGCTCAGCTGTTCCGTGAAGCTCAGGCTT GCGGTGAACTGTCTGCTGACCATGACCCGGACGCTCTGGCTGAAGCTTTCTGGATCGGATGGGAAGGTGCTATCCTGCGTGCTAAACTGGAACTGCGTCCGGACCCGATGCACTC TTTCACCCGTACCTTCGGTCGTCACTTCGTTACCCGTACCCAGGAATAATGA",
    "pAJM.969": "ATGCCTCGTCCGAAACTGAAAAGTGATGATGAAGTTCTGGAAGCAGCAACCGTTGTTC TGAAACGTTGTGGTCCGATTGAATTTACCCTGAGCGGTGTTGCAAAAGAAGTTGGTCTGAGTCGCGCAGCACTGATTCAGCGTTTTACCAATCGTGATACCCTGCTGGTTCGTAT GATGGAACGTGGTGTTGAACAGGTTCGTCATTATCTGAATGCAATTCCGATTGGTGCAGGTCCGCAGGGTCTGTGGGAATTTCTGCAGGTTCTGGTTCGTAGCATGGATACCCGT AATGATTTCAGCGTGAACTATCTGATCAGCTGGTATGAACTGCAGGTTCCGGAACTGCGTACCCTGGCAATTCAGCGTAATCGTGCAGTTGTTGAAGGTATTCGTAAACGTCTGC CTCCGGGTGCACCGGCAGCAGCAGAACTGCTGCTGCATAGCGTTATTGCCGGTGCAACCATGCAGTGGGCAGTTGATCCGGATGGTGAACTGGCAGATCATGTTCTGGCACAGAT TGCAGCAATTCTGTGTCTGATGTTTCCGGAACATGATGATTTTCAGCTGCTGCAGGCACATGCATAA",
    "pAJM.1642": "ATGATTGAGAATACCTATAGCGAAAAGTTCGAGTCCGCGTTCGAACAGATCAAAGCGGCGGCCAACGTGGATGCCGCCATCCGTATTCTCCAGGCGGAATATAACCTCGATTTC GTCACCTACCATCTCGCCCAGACAATCGCGAGCAAGATCGATTCGCCCTTCGTGCGCACCACCTATCCGGATGCCTGGGTTTCCCGTTACCTCCTCAACTGCTATGTGAAGGTCG ATCCGATCATCAAGCAGGGCTTCGAACGCCAGCTGCCCTTCGACTGGAGCGAGGTCGAACCGACGCCGGAGGCCTATGCCATGCTGGTCGACGCCCAGAAACACGGCATCGATGA CAATGGCTACTCCATCCCCGTCGCCGACAAGGCGCAGCGCCGCGCCCTGCTGTCGCTGAATGCCCATATACCGGCCGACGAATGGACCGAGCTCGTGCGCCGCTGCCGCAATGAG TGGATCGAGATCGCCCATCTGATCCACCGCAAGGCCGTATATGAGCTGCATGGCGAAAACGATCCGGTGCCGGCATTGTCGCCGCGCGAGATCGAGTGTCTGCACTGGACCGCCC TCGGCAAGGATTACAAGGATATTTCGGTCATCCTGGGCATATCAGAGCATACCACACGCGATTACCTGAAAACCGCCCGCTTCAGGCTCGGCTGCACCACGATCTCGGCCGCCGC GTCGCGGGCTGTTCAATTGCGCATCATCAATCCCTATAGGATCCGCATGACGCGACGTAATTGGTAA",
}
# The literals above contain stray spaces; remove them before comparing.
cds_paper_seqs = {
    ajm: raw_seq.replace(" ", "") for ajm, raw_seq in cds_paper_seqs.items()
}


aux_paper_seqs = {
    "pAJM.677": "ATGGTTACTATCAATACGGAATCTGCTTTAACGCCACGTTCTTTGCGGGAT ACGCGGCGTATGAATATGTTTGTTTCGGTAGCTGCTGCGGTCGCAGGATTGTTATTTGGTCTTGATATCGGCGTAATCGCCGGAGCGTTGCCGTTCATTACCGATCACTTTGTGC TGACCAGTCGTTTGCAGGAATGGGTGGTTAGTAGCATGATGCTCGGTGCAGCAATTGGTGCGCTGTTTAATGGTTGGCTGTCGTTCCGCCTGGGGCGTAAATACAGCCTGATGGC GGGGGCCATCCTGTTTGTACTCGGTTCTATAGGGTCCGCTTTTGCGACCAGCGTAGAGATGTTAATCGCCGCTCGTGTGGTGCTGGGCATTGCTGTCGGGATCGCGTCTTACACC GCTCCTCTGTATCTTTCTGAAATGGCAAGTGAAAACGTTCGCGGTAAGATGATCAGTATGTACCAGTTGATGGTCACACTCGGCATCGTGCTGGCGTTTTTATCCGATACAGCGT TCAGTTATAGCGGTAACTGGCGCGCAATGTTGGGGGTTCTTGCTTTACCAGCAGTTCTGCTGATTATTCTGGTAGTATTTCTGCCAAATAGCCCGCGCTGGCTGGCGGAAAAGGG GCGTCATATTGAGGCGGAAGAAGTATTGCGTATGCTGCGCGATACGTCGGAAAAAGCGCGAGAAGAACTCAACGAAATTCGTGAAAGCCTGAAGTTAAAACAGGGCGGTTGGGCA CTGTTTAAGATCAACCGTAACGTCCGTCGTGCTGTGTTTCTCGGTATGTTGTTGCAGGCGATGCAGCAGTTTACCGGTATGAACATCATCATGTACTACGCGCCGCGTATCTTCA AAATGGCGGGCTTTACGACCACAGAACAACAGATGATTGCGACTCTGGTCGTAGGGCTGACCTTTATGTTCGCCACCTTTATTGCGGTGTTTACGGTAGATAAAGCAGGGCGTAA ACCGGCTCTGAAAATTGGTTTCAGCGTGATGGCGTTAGGCACTCTGGTGCTGGGCTATTGCCTGATGCAGTTTGATAACGGTACGGCTTCCAGTGGCTTGTCCTGGCTCTCTGTT GGCATGACGATGATGTGTATTGCCGGTTATGCGATGAGCGCCGCGCCAGTGGTGTGGATCCTGTGCTCTGAAATTCAGCCGCTGAAATGCCGCGATTTCGGTATTACCTGTTCGA CCACCACGAACTGGGTGTCGAATATGATTATCGGCGCGACCTTCCTGACACTGCTTGATAGCATTGGCGCTGCCGGTACGTTCTGGCTCTACACTGCGCTGAACATTGCGTTTGT GGGCATTACTTTCTGGCTCATTCCGGAAACCAAAAATGTCACGCTGGAACATATCGAACGCAAACTGATGGCAGGCGAGAAGTTGAGAAATATCGGCGTCTGA",
    "pAJM.969": "ATGAACGAGAAAAATATAAAACACAGT CAAAACTTTATTACTTCAAAACATAATATAGATAAAATAATGACAAATATAAGATTAAATGAACATGATAATATCTTTGAAATCGGCTCAGGAAAAGGCCATTTTACCCTTGAAT TAGTAAAGAGGTGTAATTTCGTAACTGCCATTGAAATAGACCATAAATTATGCAAAACTACAGAAAATAAACTTGTTGATCACGATAATTTCCAAGTTTTAAACAAGGATATATT GCAGTTTAAATTTCCTAAAAACCAATCCTATAAAATATATGGTAATATACCTTATAACATAAGTACGGATATAATACGCAAAATTGTTTTTGATAGTATAGCTAATGAGATTTAT TTAATCGTGGAATACGGGTTTGCTAAAAGATTATTAAATACAAAACGCTCATTGGCATTACTTTTAATGGCAGAAGTTGATATTTCTATATTAAGTATGGTTCCAAGAGAATATT TTCATCCTAAACCTAAAGTGAATAGCTCACTTATCAGATTAAGTAGAAAAAAATCAAGAATATCACACAAAGATAAACAAAAGTATAATTATTTCGTTATGAAATGGGTTAACAA AGAATACAAGAAAATATTTACAAAAAATCAATTTAACAATTCCTTAAAACATGCAGGAATTGACGATTTAAACAATATTAGCTTTGAACAATTCTTATCTCTTTTCAATAGCTAT AAATTATTTAATAAGTGA",
}
# The literals above contain stray spaces; remove them before comparing.
aux_paper_seqs = {
    ajm: raw_seq.replace(" ", "") for ajm, raw_seq in aux_paper_seqs.items()
}

promoter_paper_seqs = {
    "pAJM.011": "TTTTCAGCAGGACGCACTGACCTCCCTATCAGTGATAGAGATTGACATCCCTATCAGTGATAGAGATACTGAGCAC",
    "pAJM.336": "TGTTGACAATTAATCATCGGCTCGTATAATGTGTGGAATTGTGAGCGCTCACAATT",
    "pAJM.474": "ACCTGTAGGATCGTACAGGTTTACGCAAGAAAATGGTTTGTTACAGTCGAATAAA",
    "pAJM.657": "AACAAACAGACAATCTGGTCTGTTTGTATTATGGAAAATTTTTCTGTATAATAGATTCAACAAACAGACAATCTGGTCTGTTTGTATTAT",
    "pAJM.661": "CACCCAGCAGTATTTACAAACAACCATGAATGTAAGTATATTCCTTAGCAA",
    "pAJM.677": "AGAAACCAATTGTCCATATTGCATCAGACATTGCCGTCACTGCGTCTTTTACTGGCTCTTCTCGCTAACCAAACCGGTAACCCCGCTTATTAAAAGCATTCTGTAACAAAGCGGGACCAAAGCCATGACAAAAACGCGTAACAAAAGTGTCTATAATCACGGCAGAAAAGTCCACATTGATTATTTGCACGGCGTCACACTTTGCTATGCCATAGCATTTTTATCCATAAGATTAGCGGATCCTACCTGACGCTTTTTATCGCAACTCTCTACTGTTTCTCCATACCCG",
    "pAJM.683": "AGCGCGGGTGAGAGGGATTCGTTACCAATAGACAATTGATTGGACGTTCAATATAATGCTAGC",
    "pAJM.690": "TTTTGTTCGATTATCGAACAAATTATTGAAATATCGAACAAAACCTCTAAACTACTGTGGCACTGAATCAAAAAATTATAAACCATGATCAGA",
    "pAJM.771": "GGGGCCTCGCTTGGGTTATTGCTGGTGCCCGGCCGGGCGCAATATTCATGTTGATGATTTATTATATATCGAGTGGTGTATTTATTTATATTGTTTGCTCCGTTACCGTTATTAAC",
    "pAJM.773": "ATTGGATCCAATTGACAGCTAGCTCAGTCCTAGGTACCATTGGATCCAAT",
    "pAJM.847": "CGACGTACGGTGGAATCTGATTCGTTACCAATTGACATGATACGAAACGTACCGTATCGTTAAGGT",
    "pAJM.884": "CGCTAGCAAGTAAGGCCGACGCTTCACAACCGCACTTGATTTA ATAGACCATACCGTCTATTATTTCTGGCCAT",
    "pAJM.969": "GGATTGAATATAACCGACGTGACTGTTACATTTAGGTGGCTAA ACCCGTCAA",
    "pAJM.1642": "CCCTTTGTGCGTCCAAACGGACGCACGGCGCTCTAAAGCGGGT CGCGATCTTTCAGATTCGCTCCTCGCGCTTTCAGTCTTTGTTTTGGCGCATGTCGTTATCGCAAAACCGCTGCACACTTTTGCGCGACATGCTCTGATCCCCCTCATCTGGGGGG GCCTATCTGAGGGAATTTCCGATCCGGCTCGCCTGAACCATTCTGCTTTCCACGAACTTGAAAACGCT",
}
# The literals above contain stray spaces; remove them before comparing.
promoter_paper_seqs = {
    ajm: raw_seq.replace(" ", "") for ajm, raw_seq in promoter_paper_seqs.items()
}

## Sequence comparison: paper SI vs. plasmid maps

In [None]:
def cds_equal_except_stops(seq1, seq2):
    """Return True if two CDS sequences differ only by trailing stop codons.

    The shorter sequence must be an exact prefix of the longer one, and
    whatever the longer one has beyond that prefix must consist solely of
    complete stop codons ("taa", "tga" or "tag", matched case-insensitively).
    The argument order does not matter.

    Only the extra tail is lowercased; the prefix comparison is exact, so
    callers should pass both sequences in the same case.

    A tail whose length is not a multiple of three is reported as a mismatch
    (False) rather than being handed to a codon-grouping helper.
    """
    # sorted() is stable, so equal-length inputs keep their original order.
    shorter, longer = sorted((seq1, seq2), key=len)
    if shorter != longer[: len(shorter)]:
        return False
    tail = longer[len(shorter) :]
    if len(tail) % 3:
        # A partial codon can never be a stop codon.
        return False
    return all(
        str(tail[pos : pos + 3]).lower() in ("taa", "tga", "tag")
        for pos in range(0, len(tail), 3)
    )

### Promoters

In [None]:
# Promoters have to match the paper's sequence exactly (after lowercasing);
# every mismatch is printed as name / map sequence / "-" / paper sequence.
# pAJM.474 will presumably be reported, since a G was prepended to its
# source sequence above.
for name in promoter_source_seqs:
    source_seq = promoter_source_seqs[name].seq_lower()
    paper_seq = promoter_paper_seqs[name].lower()
    if source_seq == paper_seq:
        continue
    print(name, source_seq, "-", paper_seq, "", sep="\n")

### CDSes

In [None]:
# CDSes may differ from the paper only by extra trailing stop codons on either
# side; every other mismatch is printed as name / map sequence / "-" / paper sequence.
for name in cds_source_seqs:
    source_seq = cds_source_seqs[name].seq_lower()
    paper_seq = cds_paper_seqs[name].lower()
    if cds_equal_except_stops(source_seq, paper_seq):
        continue
    print(name, source_seq, "-", paper_seq, "", sep="\n")

### Auxiliary CDSes

In [None]:
# Auxiliary CDSes may differ from the paper only by extra trailing stop codons on
# either side; every other mismatch is printed as name / map sequence / "-" / paper sequence.
for name in aux_source_seqs:
    source_seq = aux_source_seqs[name].seq_lower()
    paper_seq = aux_paper_seqs[name].lower()
    if cds_equal_except_stops(source_seq, paper_seq):
        continue
    print(name, source_seq, "-", paper_seq, "", sep="\n")

## Check if we already have identical CDSes

In [None]:
# Existing registry fragments whose description mentions "Adam Meyer" and whose
# author field mentions "Richard Murray lab", as fragment name -> registry "_seq" entry.
murray_3g_parts = dict(
    (fragment["Name"], reg.get(fragment_id)["_seq"])
    for fragment_id, fragment in flib_fragments.items()
    if "Adam Meyer" in fragment["Description"]
    and "Richard Murray lab" in fragment["Author"]
)

In [None]:
# Reverse lookup: lowercase sequence -> fragment name. If two fragments share a
# sequence, the one iterated last wins.
murray_3g_seqs = dict(
    (part.seq_lower(), part_name) for part_name, part in murray_3g_parts.items()
)

In [None]:
# Display the names of the matching registry fragments.
murray_3g_parts.keys()

## Check for restriction sites

In [None]:
# List the enzymes exposed by Bio.Restriction whose names start with "Pa", e.g.
# to check whether PaqCI is available in the installed Biopython. (The bare
# attribute access `Restriction.Pa` that used to be here is not an enzyme name
# and stops a Restart & Run All with an AttributeError.)
[enzyme_name for enzyme_name in dir(Restriction) if enzyme_name.startswith("Pa")]

In [None]:
# For each golden gate enzyme, report which CDSes the enzyme search finds a site in.
# Restriction.PaqCI is currently left out of the screen.
enzymes = (
    Restriction.BsaI,
    Restriction.BsmBI,
    Restriction.BbsI,
    Restriction.AarI,
)
for enz in enzymes:
    names_with_cuts = [
        f"{ajm_to_repressor[name]} ({name})"
        for name, seq in cds_source_seqs.items()
        if enzyme.re_search(seq, enz)
    ]
    print(f"{enz} ({len(names_with_cuts)}): {', '.join(names_with_cuts)}")

## CDS primers

In [None]:
# Digest the storage vector's plasmid map with the storage enzyme once, up front;
# the result is reused for every assembly in the primer-design loop below.
digested_storage_vector = workflow.re_digest_part(
    plib_maps[storage_vector_id], storage_enzyme
)

In [None]:
%%time

# Fields shared by the oligo and part rows created in this cell.
row_base = {"Author": "Jacob Quinn Shenker", "Date": workflow.date()}

# Defaults for every oligo row upserted below.
oligo_base = {
    **row_base,
    "Order date": workflow.date(),
    "Vendor": "Genewiz",
    "Type": "Primer",
}

# Defaults for plasmid rows (does not include row_base).
plasmid_base = {
    "Origin": "pUC",
    "Marker": "amp",
    "Reference": reference,
}

# Defaults for strain rows (does not include row_base).
strain_base = {
    "Species": "E. coli",
    "Background": "MG1655",
    "Parent": "MG1655",
    "Marker": "amp",
}

# Defaults for part rows.
part_base = {
    **row_base,
    "Species/codon usage": "E. coli",
    "Reference": reference,
}

# Passed as `apply=` to the oligo upserts below.
apply = {"Sequence": workflow.normalize_seq}
# NOTE: both names are bound to the same dict object, so mutating one mutates the other.
plasmid_apply = strain_apply = {"Names": None}
# Passed as `overwrite=` to the upserts below.
overwrite = True

# Results collected by the loop below; the ones filled in within this cell are
# keyed by variant name.
primer_ids = {}
primer_pairs = {}
part_seqs = {}
pcr_seqs = {}
storage_plasmid_seqs = {}

# Work list of (part name, source plasmid ID, original plasmid name, part type,
# source sequence) tuples: promoters first, then CDSes, then auxiliary CDSes.
source_seqs_all = list(
    it.chain(
        (
            (ajm_to_promoter[ajm], ajm_to_id[ajm], ajm, "promoter", seq)
            for ajm, seq in promoter_source_seqs.items()
        ),
        (
            (ajm_to_repressor[ajm], ajm_to_id[ajm], ajm, "cds", seq)
            for ajm, seq in cds_source_seqs.items()
        ),
        (
            (ajm_to_aux[ajm], ajm_to_id[ajm], ajm, "aux", seq)
            for ajm, seq in aux_source_seqs.items()
        ),
    )
)

for name, id_, ajm, type_, source_seq in tqdm(source_seqs_all):
    if type_ == "cds":
        variants = [
            "bicistronic",
            None,
        ]  # with overwrite=True, names are set by the last variant, hence the vanilla variant last
    else:
        variants = [None]
    for variant in variants:
        variant_name = name
        if variant:
            variant_name += f"_{variant}"
        seq_forward = workflow.normalize_seq(source_seq)
        seq_reverse = workflow.normalize_seq(sequence.reverse_complement(source_seq))
        anneal_promoter = False
        if type_ in ("cds", "aux"):
            # TODO: squeeze?
            part_seq = workflow.normalize_seq(
                cds_flanks[0] + source_seq + cds_flanks[1]
            )
            forward_overhang = storage_flanks[0] + cds_flanks[0]
            forward_overhang = (
                design.random_bases(num_random_bases, seed=forward_overhang)
                + forward_overhang
            )
            if variant == "bicistronic":
                # TODO: replace hard-coded squeeze with automatic squeeze
                part_seq = workflow.normalize_seq(
                    part_seq[:-3] + bicistronic_placeholder
                )
                reverse_overhang = sequence.reverse_complement(
                    cds_flanks[1][:-3] + bicistronic_placeholder + storage_flanks[1]
                )
            else:
                reverse_overhang = sequence.reverse_complement(
                    cds_flanks[1] + storage_flanks[1]
                )
            reverse_overhang = (
                design.random_bases(num_random_bases, seed=reverse_overhang)
                + reverse_overhang
            )
        elif type_ == "promoter":
            oligo_name = f"marionette_prom_{variant_name}"
            part_seq = workflow.normalize_seq(
                promoter_overhangs[0] + source_seq + promoter_overhangs[1]
            )
            forward_overhang = storage_flanks[0] + promoter_overhangs[0]
            forward_overhang = (
                design.random_bases(num_random_bases, seed=forward_overhang)
                + forward_overhang
            )
            reverse_overhang = sequence.reverse_complement(
                promoter_overhangs[1] + storage_flanks[1]
            )
            reverse_overhang = (
                design.random_bases(num_random_bases, seed=reverse_overhang)
                + reverse_overhang
            )
            annealed_seq = (
                forward_overhang
                + source_seq
                + sequence.reverse_complement(reverse_overhang)
            )
            if len(annealed_seq) <= 100:
                anneal_promoter = True
        else:
            raise ValueError(f"unexpected type '{type_}'")
        part_seqs[variant_name] = part_seq
        if not anneal_promoter:
            forward_primer = next(
                primers.iter_primers(
                    seq_forward,
                    overhang=forward_overhang,
                    min_tm=tm_binding,
                    min_mfe=min_mfe,
                    anchor="5prime",
                )
            )
            reverse_primer = next(
                primers.iter_primers(
                    seq_reverse,
                    overhang=reverse_overhang,
                    min_tm=tm_binding,
                    min_mfe=min_mfe,
                    anchor="5prime",
                )
            )
            primer_pair = primers.PrimerPair(forward_primer, reverse_primer)
            primer_pairs[variant_name] = primer_pair
            try:
                pcr_seq = sequence.pcr(
                    plib_maps[id_],
                    forward_primer.seq,
                    reverse_primer.seq,
                    min_score=(
                        len(forward_primer.binding),
                        len(reverse_primer.binding),
                    ),
                )
            except Exception as e:
                if type_ == "promoter":
                    print(
                        f"{variant_name} ({id_}, {ajm}): got '{e}' when attempting PCR, falling back on annealed primers"
                    )
                    anneal_promoter = True
                else:
                    raise
            # if PCR succeeds, keep going
            if not anneal_promoter:
                pcr_seqs[variant_name] = pcr_seq
                pcr_digest_seq = workflow.re_digest_part(pcr_seq, part_enzyme)
                if part_seq != pcr_digest_seq.seq_lower():
                    raise ValueError(
                        f"PCR product does not match expected part for {variant_name}. PCR product:\n{pcr_digest_seq.seq_lower()}\n\nexpecting:\n{part_seq}\n\n"
                    )
                if part_seq in murray_3g_seqs:
                    print(
                        f"{variant_name} ({ajm}): identical match for 3G part {murray_3g_seqs[part_seq]}"
                    )
                # oligos
                if type_ in ("cds", "aux"):
                    oligo_name = f"marionette_cds_{variant_name}"
                    if variant == "bicistronic":
                        description = f"PCRs {id_} ({ajm}) to make golden gate CDS part for {name} followed by a bicistronic CDS placeholder."
                    else:
                        description = f"PCRs {id_} ({ajm}) to make golden gate CDS part for {name}."
                elif type_ == "promoter":
                    description = f"PCRs {id_} ({ajm}) to make golden gate promoter part for {name}."
                else:
                    raise ValueError(f"unexpected type '{type_}'")
                forward_primer_id = olib_oligos.upsert(
                    {
                        **oligo_base,
                        "Name": f"{oligo_name}_f",
                        "Sequence": workflow.normalize_seq_upper(forward_primer),
                        "Description": description,
                    },
                    apply=apply,
                    overwrite=overwrite,
                )
                reverse_primer_id = olib_oligos.upsert(
                    {
                        **oligo_base,
                        "Name": f"{oligo_name}_r",
                        "Sequence": workflow.normalize_seq_upper(reverse_primer),
                        "Description": description,
                    },
                    apply=apply,
                    overwrite=overwrite,
                )
                primer_ids[variant_name] = (forward_primer_id, reverse_primer_id)
                # plasmid_map
                plasmid_seq = sequence.assemble(
                    [
                        workflow.re_digest_part(pcr_seq, storage_enzyme),
                        digested_storage_vector,
                    ]
                )
                plasmid_command = f"@GG({id_}<{forward_primer_id},{reverse_primer_id}>/{storage_enzyme}, {storage_vector_id}/{storage_enzyme})"
        if anneal_promoter:
            annealed_digest_seq = workflow.re_digest_part(annealed_seq, part_enzyme)
            if part_seq != annealed_digest_seq.seq_lower():
                raise ValueError(
                    f"Digestion product of annealed primers does not match expected part for {variant_name}. Digestion product:\n{annealed_digest_seq.seq_lower()}\n\nexpecting:\n{part_seq}\n\n"
                )
            description = f"Primers to anneal to make golden gate promoter part part for {variant_name}. Sequences appear in {id_} ({ajm})."
            sense_primer_id = olib_oligos.upsert(
                {
                    **oligo_base,
                    "Name": f"{oligo_name}_sense",
                    "Sequence": workflow.normalize_seq_upper(annealed_seq),
                    "Description": description,
                },
                apply=apply,
                overwrite=overwrite,
            )
            antisense_primer_id = olib_oligos.upsert(
                {
                    **oligo_base,
                    "Name": f"{oligo_name}_antisense",
                    "Sequence": workflow.normalize_seq_upper(
                        sequence.reverse_complement(annealed_seq)
                    ),
                    "Description": description,
                },
                apply=apply,
                overwrite=overwrite,
            )
            primer_ids[variant_name] = (sense_primer_id, antisense_primer_id)
            # plasmid_map
            plasmid_seq = sequence.assemble(
                [
                    workflow.re_digest_part(annealed_seq, storage_enzyme),
                    digested_storage_vector,
                ]
            )
            plasmid_command = f"@GG({sense_primer_id}={antisense_primer_id}/{storage_enzyme}, {storage_vector_id}/{storage_enzyme})"
        assert plasmid_seq.circular
        storage_plasmid_seqs[variant_name] = plasmid_seq
        # plasmid
        if type_ == "promoter":
            plasmid_description = f"Golden gate part for Adam Meyer's {name} promoter."
        else:
            if variant == "bicistronic":
                plasmid_description = f"Golden gate part for Adam Meyer's {name} CDS followed by a bicistronic CDS placeholder."
            else:
                plasmid_description = f"Golden gate part for Adam Meyer's {name} CDS."
        plasmid_row = {
            **plasmid_base,
            "Command": plasmid_command,
            "Names": variant_name,
            "Description": plasmid_description,
            "Size": len(plasmid_seq),
        }
        plasmid_id = plib_plasmids.upsert(
            plasmid_row, apply=plasmid_apply, overwrite=overwrite
        )
        plib_maps[plasmid_id] = plasmid_seq
        # strain
        strain_row = {**strain_base, "Names": variant_name, "Plasmids": plasmid_id}
        lib_strains.upsert(strain_row, apply=strain_apply, overwrite=overwrite)
        # part
        usage = f"{plasmid_id}/{part_enzyme}"
        if anneal_promoter:
            usage += f",{primer_ids[variant_name][0]}={primer_ids[variant_name][1]}/{part_enzyme}"
        if type_ == "promoter":
            part_description = f"Adam Meyer's {name} promoter. Sequence taken unmodified from color-coded region in the supplemental information for Meyer 2019."
            if not anneal_promoter:
                part_description += f" Sequences PCRed from {id_} ({ajm})."
            if ajm in extra_promoter_description:
                part_description += f" {extra_promoter_description[ajm]}"
        else:
            if variant == "bicistronic":
                part_description = f"Adam Meyer's {name} CDS followed by a bicistronic CDS placeholder. Sequence taken unmodified from color-coded region in the supplemental information for Meyer 2019. Sequences PCRed from {id_} ({ajm})."
            else:
                part_description = f"Adam Meyer's {name} CDS. Sequence taken unmodified from color-coded region in the supplemental information for Meyer 2019. Sequences PCRed from {id_} ({ajm})."
        part_row = {
            **part_base,
            "Name": variant_name,
            "Description": part_description,
            "Sequence": part_seq,
            "Usage": usage,
            "Type": cds_part_type,
            "Upstream overhang": promoter_overhangs[0],
            "Downstream overhang": promoter_overhangs[1],
        }
        flib_fragments.upsert(part_row, apply=apply, overwrite=overwrite)

In [None]:
# Display a mapping from each oligo's "Name" to the length of its "Sequence"
# for every row currently held in `olib_oligos.local`.
{
    oligo_row["Name"]: len(oligo_row["Sequence"])
    for oligo_row in olib_oligos.local.values()
}

In [None]:
# List the variant names for which an assembled storage plasmid sequence was
# recorded by the design loop above.
storage_plasmid_seqs.keys()

In [None]:
# Spot-check a single assembled storage plasmid (the "AM19_PLuxB" variant).
# NOTE(review): `seq_lower()` presumably returns the sequence as a lower-case
# string -- confirm against paulssonlab.cloning.sequence.
storage_plasmid_seqs["AM19_PLuxB"].seq_lower()

In [None]:
# Re-key `promoter_paper_seqs` through the `ajm_to_promoter` lookup and
# display the result; the values are passed through unchanged.
{
    ajm_to_promoter[ajm_key]: paper_seq
    for ajm_key, paper_seq in promoter_paper_seqs.items()
}

# Commit

In [None]:
%%time
# Commit the oligo and fragment collections (timed by the cell magic above).
# NOTE(review): `commit()` presumably pushes the locally upserted rows to the
# backing registry -- confirm in paulssonlab.cloning.registry before running,
# since this is likely not reversible by the Rollback cell afterwards.
olib_oligos.commit()
flib_fragments.commit()

In [None]:
%%time
# Commit the plasmid maps, plasmid rows and strain rows (timed by the cell
# magic above). Kept separate from the oligo/fragment commit cell so the two
# groups can be committed independently.
# NOTE(review): presumably writes the pending local changes to the registry
# backends -- confirm in paulssonlab.cloning.registry.
plib_maps.commit()
plib_plasmids.commit()
lib_strains.commit()

In [None]:
# Report oligos whose "Name" starts with "marionette_prom" and whose
# "Sequence" is longer than 60 characters, one per line as
# "<registry key><TAB><sequence>".
for name, oligo in olib_oligos.items():
    # Skip anything that is not a marionette promoter oligo.
    if not oligo["Name"].startswith("marionette_prom"):
        continue
    # Skip oligos at or below the 60-character threshold.
    if len(oligo["Sequence"]) <= 60:
        continue
    print(f"{name}\t{oligo['Sequence']}")

# Rollback

In [None]:
# Roll back all five registry collections touched by the design loop.
# NOTE(review): `rollback()` presumably discards pending (uncommitted) local
# changes so the design loop can be re-run from a clean state -- confirm in
# paulssonlab.cloning.registry.
olib_oligos.rollback()
flib_fragments.rollback()
plib_maps.rollback()
plib_plasmids.rollback()
lib_strains.rollback()

# Test

In [None]:
# List the variant names for which a PCR product sequence was recorded by the
# design loop above.
pcr_seqs.keys()

In [None]:
# Spot-check the PCR product recorded for the "AM19_PhlFAM" variant.
# NOTE(review): `seq_lower()` presumably returns the sequence as a lower-case
# string -- confirm against paulssonlab.cloning.sequence.
pcr_seqs["AM19_PhlFAM"].seq_lower()