In [None]:
import toml
import pygsheets
from tqdm.auto import tqdm
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import Restriction
import random
import re
from itertools import product

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import paulssonlab.cloning.registry as registry
import paulssonlab.cloning.workflow as workflow
import paulssonlab.cloning.design as design
import paulssonlab.cloning.sequence as sequence
import paulssonlab.cloning.enzyme as enzyme
import paulssonlab.api.geneious as geneious

# Setup

In [None]:
# Load notebook settings from config.toml; the "geneious" and "registry"
# tables are read by the setup cells that follow.
config = toml.load("config.toml")

In [None]:
# Authorize the Google Sheets client from the service-account key file
# expected in the working directory.
gc = pygsheets.authorize(service_account_file="credentials.json")

In [None]:
# Connect to Geneious, passing the "geneious" table of the config as keyword
# arguments.
geneious_sessionmaker = geneious.connect(**config["geneious"])

In [None]:
# Registry handle used for every lookup and write in this notebook. It is
# built from the Sheets client, the configured registry folder, and the
# Geneious connection (stored under the "registry" Geneious folder).
reg = registry.Registry(
    gc,
    config["registry"]["folder"],
    geneious_sessionmaker=geneious_sessionmaker,
    geneious_folder="registry",
)

# Config

In [None]:
# Handles to individual registry collections, looked up by tuple key.
# The cells visible here only use olib_oligos, flib_fragments and part_types;
# the plasmid, map and strain handles are not referenced below.
olib_oligos = reg[("oLIB", "oligos")]
plib_plasmids = reg[("pLIB", "plasmids")]
plib_maps = reg[("pLIB", "maps")]
lib_strains = reg[("LIB", "strains")]
flib_fragments = reg[("fLIB", "fragments")]
# Part-type definitions; workflow.overhangs_for() is applied to its entries.
part_types = reg[("fLIB", "fragments", "Part types")]

In [None]:
# Names of the original RBS parts (not referenced by the cells visible here).
orig_rbs_names = ["B0033m_BC", "B0032m_BC", "B0034m_BC"]
# Part type recorded on the placeholder fragments, and its overhang pair.
part_type = "5UTR_2"
part_overhangs = workflow.overhangs_for(part_types[part_type])
# Part types for which RBS placeholders are generated.
placeholder_insert_part_types = ["5UTR_2b", "5UTR_2c"]
# Part types for which RBS library inserts are generated.
insert_part_types = ["5UTR_2", "5UTR_2b", "5UTR_2c"]
# Enzyme used to digest a placeholder oligo duplex into its part sequence.
part_enzyme = Restriction.BsaI
# Enzymes whose sites flank the inserts (and that the placeholders accept).
insert_enzymes = [Restriction.BbsI]
# Registry ID of the reverse primer; its reverse complement ends every library oligo.
library_reverse_primer_id = "oLIB46"
# to make oligos fit within 60bp, we omit random bases from the BsmBI flanks
# NOTE(review): this says BsmBI, but part_enzyme above is BsaI — confirm which is meant.
# TODO
# Number of random bases in the prefix of each library oligo.
num_random_bases = 6

In [None]:
# Fields stamped on every row this notebook writes.
base = {
    "Author": "Jacob Quinn Shenker",
    "Date": workflow.date(),
}

# Oligo rows: the shared fields plus ordering details.
oligo_base = dict(base)
oligo_base.update(
    {
        "Order date": workflow.date(),
        "Vendor": "IDT",
        "Type": "Primer",
    }
)

# Fragment rows currently carry only the shared fields.
fragment_base = dict(base)

# Passed as `apply=` to every upsert below.
# apply = {"Sequence": workflow.normalize_seq}
apply = {"Name": None}

# Overhangs

In [None]:
def print_unique_overhangs(names):
    """Return the distinct overhangs used by the given part types.

    Despite the name, nothing is printed: the string is returned so that it
    appears as the cell's output.

    Parameters
    ----------
    names : iterable of str
        Keys into ``part_types``.

    Returns
    -------
    str
        Every distinct overhang exactly once, in order of first appearance,
        joined with ``","``. Empty string when ``names`` is empty.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # output is identical from one kernel run to the next. A set of strings
    # iterates in hash order, which changes between interpreter processes.
    unique_overhangs = dict.fromkeys(
        overhang
        for part_type_name in names
        for overhang in workflow.overhangs_for(part_types[part_type_name])
    )
    return ",".join(unique_overhangs)

In [None]:
# Distinct overhangs across this set of part types.
print_unique_overhangs(
    [
        "Promoter_AB",
        "5UTR_BC",
        "5UTR_2",
        "CDS_CD",
        "Terminator_DE",
        "FP_insert_mScarlet",
        "Eaton_barcode",
        "ClpXP_tag",
    ]
)

In [None]:
# Roll back the part-types collection.
# NOTE(review): presumably this discards uncommitted local edits — confirm
# against the registry API.
part_types.rollback()

In [None]:
# Distinct overhangs when the 5UTR_2b..5UTR_2e part types are included.
print_unique_overhangs(
    [
        "Promoter_AB",
        "5UTR_2",
        "5UTR_2b",
        "5UTR_2c",
        "5UTR_2d",
        "5UTR_2e",
        "FP_insert_mScarlet",
        "Eaton_barcode",
        "ClpXP_tag",
    ]
)

In [None]:
# Distinct overhangs for the promoter, all 5'UTR part types and the ClpXP tag.
print_unique_overhangs(
    [
        "Promoter_AB",
        "5UTR_BC",
        "5UTR_2",
        "5UTR_2b",
        "5UTR_2c",
        "5UTR_2d",
        "5UTR_2e",
        "ClpXP_tag",
    ]
)

# RBS placeholders

In [None]:
# Digest the existing BbsI RBS placeholder with BsaI. The placeholder loop
# below reads products 0, 1 and 2 of this digest as its template.
orig_bbsi_placeholder_seqs = enzyme.re_digest(reg.get("RBS_placeholder_BbsI")["_seq"], Restriction.BsaI)

In [None]:
# Display the digest products for inspection.
orig_bbsi_placeholder_seqs

In [None]:
# Display what workflow.re_digest_part returns for the same placeholder and
# enzyme, for comparison with orig_bbsi_placeholder_seqs.
workflow.re_digest_part(reg.get("RBS_placeholder_BbsI")["_seq"], Restriction.BsaI)

In [None]:
# For every (insert enzyme, placeholder part type) pair:
#   1. rebuild the placeholder from the digested BbsI placeholder, swapping in
#      the overhangs of the target part type,
#   2. upsert the sense and antisense oligos,
#   3. upsert the fragment obtained by digesting the placeholder with part_enzyme.
for insert_enzyme, insert_part_type in product(insert_enzymes, placeholder_insert_part_types):
    insert_overhangs = workflow.overhangs_for(part_types[insert_part_type])
    # Keep this as an `==` test with an `else`: Biopython's `!=` on restriction
    # enzymes compares cut characteristics and is not the negation of `==`.
    if insert_enzyme == Restriction.BbsI:
        # Template products 0 and 2 are the flanks (via .fill_in()); product 1
        # is the middle (via .trim_overhangs()), re-bracketed by the new
        # insert overhangs.
        seq = (
            orig_bbsi_placeholder_seqs[0].fill_in()
            + insert_overhangs[0]
            + orig_bbsi_placeholder_seqs[1].trim_overhangs()
            + insert_overhangs[1]
            + orig_bbsi_placeholder_seqs[2].fill_in()
        )
    else:
        # Only a BbsI placeholder template is available.
        raise NotImplementedError(
            f"RBS placeholder construction is only implemented for BbsI, got {insert_enzyme}"
        )
    # Disabled alternative: build the placeholder from scratch (type IIS sites
    # with spacers plus seeded random bases) instead of reusing the template.
    # seq = insert_overhangs[0] + sequence.reverse_complement(
    #     design.type2s_with_spacer(insert_enzyme, len(insert_overhangs[0]))
    # )
    # seq += (
    #     design.random_bases(2 * num_random_bases, seed=seq)
    #     + design.type2s_with_spacer(insert_enzyme, len(insert_overhangs[1]))
    #     + insert_overhangs[1]
    # )
    # seq = (
    #     design.type2s_with_spacer(part_enzyme, len(part_overhangs[0]))
    #     + part_overhangs[0]
    #     + seq
    #     + part_overhangs[1]
    #     + sequence.reverse_complement(
    #         design.type2s_with_spacer(part_enzyme, len(part_overhangs[1]))
    #     )
    # )
    # seq = design.random_bases(num_random_bases, seed=seq) + seq
    # seq = seq + design.random_bases(num_random_bases, seed=seq)
    seq = workflow.normalize_seq(seq)
    insert_type_name = insert_part_type.replace("5UTR_2", "RBS")
    placeholder_name = f"{insert_type_name}_placeholder_{insert_enzyme}"
    description = f"Insulated RBS placeholder that accepts {insert_type_name} inserts."
    # Sense strand oligo.
    oligo_id = olib_oligos.upsert(
        {
            **oligo_base,
            "Name": f"{placeholder_name}_sense",
            "Sequence": sequence.normalize_seq_upper(seq),
            "Description": description,
        },
        apply=apply,
    )
    # Antisense strand oligo (reverse complement of the sense strand).
    oligo_id2 = olib_oligos.upsert(
        {
            **oligo_base,
            "Name": f"{placeholder_name}_antisense",
            "Sequence": sequence.normalize_seq_upper(sequence.reverse_complement(seq)),
            "Description": description,
        },
        apply=apply,
    )
    # Usage string: the two oligo IDs joined by "=", then "/" and the enzyme.
    usage = f"{oligo_id}={oligo_id2}/{part_enzyme}"
    part_seq = sequence.normalize_seq(workflow.re_digest_part(seq, part_enzyme))
    fragment_row = {
        **fragment_base,
        "Name": placeholder_name,
        "Sequence": part_seq,
        "Description": description,
        "Usage": usage,
        "Type": part_type,
        "Upstream overhang": part_overhangs[0],
        "Downstream overhang": part_overhangs[1],
        "Species/codon usage": "E. coli",
    }
    flib_fragments.upsert(fragment_row, apply=apply, clear=True)

# RBS libraries

In [None]:
# Existing RBS libraries to re-derive, mapped by name to their registry
# sequence after .trim_overhangs().
old_rbs_library_names = ["Strong_Weiss_RBS_library"]
rbs_library_seqs = {
    name: reg.get(name)["_seq"].trim_overhangs() for name in old_rbs_library_names
}

In [None]:
# Display the trimmed library sequence for inspection.
rbs_library_seqs["Strong_Weiss_RBS_library"]

In [None]:
# Reverse complement of the library reverse primer; it is appended to the end
# of every library oligo built below.
library_primer_seq_rc = sequence.reverse_complement(
    reg.get(library_reverse_primer_id)["Sequence"]
)
# Random prefix shared by all library oligos.
# NOTE(review): the fixed seed presumably makes it reproducible across runs — confirm.
random_prefix = design.random_bases(num_random_bases, seed=57)

In [None]:
# Build one library oligo, and the fragment it yields, for every combination
# of insert enzyme, insert part type and source RBS library.
for insert_enzyme, insert_part_type, (old_library_name, library_seq) in product(
    insert_enzymes, insert_part_types, rbs_library_seqs.items()
):
    insert_overhangs = workflow.overhangs_for(part_types[insert_part_type])

    # Core: upstream overhang, library sequence, downstream overhang.
    seq = sequence.smoosh_and_normalize_sequences(
        insert_overhangs[0], library_seq, insert_overhangs[1]
    )
    # Upstream flank: random prefix followed by the enzyme site and spacer.
    seq = (
        random_prefix
        + design.type2s_with_spacer(insert_enzyme, len(insert_overhangs[0]))
        + seq
    )
    # Downstream flank: reverse-complemented enzyme site and spacer, then the
    # reverse complement of the library reverse primer.
    seq = (
        seq
        + sequence.reverse_complement(
            design.type2s_with_spacer(insert_enzyme, len(insert_overhangs[1]))
        )
        + library_primer_seq_rc
    )
    seq = workflow.normalize_seq(seq)

    # Naming: swap "5UTR_2" for "RBS" in the part type, then swap that into
    # the old library name and tag it with the enzyme.
    insert_type_name = insert_part_type.replace("5UTR_2", "RBS")
    library_name = old_library_name.replace("RBS", insert_type_name) + f"_{insert_enzyme}"
    description = f"RBS library based on {old_library_name} with {insert_type_name} overhangs and {insert_enzyme} cut sites."

    # Register the oligo; the ID returned by upsert goes into the usage string.
    oligo_id = olib_oligos.upsert(
        {
            **oligo_base,
            "Name": library_name,
            "Sequence": sequence.normalize_seq_upper(seq),
            "Description": description,
        },
        apply=apply,
    )
    usage = f"{oligo_id}<{library_reverse_primer_id}>/{insert_enzyme}"

    # Register the fragment obtained by digesting the oligo with the insert enzyme.
    part_seq = sequence.normalize_seq(workflow.re_digest_part(seq, insert_enzyme))
    fragment_row = dict(fragment_base)
    fragment_row.update(
        {
            "Name": library_name,
            "Sequence": part_seq,
            "Description": description,
            "Usage": usage,
            "Type": insert_part_type,
            "Upstream overhang": insert_overhangs[0],
            "Downstream overhang": insert_overhangs[1],
            "Species/codon usage": "E. coli",
        }
    )
    flib_fragments.upsert(fragment_row, apply=apply, clear=True)

# Commit

In [None]:
# Inspect the oligo collection's `.local` attribute before committing.
# NOTE(review): presumably the locally staged rows — confirm.
olib_oligos.local

In [None]:
# Inspect the fragment collection's `.local` attribute before committing.
# NOTE(review): presumably the locally staged rows — confirm.
flib_fragments.local

In [None]:
# Commit the oligo and fragment collections.
# NOTE(review): presumably this writes the upserted rows to the registry — confirm.
olib_oligos.commit()
flib_fragments.commit()

In [None]:
# Roll back the oligo and fragment collections instead of committing.
# NOTE(review): under Restart & Run All this cell executes immediately after
# the commit cell — confirm that is harmless, or run only one of the two.
olib_oligos.rollback()
flib_fragments.rollback()