In [None]:
import toml
import pygsheets
from tqdm.auto import tqdm
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import Restriction
import random
import re

In [None]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell executes, so edits to the project helper
# modules take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import paulssonlab.cloning.registry as registry
import paulssonlab.cloning.workflow as workflow
import paulssonlab.cloning.design as design
import paulssonlab.cloning.sequence as sequence
import paulssonlab.cloning.enzyme as enzyme
import paulssonlab.api.geneious as geneious

# Setup

In [None]:
# Seed Python's global `random` generator so repeated runs give the same
# results. NOTE(review): this only makes the random flanking bases
# reproducible if design.random_bases draws from the stdlib `random`
# module — confirm.
random.seed(75)

In [None]:
# Load notebook settings from config.toml (path is relative to the kernel's
# working directory). The "geneious" and "registry" tables are read when the
# Geneious connection and the registry are created.
config = toml.load("config.toml")

In [None]:
# Authorize a Google Sheets client from a service-account key file.
# credentials.json holds a secret key: keep it out of version control.
# Note: `gc` here is the pygsheets client, not the stdlib `gc` module.
gc = pygsheets.authorize(service_account_file="credentials.json")

In [None]:
# Connect to Geneious, passing every key of the [geneious] config table as a
# keyword argument.
# NOTE(review): the name suggests the result is a session factory rather than
# an open session — confirm against paulssonlab.api.geneious.
geneious_sessionmaker = geneious.connect(**config["geneious"])

In [None]:
# Open the cloning registry. It is given the Google Sheets client, the folder
# identifier from the [registry] config table, the Geneious session factory,
# and the name of the Geneious folder to use ("registry").
# NOTE(review): presumably sequences are mirrored into that Geneious folder —
# confirm against registry.Registry.
reg = registry.Registry(
    gc,
    config["registry"]["folder"],
    geneious_sessionmaker=geneious_sessionmaker,
    geneious_folder="registry",
)

In [None]:
# Fetch every registry collection this notebook works with in one pass.
# The lookups run in the order listed, one key per target name.
# NOTE(review): keys look like (prefix, kind) tuples, with an optional third
# element that presumably selects a named sub-sheet — confirm.
(
    olib_oligos,
    olt_oligos,
    plib_plasmids,
    plib_maps,
    lib_parts,
    part_types,
) = (
    reg[collection_key]
    for collection_key in (
        ("oLIB", "oligos"),
        ("oLT", "oligos"),
        ("pLIB", "plasmids"),
        ("pLIB", "maps"),
        ("LIB", "parts"),
        ("LIB", "parts", "Part types"),
    )
)

# Config

In [None]:
# Source parts: registry names of the existing RBS parts to re-derive.
orig_rbs_names = ["B0033m_BC", "B0032m_BC", "B0034m_BC"]

# Target part type. `part_overhangs` is indexed as [0] = upstream and
# [1] = downstream overhang when the part rows are written.
part_type = "5UTR_2"
part_overhangs = workflow.overhangs_for(part_types[part_type])
part_enzyme = Restriction.BsaI

# Oligo layout: wrap the part in the JUMP storage-vector prefix/suffix
# instead of a bare Type IIS site for `part_enzyme`.
use_storage_vector = True
# To make oligos fit within 60bp, we omit random bases from the BsmBI flanks.
num_random_bases = 0

# Free-text metadata copied onto every new part row.
part_tags = ""
reference = ""

# RBSes

## Sequences

In [None]:
# Build one insert sequence per source RBS.
#
# For every name in `orig_rbs_names` the registry entry is fetched, its
# existing overhangs are trimmed, and the overhangs for `part_type` are joined
# on instead. That core is then wrapped in flanks (JUMP storage-vector
# prefix/suffix, or a Type IIS site + spacer for `part_enzyme`) and optional
# random bases. Results are collected in `seqs`, keyed by the new part name,
# as {"Sequence": ..., "Description": ...}.

# Configuration check is loop-invariant: fail before touching the registry.
if use_storage_vector and part_enzyme != Restriction.BsaI:
    raise ValueError("storage vector assumes BsaI part")

seqs = {}
for name in orig_rbs_names:
    # Derive the new name and refuse names that would be left unchanged:
    # an unchanged name would make the derived part collide with (and later
    # overwrite) the source part it was built from.
    new_name, num_renamed = re.subn(r"_BC$", "_RiboJ", name)
    if num_renamed == 0:
        raise ValueError(f"expected a part name ending in '_BC', got {name!r}")
    entry = reg.get(name)

    # Core shared by both layouts: new upstream overhang + trimmed original
    # part + new downstream overhang.
    part_core = workflow.smoosh_and_normalize_sequences(
        part_overhangs[0],
        entry["_seq"].trim_overhangs(),
        part_overhangs[1],
    )

    # Only the flanks differ between the two layouts.
    if use_storage_vector:
        upstream_flank = lib_parts["JUMP_storage_vector_prefix"]["Sequence"]
        downstream_flank = lib_parts["JUMP_storage_vector_suffix"]["Sequence"]
    else:
        upstream_flank = design.type2s_with_spacer(part_enzyme, len(part_overhangs[0]))
        # The downstream site must face inward, hence the reverse complement.
        downstream_flank = sequence.reverse_complement(
            design.type2s_with_spacer(part_enzyme, len(part_overhangs[1]))
        )

    # Random bases are drawn upstream first, then downstream (same order as
    # the sequence is written), so results stay stable for a given seed.
    seq = workflow.normalize_seq(
        design.random_bases(num_random_bases)
        + upstream_flank
        + part_core
        + downstream_flank
        + design.random_bases(num_random_bases)
    )

    # Keep only the first sentence of the source description, then note the
    # relationship to the source part.
    description = (
        entry["Description"].split(".")[0]
        + f". Same as {name} but with RiboJ-compatible overhangs."
    )
    seqs[new_name] = {"Sequence": seq, "Description": description}

## Oligos

In [None]:
# Register, for every designed sequence, a sense/antisense oligo pair in
# `olib_oligos` and the part they form in `lib_parts`. Nothing is committed
# here; rows are only staged on the two collections.

# Columns shared by every row written below.
base = {"Author": "Jacob Quinn Shenker", "Date": workflow.date()}

# Additional columns for oligo rows.
oligo_base = {
    **base,
    "Order date": workflow.date(),
    "Vendor": "Genewiz",
    "Type": "Primer",
}

# Additional columns for part rows.
part_base = {**base, "Tags": part_tags, "Reference": reference}

# Passed through to `upsert` unchanged.
# NOTE(review): presumably excludes "Name" from the comparison used to find an
# existing row, so an already-registered oligo is reused — confirm.
apply = {"Name": None}


def _upsert_oligo(oligo_name, fields, oligo_seq):
    """Upsert a single oligo row and return whatever `upsert` returns.

    `fields` supplies the per-part columns; "Name" and "Sequence" are then
    overridden with `oligo_name` and the upper-case-normalized `oligo_seq`.
    """
    return olib_oligos.upsert(
        {
            **oligo_base,
            **fields,
            "Name": oligo_name,
            "Sequence": workflow.normalize_seq_upper(oligo_seq),
        },
        apply=apply,
    )


for name, row in seqs.items():
    seq = row["Sequence"]

    # Top strand first, then its reverse complement.
    oligo_id = _upsert_oligo(f"{name}_sense", row, seq)
    oligo_id2 = _upsert_oligo(
        f"{name}_antisense", row, sequence.reverse_complement(seq)
    )

    # Usage string: the two oligo IDs joined by "=", then "/" and the enzyme.
    usage = f"{oligo_id}={oligo_id2}/{part_enzyme}"
    part_seq = workflow.normalize_seq(workflow.re_digest_part(seq, part_enzyme))

    # Start from the shared + per-part columns, then override/extend them.
    part_row = {**part_base, **row}
    part_row.update(
        {
            "Sequence": part_seq,
            "Usage": usage,
            "Type": part_type,
            "Upstream overhang": part_overhangs[0],
            "Downstream overhang": part_overhangs[1],
            "Species/codon usage": "E. coli",
        }
    )
    lib_parts[name] = part_row

In [None]:
# Display the part collection's `.local` attribute for a visual check.
# NOTE(review): presumably the rows staged locally but not yet committed —
# confirm.
lib_parts.local

In [None]:
# Display the oligo collection's `.local` attribute for a visual check.
# NOTE(review): presumably the rows staged locally but not yet committed —
# confirm.
olib_oligos.local

In [None]:
# Commit both collections, oligos first and parts second (part rows carry the
# oligo IDs in their "Usage" column).
# NOTE(review): presumably this writes the staged rows to the shared registry
# and is not easily undone — review the previews before running.
olib_oligos.commit()
lib_parts.commit()

## Storage vectors