In [None]:
import numpy as np
import pandas as pd
import holoviews as hv
import hvplot.pandas
import matplotlib.pyplot as plt
import seaborn as sns
import toml
import re
from zipfile import ZipFile
import urllib
from datetime import datetime
import string
import pygsheets
import requests
from tqdm.auto import tqdm
import Bio.Restriction as Restriction
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
import Bio.Entrez as Entrez
import benchlingapi

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import paulssonlab.api as api
import paulssonlab.api.benchling as bapi
from paulssonlab.api.util import base_url
import paulssonlab.cloning.registry as registry
import paulssonlab.cloning.workflow as workflow
import paulssonlab.cloning.design as design
import paulssonlab.cloning.sequence as sequence
import paulssonlab.cloning.enzyme as enzyme
import paulssonlab.cloning.viennarna as viennarna
import paulssonlab.cloning.thermodynamics as thermodynamics
import paulssonlab.cloning.primers as primers
import paulssonlab.cloning.ncbi as ncbi
import paulssonlab.cloning.commands.parser as cmd_parser
import paulssonlab.cloning.commands.semantics as cmd_semantics

In [None]:
# Select Bokeh as the HoloViews plotting backend for this notebook.
hv.extension("bokeh")

# Setup

In [None]:
# Load notebook settings from config.toml (path is relative to the working
# directory); the "benchling" and "registry" sections are read further down.
config = toml.load("config.toml")

In [None]:
# Authorize the Google Sheets client with a service-account key file
# (credentials.json, relative to the working directory).
# NOTE(review): `gc` is also the name of a standard-library module; nothing
# here imports it, but a more specific name would avoid confusion.
gc = pygsheets.authorize(service_account_file="credentials.json")

In [None]:
# Open a Benchling session and resolve the project's root folder, both driven
# by the "benchling" section of the loaded config.
benchling_config = config["benchling"]
bench_session = benchlingapi.Session(benchling_config["api_key"])
benchling_folder = bapi.get_project_root(bench_session, benchling_config["project"])

In [None]:
# Build the registry from the Sheets client, the configured registry folder,
# and the Benchling project folder resolved above.
reg = registry.Registry(gc, config["registry"]["folder"], benchling_folder)

# Config

In [None]:
# Registry tables used throughout the notebook, looked up in one pass.
plib_plasmids, plib_maps, lib_parts, part_types = (
    reg[registry_key]
    for registry_key in (
        ("pLIB", "plasmids"),
        ("pLIB", "maps"),
        ("LIB", "parts"),
        ("LIB", "parts", "Part types"),
    )
)

In [None]:
# gg_overhangs = workflow.overhangs_for(part_types["CDS_CD"])
# Overhangs looked up for the "Deg_tag" part type; element [1] is appended to
# the tag suffix further down and the pair is splatted into the placeholder call.
degtag_overhangs = workflow.overhangs_for(part_types["Deg_tag"])

In [None]:
# storage_flanks = (
#     lib_parts["JUMP_storage_vector_prefix"]["Sequence"],
#     lib_parts["JUMP_storage_vector_suffix"]["Sequence"],
# )

In [None]:
# ua_rbs = "tctagatttaagaaggagatatacat"
# cluzel_cterm = "atgtccagacctgcaggcatgcaagctctagaggcat"
# flanks = (ua_rbs + "atg", "taa" + cluzel_cterm)

# Deg tags

## Data

In [None]:
# FROM: Andersen, J. B., Sternberg, C., Poulsen, L. K., Bjørn, S. P., Givskov, M., & Molin, S. (1998). New unstable variants of green fluorescent protein for studies of transient gene expression in bacteria. Applied and environmental microbiology, 64(6), 2240-2246.
# The paper lists the tags as reverse-complement sequences, so they are kept in
# that orientation here and flipped to the forward strand right after.
tags_wt_rc = {
    "LAA": Seq("AGCTGCTAAAGCGTAGTTTTCGTCGTTTGCTGC"),
    "AAV": Seq("AACTGCTGCAGCGTAGTTTTCGTCGTTTGCTGC"),
    # "LVA": "AGCTACTAAAGCGTAGTTTTCGTCGTTTGCTGC", # paper implies it behaves similarly to LAA
    "ASV": Seq("AACTGATGCAGCGTAGTTTTCGTCGTTTGCTGC"),
}
# Forward-strand versions of the same tags, keyed identically.
tags_wt = {
    tag_name: sequence.reverse_complement(rc_seq)
    for tag_name, rc_seq in tags_wt_rc.items()
}

In [None]:
# Sanity check: translate the forward-strand AAV tag to inspect its amino acids.
tags_wt["AAV"].translate()

In [None]:
# Recover the stretch that precedes the upstream overhang in every wild-type
# tag and check that it is identical across tags.
#
# `upstream_overhang` used to be read here without ever being assigned in this
# notebook, which raises NameError on a fresh kernel.
# NOTE(review): assumes `degtag_overhangs` is ordered (upstream, downstream),
# consistent with `degtag_overhangs[1]` being used for the tag suffix — confirm.
upstream_overhang = degtag_overhangs[0]
tag_normalized = {name: workflow.normalize_seq(seq) for name, seq in tags_wt.items()}
# `find` returns -1 when the overhang is absent (assuming str-like semantics),
# which would silently slice off the last base instead of failing; check first.
tags_missing_overhang = [
    name for name, seq in tag_normalized.items() if seq.find(upstream_overhang) < 0
]
assert (
    not tags_missing_overhang
), f"upstream overhang not found in tags: {tags_missing_overhang}"
tag_prefixes = {
    name: seq[: seq.find(upstream_overhang)] for name, seq in tag_normalized.items()
}
tag_prefixes_list = list(tag_prefixes.values())
# All tags must share one prefix; the first is used as the reference.
tag_prefix = tag_prefixes_list[0]
assert all(s == tag_prefix for s in tag_prefixes_list)

In [None]:
# Suffix appended to every tag: a normalized "taa" stop codon combined with the
# second deg-tag overhang via `smoosh_sequences`.
stop_codon = workflow.normalize_seq("taa")
downstream_overhang = degtag_overhangs[1]
tag_suffix = sequence.smoosh_sequences(stop_codon, downstream_overhang)

In [None]:
# Drop the shared prefix from each wild-type tag and append the common suffix.
prefix_length = len(tag_prefix)
tag_parts = {
    name: seq[prefix_length:] + tag_suffix for name, seq in tags_wt.items()
}

In [None]:
# Display the assembled tag parts for inspection.
tag_parts

## Tags

In [None]:
# Lengths used to split each tag part into a fixed head, a randomized middle,
# and a fixed tail.
# 6 nt of fixed head, minus the prefix already stripped from the tag parts
# (the library description below calls this head "2x alanines").
head_length = 6 - len(tag_prefix)  # nt
# 9 nt kept at the end of the tag itself, plus the appended tag suffix.
tail_length = 9 + len(tag_suffix)  # nt
# Number of NNK codons placed between head and tail.
nnk_length = 6  # aa's

In [None]:
# Build two entries per tag: the wild-type sequence and an NNK library variant
# that keeps the head and tail of the tag and randomizes the middle codons.
tags = {}
for name, seq in tag_parts.items():
    wild_type_entry = {
        "Sequence": seq,
        "Description": f"Wild-type {name} ClpXP degradation tag from Andersen 1998.",
    }
    tags[f"degtag_{name}"] = wild_type_entry
    # The stretch between head and tail must be exactly the codons being replaced.
    randomized_nt = len(seq) - head_length - tail_length
    assert randomized_nt == nnk_length * 3
    library_seq = seq[:head_length] + "NNK" * nnk_length + seq[-tail_length:]
    library_entry = {
        "Sequence": library_seq,
        "Description": f"ClpXP degradation tag library with 2x alanines, 6x NNK's, and the {name} tail from Andersen 1998.",
    }
    tags[f"degtag_{name}_NNK"] = library_entry

In [None]:
# Display the wild-type and NNK-library tag entries.
tags

In [None]:
# OLIGOS
# PARTS

In [None]:
# Citation stored in the "Reference" field of the oligo entries.
reference = "Andersen, J. B., Sternberg, C., Poulsen, L. K., Bjørn, S. P., Givskov, M., & Molin, S. (1998). New unstable variants of green fluorescent protein for studies of transient gene expression in bacteria. Applied and environmental microbiology, 64(6), 2240-2246."

# Fields intended to be shared by every oligo entry.
# NOTE(review): `description` is not defined anywhere in the visible notebook,
# so this would raise NameError unless it is defined elsewhere — confirm.
oligo_base = {
    "Author": "Jacob Quinn Shenker",
    "Date": workflow.date(),
    "Order date": workflow.date(),
    "Vendor": "IDT",
    "Type": "Primer",
    "Description": description,
    "Reference": reference,
}

# NOTE(review): this loop looks like unfinished scaffolding. `product`
# (presumably itertools.product), `enzymes`, `olt` and `unses` are not defined
# or imported in the visible notebook; `base` is an empty dict rather than
# `oligo_base`; and `seq` / `name` only exist as leftovers of the for-loop in
# the "Tags" section. Confirm the intent before running this cell.
for enzyme_name, flipped, upstream in product(enzymes, (False, True), (False, True)):
    base = {}
    olt[olt.next_id()] = {
        **base,
        "Name": "",
        "Sequence": seq,
        "Description": description,
    }
    print(name, enzyme_name, flipped, unses)

## Placeholders

In [None]:
# part_names = ["sigW", "rsiW", "ECF20_992", "AS20_992", "sfGFP"]  # TODO: pick correct FPs
# Registry names of the parts handled in this section.
part_names = ["sigW", "rsiW"]

In [None]:
# Map each part name to the "_seq" entry of its registry record.
part_seqs = {name: reg.get(name)["_seq"] for name in part_names}

In [None]:
# Display the fetched part sequences.
part_seqs

In [None]:
# Use sigW as the working example for the exploratory cells that follow.
part_seq = part_seqs["sigW"]

In [None]:
# Inspect where the coding sequence lies within the part (displayed only; the
# result is unpacked as a (start, stop) pair further down).
workflow.find_coding_sequence(part_seq)

In [None]:
# Inspect which source plasmid the sigW "Usage" field resolves to.
workflow.get_source_plasmid(reg, lib_parts["sigW"]["Usage"])

In [None]:
# Inspect the sequence of pLIB214.
# NOTE(review): hard-coded name — presumably the source plasmid reported by the
# previous cell; confirm.
reg.get("pLIB214")["_seq"]

In [None]:
# Locate the coding sequence of the first part within its source plasmid.
part_name = part_names[0]
part = reg.get(part_name)
part_seq = part["_seq"]
# CDS boundaries in part coordinates.
cds_start, cds_stop = workflow.find_coding_sequence(part_seq)
# part_seq = part_seq[cds_start:cds_stop]
plasmid_name = workflow.get_source_plasmid(reg, part["Usage"])
plasmid_seq = reg.get(plasmid_name)["_seq"]
# Find the whole part on the plasmid; min_score equal to the part length
# presumably demands a full-length match — confirm against find_subsequence.
# NOTE: assigning to `_` shadows IPython's last-output variable in this kernel.
part_start, part_stop, _, _ = sequence.find_subsequence(
    plasmid_seq, part_seq, min_score=len(part_seq)
)
# Convert the CDS boundaries from part coordinates to plasmid coordinates:
# shift the start by the part's offset, and pull the stop back by the number of
# bases that follow the CDS inside the part.
start = part_start + cds_start
stop = part_stop - (len(part_seq) - cds_stop)

In [None]:
# Re-index the plasmid at the CDS stop (presumably rotating the origin there —
# confirm against `reindex`), then derive the reverse template from it.
plasmid_seq_forward = plasmid_seq.reindex(stop)
plasmid_seq_reverse = plasmid_seq_forward.reverse_complement()

In [None]:
# Display the deg-tag overhangs passed to the placeholder call below.
degtag_overhangs

In [None]:
# Preview a Golden Gate placeholder for BsaI with the deg-tag overhangs; the
# result is only displayed, not stored.
design.golden_gate_placeholder(Restriction.BsaI, None, *degtag_overhangs)

In [None]:
# TODO(review): placeholder is left unset; the previous cell computes a
# candidate but does not assign it.
placeholder = None

In [None]:
# Tm thresholds passed as `min_tm` to the primer search.
tm_binding = 60
tm_homology = 55

# Homology arm: first 3'-anchored candidate on the reverse template that meets
# the homology Tm.
homology_candidates = primers.iter_primers(
    sequence.get_seq(plasmid_seq_reverse),
    min_tm=tm_homology,
    anchor_3prime=True,
)
homology = next(homology_candidates)

# Reverse primer: same template, binding Tm, and at least as long as the
# homology arm.
reverse_candidates = primers.iter_primers(
    sequence.get_seq(plasmid_seq_reverse),
    min_tm=tm_binding,
    min_length=len(homology),
    anchor_3prime=True,
)
reverse_primer = next(reverse_candidates)

# Forward primer: first 3'-anchored candidate on the forward template that
# meets the binding Tm.
forward_candidates = primers.iter_primers(
    sequence.get_seq(plasmid_seq_forward),
    min_tm=tm_binding,
    anchor_3prime=True,
)
forward_primer = next(forward_candidates)

In [None]:
# Display the selected homology arm.
homology

In [None]:
# Display the selected reverse primer.
reverse_primer

In [None]:
# Display the forward template (plasmid re-indexed at the CDS stop).
plasmid_seq_forward

In [None]:
# Last 20 positions of the reverse template read backwards (reversed only, not
# complemented).
plasmid_seq_reverse[::-1][-20:]

In [None]:
# Reverse-complement the reverse template (back to the forward orientation)
# and show its last 20 positions.
plasmid_seq_reverse.reverse_complement()[-20:]

In [None]:
# Try a higher Tm threshold (65) on the forward template; the result is only
# displayed, not stored.
next(
    primers.iter_primers(
        sequence.get_seq(plasmid_seq_forward), min_tm=65, anchor_3prime=True
    )
)

In [None]:
# Last 10 positions of the part sequence.
part_seq[-10:]

In [None]:
# find stop, extract CDS
# get storage plasmid sequence
# find CDS location on storage plasmid
# generate Tm>50 reverse primer abutting stop codon
# generate Tm>50 forward primer starting on stop codon
# extend forward primer to generate Tm>60-65 overhang
# add placeholder+overhang to reverse primer

In [None]:
# primers

In [None]:
# storage vectors