In [None]:
import random
from itertools import count

import Bio.Restriction as Restriction
import pygsheets
import toml
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from tqdm.auto import tqdm

In [None]:
# Enable IPython's autoreload extension in mode 2, so edited modules are
# re-imported before each cell executes instead of requiring a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import paulssonlab.cloning.design as design
import paulssonlab.cloning.enzyme as enzyme
import paulssonlab.cloning.io as cio
import paulssonlab.cloning.primers as primers
import paulssonlab.cloning.registry as registry
import paulssonlab.cloning.sequence as sequence
import paulssonlab.cloning.thermodynamics as thermodynamics
import paulssonlab.cloning.workflow as workflow
from paulssonlab.api import addgene
from paulssonlab.api.util import base_url

# Setup

In [None]:
# Load notebook settings from config.toml (path is relative to the working directory).
# The "registry" -> "folder" entry is read when the registry is constructed.
config = toml.load("config.toml")

In [None]:
# Authorize a pygsheets client from the service-account key file credentials.json
# (relative path; keep this file out of version control).
# Note: `gc` here is the pygsheets client, not the standard-library `gc` module.
gc = pygsheets.authorize(service_account_file="credentials.json")

In [None]:
# Build the registry from the authorized client and the folder named in the config;
# all collection lookups later in the notebook go through `reg`.
reg = registry.Registry(gc, config["registry"]["folder"])

# Config

In [None]:
# Handles to the registry collections used in this notebook, looked up by
# (library prefix, collection name[, sub-collection]) tuples.
olib_oligos = reg[("oLIB", "oligos")]
plib_plasmids = reg[("pLIB", "plasmids")]
plib_maps = reg[("pLIB", "maps")]
flib_fragments = reg[("fLIB", "fragments")]
# "Part types" is addressed as a third key under the fLIB fragments collection.
part_types = reg[("fLIB", "fragments", "Part types")]

# Primers to make FP parts

In [None]:
# Overhangs for the "CDS_CD" part type, as returned by workflow.overhangs_for.
# presumably Golden Gate overhangs (per the `gg_` prefix) — TODO confirm.
gg_overhangs = workflow.overhangs_for(part_types["CDS_CD"])

In [None]:
# (prefix, suffix) sequences of the JUMP storage vector, read from the "Sequence"
# field of the matching fLIB fragment entries.
storage_flanks = tuple(
    flib_fragments.find({"Name": fragment_name})["Sequence"]
    for fragment_name in (
        "JUMP_storage_vector_prefix",
        "JUMP_storage_vector_suffix",
    )
)

In [None]:
# Sequences used to locate each FP insert in its source plasmid.
# flanks[0] is searched as written; flanks[1] is reverse-complemented before the
# search (see the amplicon_location call in the "Extract FP inserts" section).
# presumably "atg"/"taa" are the insert's start/stop codons — TODO confirm.
ua_rbs = "tctagatttaagaaggagatatacat"
cluzel_cterm = "atgtccagacctgcaggcatgcaagctctagaggcat"
flanks = (f"{ua_rbs}atg", f"taa{cluzel_cterm}")

## Source plasmids

In [None]:
# %%time
# plasmids = {
#     row["Names"]: plib_maps[id_]
#     for id_, row in plib_plasmids.items()
#     if "cluzel-fp" in row["Tags"]
# }

In [None]:
%%time
# get all Cluzel plasmids, even the ones we haven't ordered yet
addgene_publication = addgene.get_addgene(
    "https://www.addgene.org/browse/article/28192043/"
)
plasmids = {}
for item in tqdm(addgene_publication["items"]):
    name = item["plasmid"]
    plasmids[name] = cio.read_http(item["sequence_urls"]["addgene_full"][0])

In [None]:
# Show the names of the plasmids that were downloaded.
plasmids.keys()

## Extract FP inserts

In [None]:
%%time
locations = {
    name: sequence.amplicon_location(
        seq, flanks[0], sequence.reverse_complement(flanks[1])
    )
    for name, seq in tqdm(plasmids.items())
}

In [None]:
# Slice each plasmid down to its located insert; the stored location is unpacked
# as the positional arguments of seq.slice.
# NOTE(review): assumes amplicon_location returned an unpackable location for every
# plasmid — a plasmid lacking either flank may fail here; confirm.
inserts = {name: seq.slice(*locations[name]) for name, seq in plasmids.items()}

## Check restriction sites

In [None]:
# For each listed restriction enzyme, report how many inserts (and which ones)
# have at least one hit from enzyme.re_search.
for enzyme_name in ("BsaI", "BsmBI", "BbsI", "AarI"):
    names_with_cuts = [
        insert_name
        for insert_name, insert_seq in inserts.items()
        if enzyme.re_search(insert_seq, enzyme_name)
    ]
    print(f"{enzyme_name} ({len(names_with_cuts)}): {', '.join(names_with_cuts)}")

## Find FP common ends

In [None]:
# Number of characters taken from each end of every insert when collecting insert ends.
max_end_length = 40

In [None]:
def _end_pair(insert, length):
    """Return the two ends of an insert as lower-case sequence slices.

    Args:
        insert: Object exposing ``seq_lower()``.
        length: Number of characters to take from each end.

    Returns:
        ``(head, tail)`` where ``head`` is the first ``length`` characters and
        ``tail`` is the last ``length`` characters in reversed order (order-reversed
        only, not reverse-complemented), so both read outward-in from their end.
    """
    # Lower-case once and reuse it for both slices.
    lowered = insert.seq_lower()
    return lowered[:length], lowered[-length:][::-1]


# name -> (upstream end, order-reversed downstream end) for every insert.
insert_ends = {name: _end_pair(seq, max_end_length) for name, seq in inserts.items()}

In [None]:
# Scratch check: melting temperature of a hand-picked sequence.
thermodynamics.tm("cgagcaccagga")

In [None]:
clusters

In [None]:
thermodynamics.tm("acgcgcaagccca"[::1])

In [None]:
thermodynamics.tm(sequence.reverse_complement("acgcgcaagccca"[::1]))

In [None]:
thermodynamics.tm("gtgtctgagctgattaaggagaacatgcacatgaagctgt")

In [None]:
clusters = workflow.cluster_sequences_by_prefix(insert_ends, max_length=27)
workflow.print_sequence_clusters(
    clusters,
    metrics={
        "4nt+Tm={:.1f}/{:.1f}": lambda upstream_seq, downstream_seq: (
            thermodynamics.tm(upstream_seq[4:]) or 0,
            # tm is (slightly) sensitive to character order, downstream_seq is order-reversed (not reverse-complemented!)
            thermodynamics.tm(downstream_seq[4:][::-1]) or 0,
        )
    },
)

# Placeholders

## Bare placeholders

## Multimer fusion placeholders

## FP fusion placeholders

## Degradation placeholders

# Primers

In [None]:
import primer3plus

In [None]:
# Use a dedicated name rather than rebinding `flanks`: the cell that computes
# `locations` reads `flanks` as the (upstream, downstream) search sequences, so
# overwriting it here would break that cell if it were re-run afterwards.
primer_flanks = workflow.concatenate_flanks(gg_overhangs, storage_flanks)
# Design primers for one example insert; return_many=3 is passed to request multiple results.
primers.primer3_amplicon(inserts["pEB1-SCFP3A"], primer_flanks, return_many=3)

In [None]:
# TODO: make find_primer_binding_site more general:
# allow specifying a score function so it can find amplicons with overhangs on both sides?

In [None]:
# USE CASES:
# 1) take desired product, template seq, find overhangs
# 2) take amplicon, optional overhangs

# TODO:
# tm/ta settings for Q5/phusion


# NOTE(review): `primer3_amplicon_primers` is not defined or imported in any cell
# visible in this notebook, so this call is expected to raise NameError on a fresh
# kernel. Presumably a draft call for a planned API (compare primers.primer3_amplicon,
# which takes concatenated flanks) — confirm the intended function or remove this cell.
primer3_amplicon_primers(
    inserts["pEB1-SCFP3A"], [gg_overhangs, storage_flanks], return_many=3
)