In [None]:
import numpy as np
import pandas as pd
import holoviews as hv
import hvplot.pandas
import matplotlib.pyplot as plt
import seaborn as sns
import toml
import re
from zipfile import ZipFile
import urllib
from datetime import datetime
import string
import pygsheets
import requests
from tqdm.auto import tqdm
import Bio.Restriction as Restriction
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
import Bio.pairwise2 as pairwise2
import benchlingapi

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import paulssonlab.api as api
import paulssonlab.api.benchling as bapi
from paulssonlab.api.util import base_url
import paulssonlab.cloning.registry as registry
import paulssonlab.cloning.workflow as workflow
import paulssonlab.cloning.sequence as sequence
import paulssonlab.cloning.enzyme as enzyme
import paulssonlab.cloning.viennarna as viennarna
import paulssonlab.cloning.thermodynamics as thermodynamics

In [None]:
hv.extension("bokeh")

# Setup

In [None]:
# Load notebook settings from config.toml; later cells read its "benchling"
# (api_key, project) and "registry" (folder) tables.
config = toml.load("config.toml")

In [None]:
# Authorize a pygsheets client using the service-account key file.
# NOTE(review): the name `gc` would shadow the stdlib `gc` module if that is
# ever imported in this notebook.
gc = pygsheets.authorize(service_account_file="credentials.json")

In [None]:
# Open a Benchling API session with the key from config.toml, then resolve the
# root folder of the configured project through bapi.get_project_root.
bench_session = benchlingapi.Session(config["benchling"]["api_key"])
benchling_folder = bapi.get_project_root(bench_session, config["benchling"]["project"])

In [None]:
# Build the registry from the Google Sheets client, the configured registry
# folder, and the Benchling project folder. Later cells index it with tuples.
reg = registry.Registry(gc, config["registry"]["folder"], benchling_folder)

# Bervoets 2018

In [None]:
# Primer table (name, sequence). Long sequences are wrapped across two lines
# inside double quotes, so the raw parsed value contains an embedded newline;
# it is removed after parsing (see _read_sequence_table below).
bervoets_primers_tsv = """Primer	Sequence
IB0173	ATGGTTAGCGAGCTGATCAAAG
IB0174	CTTCGTAAATCTGGCGAGTG
IB0175	TGTGCATGTTTTCTTTGATCAGCTCGCTAACCATCATTAGAAAACCTCCATAGCATG
IB0176	GATGTCTGGCAGTTCCCCACTCGCCAGATTTACGAAGTTCTAGAGCACAGCTAACAC
IB0180	CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGTGTAGGCTGGAGCTGCTTC
IB0181	"AACAGCTATGACCATGATTACGAATTCGAGCTCGGTACCCTGGTCCATATG
AATATCCTCCTTAG"
IB0184	"TTCCCAGTCACGACGTTGTAAAACGACGGCCAGTGCCAGGAGACCACAAC
GGTTTCCCTCTAC"
IB0186	"GTATAGGAACTTCGAAGCAGCTCCAGCCTACACGGGGATCTTCATTCATCA
TTAACACCTCTATTATAAAGTGCTTTCAGCC"
IB0198	"TTACCGGATTCTTAATTACCTGGTGCGTATGGGCGGTAATTTGACCTTAATA
AAAAGGTCTGGTCCATATGAATATCCTCCTTAG"
IB0199	"GCGAAATCCTGCAAACGCAGGGGCTGAATATCGAAGCGCTGTTCCGCGAG
TAGGAGACCACAACGGTTTCCCTCTAC"
IB0238	GAGTCACACAGGAAAGTACTAGATGACGATCGATGAAATTTACC
IB0245	"TGAGCGGATAACAATTTCACACAGGAAACAGACCATGGAATTCGGAGACCA
CAACGGTTTCCCTCTAC"
IB0249	"CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTCATTCA
TCATTAACACCTCTATTATAAAGTGCTTTCAGCCGCTGTC"
IB0250	TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGATGACACAACCATCAAAAACTACGAAACTAAC
IB0251	CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTTACATTAACTCCATCGAGGGATCTTC
IB0252	TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGATGGATGTGGAGGTTAAGAAAAACGGCAAAAACG
IB0253	CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGCTAGCCATCCGTATGATCCATTTGAACC
IB0254	TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGGTGTCGAGAAATAAAGTCGAAATCTGCGGGGTGGATAC
IB0255	"CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTTATTGATGAATATTTTTATTCATTTGTTTGATAGCCGCTTTTTCAAGTCTGGACACCTG
CGCTTGAG"
IB0256	TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGGTGAATCTACAGAACAACAAGGGAAAATTCAAC
IB0257	"CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTTACAAA
CTGATTTCGCGAATTTCCAAGTAC"
IB0258	"TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGATGGAAATGATGATTAA
AAAAAGAATTAAACAAGTCAAAAAAGGCGACCAG"
IB0259	"CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTTAAAGA
TCCCTTAATTGTTTTCTAAGAGCCTCTCTG"
IB0260	TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGATGGAAGAAACCTTTCAATTATTATATGATACATATCATCAAGATTTG
IB0261	CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTTAACTGCCGGAAGTTGACTTAACAACTC
IB0262	CTAGTACTTTCCTGTGTGACTC
IB0268	GTATAGGAACTTCGAAGCAGCTCCAGCCTACACGGGGATCTTTACATTAACTCCATCGAGGG
IB0269	GTATAGGAACTTCGAAGCAGCTCCAGCCTACACGGGGATCTCTAGCCATCCGTATGATCC
IB0270	GTATAGGAACTTCGAAGCAGCTCCAGCCTACACGGGGATCTTTATTGATGAATATTTTTATTCATTTGTTTGATAGCC
IB0271	"GTATAGGAACTTCGAAGCAGCTCCAGCCTACACGGGGATCTTTACAAACTG
ATTTCGCGAATTTCC"
IB0272	"GTATAGGAACTTCGAAGCAGCTCCAGCCTACACGGGGATCTTTAAAGATCC
CTTAATTGTTTTCTAAGAGC"
IB0273	"GTATAGGAACTTCGAAGCAGCTCCAGCCTACACGGGGATCTTTAACTGCC
GGAAGTTGACTTAACAACTCCTTTATCTG"
IB0476	"TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGATGAAGCAAGGTTTGC
AACTCAGGCTTAG"
IB0477	"CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTCAAACG
AGTTGTTTACGCTGGTTTGAC"
IB0478	TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGGTGAATTCACTCTATACCGCTGAAGGTG
IB0479	CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTTATAACTTACCCAGTTTAGTGCGTAACC
IB0480	TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGATGACTGACAAAATGCAAAGTTTAG
IB0481	CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTTACGCTTCAATGGCAGCAC
IB0482	TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGATGTCTGACCGCGCCACTAC
IB0483	"CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTCATAAC
CCATACTCCAGACGGAACAG"
IB0484	"TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGATGAGCGAGCAGTTAA
CGGAC"
IB0485	"CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTCAACGC
CTGATAAGCGGTTGAAC"
Fw_BB	GGATCTCGTAACCGAACTTG
Fw_LibB_1	GCCCTATGTTTAAAAAAATGTCGGAGAACGTGTTTATNNNNNNNNNNNNGGGTATGTAACTTGTAGGGCC
Fw_LibB_2	GCCCTATGTTTAAAAAAATGTCGGAGAACGTGTTTATTTTTTTNNNNNNGGGTATGTAACTTGTAGGGCC
Fw_LibB_3	GCCCTATGTTTAAAAAAATGTCGGAGAACGTGTTTATNNNNNNGAAAAAGGGTATGTAACTTGTAGGGCC
Fw_LibF_1	GATGCGTCCTGTTCTGCGATGTTTANNNNNNNNNNNNNNNKCTCATAATAGTAGAAACAGGGCC
Fw_LibF_2	GATGCGTCCTGTTCTGCGATGTTTAAAAACGATNNNNNNNKCTCATAATAGTAGAAACAGGGCC
Fw_LibF_3	GATGCGTCCTGTTCTGCGATGTTTANNNNNNNNCTTTTTTTCTCATAATAGTAGAAACAGGGCC
Fw_LibW_1	CTATCTGCTGCCCTATGATAAACTTATTTTATAAAAAAATTGAAACNNNNNNNNNNNNNNNNCGTATACATACAGAGGGCC
Fw_LibW_2	CTATCTGCTGCCCTATGATAAACTTATTTTATAAAAAAATTGAAACCTTTTGAANNNNNNNNCGTATACATACAGAGGGCC
Fw_LibW_3	CTATCTGCTGCCCTATGATAAACTTATTTTATAAAAAAATTGAAACNNNNNNNNACGAAGCTCGTATACATACAGAGGGCC
Fw_LibproD_1	GGTTGCTGGATAACTTTACGNNNNNNNNNNNNNNTCGTATAATATATTCAGGGAGAGCACAAC
Fw_LibproD_2	GGTTGCTGGATAACTTTACGNNNNNNNNNNNNNNNNNTATAATATATTCAGGGAGAGCACAAC
Rv_BB	CTGGTTGTTCTCAAGTTCGG
Rv_LibB	CGACATTTTTTTAAACATAGGGCAG
Rv_LibF	CATCGCAGAACAGGACGCATC
Rv_LibW	GTTTATCATAGGGCAGCAGATAG
Rv_LibproD	CGTAAAGTTATCCAGCAACC"""

# Promoter table (sigma factor letter, promoter name, sequence).
bervoets_promoters_tsv = """Sigma	Promoter	Sequence
B	PB2	GTTTATTTTTTTGAAAAAGGGTAT
B	PB2.1	GTTTATCAAATGGTGCTGGGGTAT
B	PB2.2	GTTTATCGTTTAATCTGTGGGTAT
B	PB2.3	GTTTATAGGTCCTCAATTGGGTAT
B	PB2.4	GTTTATCAAAAGGCACATGGGTAT
B	PB2.5	GTTTATTCCCCAGTTTTGGGGTAT
B	PB2.6	GTTTATTTGTTCGAAAGGGGGTAT
B	PB2.7	GTTTATCATATGCAAAACGGGTAT
B	PB2.8	GTTTATTCTGGGAAAATCGGGTAT
B	PB2.9	GTTTATCTGTGGTAAAACGGGTAT
B	PB2.10	GTTTATGTTTTTTCTGTACAGGGTAT
F	PF3	GTTTAAAAACGATCTTTTTTTCTCATAAT
F	PF3.1	GTTTAAGCTATTGAGGGTATTCTCATAAT
F	PF3.2	GTTTATGCCAAATGGCAGGTGCTCATAAT
F	PF3.3	GTTTATTGACGGATATCGCTGCTCATAAT
F	PF3.4	GTTTAGTGATGTGTCACGATGCTCATAAT
F	PF3.5	GTTTATTTGAAGGGATGAGTGCTCATAAT
F	PF3.6	GTTTAGTTTTAATTATAACTGCTCATAAT
F	PF3.7	GTTTAAAAACGATGCGTTGTGCTCATAAT
F	PF3.8	GTTTACATAATTTAATTTTGGCTCATAAT
F	PF3.9	GTTTACTTTTATGTGTTTATGCTCATAAT
W	PW2	TGAAACCTTTTGAAACGAAGCTCGTA
W	PW2.1	TGAAACTTATTTACCCTCGTA
W	PW2.2	TGAAACCTTTTGAGCAGCTTTCGTA
W	PW2.3	TGAAACGAGCCCGGGATTTCGCGTA
W	PW2.4	TGAAACCTTTTGAAAGGATTTGCGTA
W	PW2.5	TGAAACCTTTTGAACGTTTGCACGTA
W	PW2.6	TGAAACGGAAAAATGGAGCGGGCGTA
W	PW2.7	TGAAACCGATCGTCTGCGGACGCGTA
W	PW2.8	TGAAACGCGGAAAAACGAAGCTCGTA
W	PW2.9	TGAAACGTCTCGGAGGGGTGTTCGTA"""

import io


def _read_sequence_table(tsv, index_col):
    """Parse a whitespace-delimited table and clean its ``Sequence`` column.

    Parameters
    ----------
    tsv : str
        Table text with a header row that includes a ``Sequence`` column.
    index_col : int
        Position of the column to use as the DataFrame index.

    Returns
    -------
    pandas.DataFrame
        The parsed table. Any whitespace inside a sequence (the line break
        left by a quoted, line-wrapped field) is removed.
    """
    # Raw string: "\s" in a plain literal is an invalid escape sequence.
    table = pd.read_csv(io.StringIO(tsv), sep=r"\s+", index_col=index_col)
    table["Sequence"] = table["Sequence"].str.replace(r"\s+", "", regex=True)
    return table


bervoets_primers = _read_sequence_table(bervoets_primers_tsv, index_col=0)

bervoets_promoters = _read_sequence_table(bervoets_promoters_tsv, index_col=1)

# Pair of primer names (keys into bervoets_primers) recorded for each sigma
# factor letter.
bervoets_primers_for_sigma = {
    "B": ("IB0250", "IB0251"),
    "F": ("IB0252", "IB0253"),
    "G": ("IB0254", "IB0255"),
    "H": ("IB0256", "IB0257"),
    "M": ("IB0238", "IB0249"),
    "W": ("IB0258", "IB0259"),
    "X": ("IB0260", "IB0261"),
}

# Bacillus sigma primers

In [None]:
import primer3plus

In [None]:
def overhangs_for(x):
    """Return the ``(upstream, downstream)`` overhang pair stored on ``x``.

    ``x`` is any mapping with "Upstream overhang" and "Downstream overhang"
    keys; a missing key raises whatever the mapping's lookup raises.
    """
    upstream = x["Upstream overhang"]
    downstream = x["Downstream overhang"]
    return upstream, downstream


def _format_seq(seq):
    """Return ``seq`` as a lower-case string.

    The input is first passed through ``sequence.get_seq``, then converted
    with ``str`` and lower-cased.
    """
    # TODO: mixed bases in upper case for IDT
    as_text = str(sequence.get_seq(seq))
    return as_text.lower()


def strip(s):
    """Return ``s`` with every run of whitespace removed, including interior whitespace."""
    whitespace_run = re.compile(r"\s+")
    return whitespace_run.sub("", s)

In [None]:
# Sequences pasted as space-separated, line-wrapped blocks; strip() removes
# all whitespace so each variable holds one contiguous string (case as typed).
# NOTE(review): presumably the sigB coding sequence — confirm source record.
sigb = strip(
    """atgACACAAC CATCAAAAAC TACGAAACTA ACTAAAGATG AAGTCGATCG GCTCATAAGC
GATTACCAAA CAAAGCAAGA TGAACAAGCG CAGGAAACGC TTGTGCGGGT GTATACAAAT
CTGGTTGACA TGCTTGCGAA AAAATACTCA AAAGGCAAAA GCTTCCACGA GGATCTCCGC
CAGGTCGGCA TGATCGGGCT GCTAGGCGCG ATTAAGCGAT ACGATCCTGT TGTCGGCAAA
TCGTTTGAAG CTTTTGCAAT CCCGACAATC ATCGGTGAAA TTAAACGTTT CCTCAGAGAT
AAAACATGGA GCGTTCATGT GCCGAGACGA ATTAAAGAAC TCGGTCCAAG AATCAAAATG
GCGGTTGATC AGCTGACCAC TGAAACACAA AGATCGCCGA AAGTCGAAGA GATTGCCGAA
TTCCTCGATG TTTCTGAAGA AGAGGTTCTT GAAACGATGG AAATGGGCAA AAGCTATCAA
GCCTTATCCG TTGACCACAG CATTGAAGCG GATTCGGACG GAAGCACTGT CACGATTCTT
GATATCGTCG GATCACAGGA GGACGGATAT GAGCGGGTCA ACCAGCAATT GATGCTGCAA
AGCGTGCTTC ATGTCCTTTC AGACCGTGAG AAACAAATCA TAGACCTTAC GTATATTCAA
AACAAAAGCC AAAAAGAAAC TGGGGACATT CTCGGTATAT CTCAAATGCA CGTCTCGCGC
TTGCAACGCA AAGCTGTGAA GAAGCTCAGA GAGGCCTTGA TTGAAGATCC CTCGATGGAG
TTAATGtaa"""
)

# NOTE(review): presumably the rsbW coding sequence; its last full line ends
# with the same bases that follow "atg" at the start of `sigb` — confirm that
# this tail is intended.
rsbw = strip(
    """atgAAGAATA ATGCTGATTA CATCGAAATG AAAGTGCCGG CCCAACCTGA ATATGTGGGA
ATTATAAGAC TGACGCTGTC AGGGGTCGCA AGCAGAATGG GCTATACGTA CGATGAAATT
GAAGACTTGA AAATCGCAGT CAGTGAGGCG TGCACAAATG CGGTTCAGCA CGCTTACAAA
GAAGATAAAA ATGGGGAAGT GTCAATACGA TTCGGTGTGT TTGAAGACCG TTTAGAGGTT
ATTGTGGCGG ATGAAGGAGA CAGCTTTGAC TTTGATCAAA AGCAGCAGGA TCTAGGGCCG
TACACACCTT CGCACACAGT TGATCAATTA TCAGAAGGAG GGCTCGGTCT ATATTTAATG
GAAACGCTCA TGGATGAAGT CAGAGTGCAA AACCACTCCG GCGTCACCGT AGCGATGACA
AAGTATTTAA ATGGGGAGCG AGTTGATCAT GACACAACCA TCAAAAACTA CGAAACTAAC
taa"""
)

In [None]:
# Show the sequences of the two primers listed for sigma "B" in
# bervoets_primers_for_sigma.
[bervoets_primers.loc[name, "Sequence"] for name in bervoets_primers_for_sigma["B"]]

In [None]:
# Fetch the registry entries used below, addressed by tuple keys.
# NOTE(review): key meaning (prefix, kind[, sub-table]) is inferred from the
# tuple contents — confirm against paulssonlab.cloning.registry.
olt = reg[("oLT", "oligos")]
lib_parts = reg[("LIB", "parts")]
part_types = reg[("LIB", "parts", "Part types")]

In [None]:
# Earlier, hard-coded variant kept for reference (disabled):
# overhangs = overhangs_for(part_types["5UTR_2"])
# storage_flanks = ("CGTCTCGGTCTCa", "tGAGACCgGAGACG") # storage vector BsmBI flanks
# random_bases = ("GCTTCA", "TGCTAA") # to add between BsmBI recognition site and ends of oligos
# storage_flanks = workflow.concatenate_flanks(storage_flanks, random_bases)
# Active definition: (prefix, suffix) sequences read from the LIB parts entries
# "JUMP_storage_vector_prefix" and "JUMP_storage_vector_suffix".
storage_flanks = (
    lib_parts["JUMP_storage_vector_prefix"]["Sequence"],
    lib_parts["JUMP_storage_vector_suffix"]["Sequence"],
)

In [None]:
storage_flanks

In [None]:
overhangs_for(part_types["CDS_CD"])

In [None]:
# WIP commit
# rename target to template?
# evaluate_primer should take template or binding? (see replace_primer_tail)
# evaluate bervoets primers, compare with primer3
# check that my flanks are good (e.g., simulate pcr)

# sigma/antisigma primers
# -> save (evaluate_primer -> description)
# primer3 tm params
# primer3 tm goal
# promoters
# -> save
# bsubcyc?

# find what params primer3plus is passing to BoulderIO
# try reproducing using BoulderIO directly
# try to avoid product_size issue with BoulderIO
# use same fix for primer3plus?

In [None]:
def design_primers(target, flanks):
    """Run a primer3plus cloning design on ``target`` with flank overhangs.

    Parameters
    ----------
    target
        Anything accepted by ``sequence.get_seq``; it is converted to a
        lower-case string and used as the primer3 template.
    flanks
        Iterable of flank pairs. It is unpacked into
        ``workflow.concatenate_flanks``, and the combined pair is then passed
        through ``workflow.smoosh_and_trim_flanks`` together with the template.

    Returns
    -------
    tuple
        ``(results, explain)`` as returned by ``primer3plus.Design.run()``.
    """
    template = str(sequence.get_seq(target)).lower()
    combined_flanks = workflow.concatenate_flanks(*flanks)
    overhang_pair = workflow.smoosh_and_trim_flanks(template, combined_flanks)

    design = primer3plus.Design()
    design.settings.template(template)
    design.settings.as_cloning_task()
    design.settings.use_overhangs()
    # Element 0 goes on the left primer, element 1 on the right primer.
    design.settings.left_overhang(overhang_pair[0])
    design.settings.right_overhang(overhang_pair[1])
    design.settings.product_size([27, 10000], opt=0)
    # Only a single primer pair is requested.
    design.settings.primer_num_return(1)

    results, explain = design.run()
    return results, explain


# Design primers for sigb using the CDS_CD part-type overhangs followed by the
# storage-vector flanks.
design_primers(sigb, [overhangs_for(part_types["CDS_CD"]), storage_flanks])

In [None]:
# Stand-alone primer3plus run on the sigb template with a one-base left
# overhang ("A") and no right overhang, and a smaller product-size ceiling
# (1000) than design_primers uses.
# NOTE(review): presumably a reduced case for the product_size issue listed in
# the TODO notes above — confirm.
# `results` and `explain` are displayed by the next two cells.
design = primer3plus.Design()
design.settings.template(str(sequence.get_seq(sigb)))
design.settings.as_cloning_task()
design.settings.use_overhangs()
design.settings.left_overhang("A")
design.settings.product_size([27, 1000], opt=0)
design.settings.primer_num_return(1)
results, explain = design.run()

In [None]:
results

In [None]:
explain

In [None]:
# Today's date as M/D/YYYY with no zero padding. The "%-m"/"%-d" strftime
# flags are a platform extension (unsupported on Windows), so the fields are
# formatted directly instead.
date = "{0.month}/{0.day}/{0.year}".format(datetime.now())

# Description shared by every oligo in this order.
description = "Added Eaton flanks, to be amplified with oLIB45/46."

# Fields that are identical for all rows; `date` comes from the line above.
base_row = {
    "Author": "Jacob Quinn Shenker",
    "Vendor": "IDT",
    "Type": "Primer",
    "Date": date,
    "Order date": date,
    "Description": description,
}

# NOTE(review): `seqs_to_order` is not defined in the visible part of this
# notebook — it must be a mapping of name -> sequence created elsewhere; confirm
# before running on a fresh kernel.
rows = []

for seq_name, seq in seqs_to_order.items():
    name = f"{seq_name}_v3"
    # Per-oligo fields first, then the shared fields ("Description" included).
    row = {"Name": name, "Sequence": str(seq).upper()}
    row.update(base_row)
    rows.append(row)

In [None]:
# Store each prepared row in the oLT registry under the ID from olt.next_id().
# NOTE(review): re-running this cell presumably adds the same rows again under
# new IDs — confirm next_id() semantics before re-executing.
for row in rows:
    olt[olt.next_id()] = row

In [None]:
# Commit the rows assigned above. NOTE(review): presumably writes to the
# backing sheet — confirm what save() persists.
olt.save()

# BioCyc test

In [None]:
import requests

In [None]:
# Request the BioCyc XML record for BSUB:BSU04730. The timeout stops a stalled
# connection from blocking the kernel indefinitely. The trailing expression
# keeps the response as this cell's output, so `_` in the next cell still
# refers to it.
biocyc_response = requests.get(
    "https://websvc.biocyc.org/getxml?BSUB:BSU04730", timeout=30
)
biocyc_response

In [None]:
# `_` is IPython's last-output variable, so this shows the body of the response
# only when run immediately after the request cell above.
_.content

# Old

In [None]:
# Registry handles for the oLIB oligos and pLIB maps.
olib = reg[("oLIB", "oligos")]
plib_maps = reg[("pLIB", "maps")]
# Map oLIB179 through oLIB182 to their sequences.
primers = {
    oligo_id: olib[oligo_id]["Sequence"]
    for oligo_id in (f"oLIB{num}" for num in range(179, 183))
}

In [None]:
# Fetch the oLT oligo registry again (same lookup as earlier in the notebook).
olt = reg[("oLT", "oligos")]

In [None]:
# Today's date as M/D/YYYY with no zero padding. The "%-m"/"%-d" strftime
# flags are a platform extension (unsupported on Windows), so the fields are
# formatted directly instead.
date = "{0.month}/{0.day}/{0.year}".format(datetime.now())

# NOTE(review): `idxs`, `old_row`, `seq` and `dissoc` are not defined in the
# visible part of this notebook (`seq` may be the loop variable left over from
# the earlier ordering cell) — confirm before running on a fresh kernel.
rows = []

for i, idx in enumerate(idxs):
    # Start from the old row without its "ID", then overwrite the per-split fields.
    row = dict(dissoc(old_row, "ID"))
    row.update(
        {
            "Sequence": seq[idx].upper(),
            "Name": "{}_v2_split{}".format(old_row["Name"], i + 1),
            "Description": "Split section {}-of-{} of oLT74 (20bp overlap).\n{}".format(
                i + 1, len(idxs), old_row["Description"]
            ),
            "Date": date,
            "Order date": date,
        }
    )
    rows.append(row)

In [None]:
# Store each split row in the oLT registry under the ID from olt.next_id().
# NOTE(review): re-running this cell presumably adds the same rows again under
# new IDs — confirm next_id() semantics before re-executing.
for row in rows:
    olt[olt.next_id()] = row

In [None]:
# Commit the rows assigned above. NOTE(review): presumably writes to the
# backing sheet — confirm what save() persists.
olt.save()

In [None]:
rows