In [None]:
import re
import string
import urllib
from datetime import datetime
from zipfile import ZipFile

import benchlingapi
import Bio.pairwise2 as pairwise2
import Bio.Restriction as Restriction
import holoviews as hv
import hvplot.pandas
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pygsheets
import requests
import seaborn as sns
import toml
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from tqdm.auto import tqdm

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import paulssonlab.api as api
import paulssonlab.api.benchling as bapi
import paulssonlab.cloning.enzyme as enzyme
import paulssonlab.cloning.primers as primers
import paulssonlab.cloning.registry as registry
import paulssonlab.cloning.sequence as sequence
import paulssonlab.cloning.thermodynamics as thermodynamics
import paulssonlab.cloning.viennarna as viennarna
import paulssonlab.cloning.workflow as workflow
from paulssonlab.api.util import base_url

In [None]:
# Select Bokeh as the rendering backend for HoloViews.
hv.extension("bokeh")

# Setup

In [None]:
# Load notebook settings from config.toml (path relative to the working
# directory); later cells read its "benchling" and "registry" tables.
config = toml.load("config.toml")

In [None]:
# Authorize the Google Sheets client from the service-account key file
# credentials.json (path relative to the working directory).
gc = pygsheets.authorize(service_account_file="credentials.json")

In [None]:
# Open a Benchling API session with the key from config.toml, then resolve the
# root of the configured Benchling project.
bench_session = benchlingapi.Session(config["benchling"]["api_key"])
benchling_folder = bapi.get_project_root(bench_session, config["benchling"]["project"])

In [None]:
# Registry handle built from the Sheets client, the configured registry folder
# and the Benchling project root; indexed below with tuples such as
# ("oLT", "oligos").
reg = registry.Registry(gc, config["registry"]["folder"], benchling_folder)

# Bervoets 2018

In [None]:
bervoets_primers_tsv = """Primer	Sequence
IB0173	ATGGTTAGCGAGCTGATCAAAG
IB0174	CTTCGTAAATCTGGCGAGTG
IB0175	TGTGCATGTTTTCTTTGATCAGCTCGCTAACCATCATTAGAAAACCTCCATAGCATG
IB0176	GATGTCTGGCAGTTCCCCACTCGCCAGATTTACGAAGTTCTAGAGCACAGCTAACAC
IB0180	CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGTGTAGGCTGGAGCTGCTTC
IB0181	"AACAGCTATGACCATGATTACGAATTCGAGCTCGGTACCCTGGTCCATATG
AATATCCTCCTTAG"
IB0184	"TTCCCAGTCACGACGTTGTAAAACGACGGCCAGTGCCAGGAGACCACAAC
GGTTTCCCTCTAC"
IB0186	"GTATAGGAACTTCGAAGCAGCTCCAGCCTACACGGGGATCTTCATTCATCA
TTAACACCTCTATTATAAAGTGCTTTCAGCC"
IB0198	"TTACCGGATTCTTAATTACCTGGTGCGTATGGGCGGTAATTTGACCTTAATA
AAAAGGTCTGGTCCATATGAATATCCTCCTTAG"
IB0199	"GCGAAATCCTGCAAACGCAGGGGCTGAATATCGAAGCGCTGTTCCGCGAG
TAGGAGACCACAACGGTTTCCCTCTAC"
IB0238	GAGTCACACAGGAAAGTACTAGATGACGATCGATGAAATTTACC
IB0245	"TGAGCGGATAACAATTTCACACAGGAAACAGACCATGGAATTCGGAGACCA
CAACGGTTTCCCTCTAC"
IB0249	"CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTCATTCA
TCATTAACACCTCTATTATAAAGTGCTTTCAGCCGCTGTC"
IB0250	TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGATGACACAACCATCAAAAACTACGAAACTAAC
IB0251	CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTTACATTAACTCCATCGAGGGATCTTC
IB0252	TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGATGGATGTGGAGGTTAAGAAAAACGGCAAAAACG
IB0253	CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGCTAGCCATCCGTATGATCCATTTGAACC
IB0254	TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGGTGTCGAGAAATAAAGTCGAAATCTGCGGGGTGGATAC
IB0255	"CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTTATTGATGAATATTTTTATTCATTTGTTTGATAGCCGCTTTTTCAAGTCTGGACACCTG
CGCTTGAG"
IB0256	TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGGTGAATCTACAGAACAACAAGGGAAAATTCAAC
IB0257	"CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTTACAAA
CTGATTTCGCGAATTTCCAAGTAC"
IB0258	"TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGATGGAAATGATGATTAA
AAAAAGAATTAAACAAGTCAAAAAAGGCGACCAG"
IB0259	"CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTTAAAGA
TCCCTTAATTGTTTTCTAAGAGCCTCTCTG"
IB0260	TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGATGGAAGAAACCTTTCAATTATTATATGATACATATCATCAAGATTTG
IB0261	CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTTAACTGCCGGAAGTTGACTTAACAACTC
IB0262	CTAGTACTTTCCTGTGTGACTC
IB0268	GTATAGGAACTTCGAAGCAGCTCCAGCCTACACGGGGATCTTTACATTAACTCCATCGAGGG
IB0269	GTATAGGAACTTCGAAGCAGCTCCAGCCTACACGGGGATCTCTAGCCATCCGTATGATCC
IB0270	GTATAGGAACTTCGAAGCAGCTCCAGCCTACACGGGGATCTTTATTGATGAATATTTTTATTCATTTGTTTGATAGCC
IB0271	"GTATAGGAACTTCGAAGCAGCTCCAGCCTACACGGGGATCTTTACAAACTG
ATTTCGCGAATTTCC"
IB0272	"GTATAGGAACTTCGAAGCAGCTCCAGCCTACACGGGGATCTTTAAAGATCC
CTTAATTGTTTTCTAAGAGC"
IB0273	"GTATAGGAACTTCGAAGCAGCTCCAGCCTACACGGGGATCTTTAACTGCC
GGAAGTTGACTTAACAACTCCTTTATCTG"
IB0476	"TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGATGAAGCAAGGTTTGC
AACTCAGGCTTAG"
IB0477	"CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTCAAACG
AGTTGTTTACGCTGGTTTGAC"
IB0478	TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGGTGAATTCACTCTATACCGCTGAAGGTG
IB0479	CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTTATAACTTACCCAGTTTAGTGCGTAACC
IB0480	TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGATGACTGACAAAATGCAAAGTTTAG
IB0481	CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTTACGCTTCAATGGCAGCAC
IB0482	TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGATGTCTGACCGCGCCACTAC
IB0483	"CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTCATAAC
CCATACTCCAGACGGAACAG"
IB0484	"TTAACTTTTACTAGAGTCACACAGGAAAGTACTAGATGAGCGAGCAGTTAA
CGGAC"
IB0485	"CCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTCAACGC
CTGATAAGCGGTTGAAC"
Fw_BB	GGATCTCGTAACCGAACTTG
Fw_LibB_1	GCCCTATGTTTAAAAAAATGTCGGAGAACGTGTTTATNNNNNNNNNNNNGGGTATGTAACTTGTAGGGCC
Fw_LibB_2	GCCCTATGTTTAAAAAAATGTCGGAGAACGTGTTTATTTTTTTNNNNNNGGGTATGTAACTTGTAGGGCC
Fw_LibB_3	GCCCTATGTTTAAAAAAATGTCGGAGAACGTGTTTATNNNNNNGAAAAAGGGTATGTAACTTGTAGGGCC
Fw_LibF_1	GATGCGTCCTGTTCTGCGATGTTTANNNNNNNNNNNNNNNKCTCATAATAGTAGAAACAGGGCC
Fw_LibF_2	GATGCGTCCTGTTCTGCGATGTTTAAAAACGATNNNNNNNKCTCATAATAGTAGAAACAGGGCC
Fw_LibF_3	GATGCGTCCTGTTCTGCGATGTTTANNNNNNNNCTTTTTTTCTCATAATAGTAGAAACAGGGCC
Fw_LibW_1	CTATCTGCTGCCCTATGATAAACTTATTTTATAAAAAAATTGAAACNNNNNNNNNNNNNNNNCGTATACATACAGAGGGCC
Fw_LibW_2	CTATCTGCTGCCCTATGATAAACTTATTTTATAAAAAAATTGAAACCTTTTGAANNNNNNNNCGTATACATACAGAGGGCC
Fw_LibW_3	CTATCTGCTGCCCTATGATAAACTTATTTTATAAAAAAATTGAAACNNNNNNNNACGAAGCTCGTATACATACAGAGGGCC
Fw_LibproD_1	GGTTGCTGGATAACTTTACGNNNNNNNNNNNNNNTCGTATAATATATTCAGGGAGAGCACAAC
Fw_LibproD_2	GGTTGCTGGATAACTTTACGNNNNNNNNNNNNNNNNNTATAATATATTCAGGGAGAGCACAAC
Rv_BB	CTGGTTGTTCTCAAGTTCGG
Rv_LibB	CGACATTTTTTTAAACATAGGGCAG
Rv_LibF	CATCGCAGAACAGGACGCATC
Rv_LibW	GTTTATCATAGGGCAGCAGATAG
Rv_LibproD	CGTAAAGTTATCCAGCAACC"""

bervoets_promoter_lib_tsv = """Sigma	Promoter	Sequence
B	PB2	GTTTATTTTTTTGAAAAAGGGTAT
B	PB2.1	GTTTATCAAATGGTGCTGGGGTAT
B	PB2.2	GTTTATCGTTTAATCTGTGGGTAT
B	PB2.3	GTTTATAGGTCCTCAATTGGGTAT
B	PB2.4	GTTTATCAAAAGGCACATGGGTAT
B	PB2.5	GTTTATTCCCCAGTTTTGGGGTAT
B	PB2.6	GTTTATTTGTTCGAAAGGGGGTAT
B	PB2.7	GTTTATCATATGCAAAACGGGTAT
B	PB2.8	GTTTATTCTGGGAAAATCGGGTAT
B	PB2.9	GTTTATCTGTGGTAAAACGGGTAT
B	PB2.10	GTTTATGTTTTTTCTGTACAGGGTAT
F	PF3	GTTTAAAAACGATCTTTTTTTCTCATAAT
F	PF3.1	GTTTAAGCTATTGAGGGTATTCTCATAAT
F	PF3.2	GTTTATGCCAAATGGCAGGTGCTCATAAT
F	PF3.3	GTTTATTGACGGATATCGCTGCTCATAAT
F	PF3.4	GTTTAGTGATGTGTCACGATGCTCATAAT
F	PF3.5	GTTTATTTGAAGGGATGAGTGCTCATAAT
F	PF3.6	GTTTAGTTTTAATTATAACTGCTCATAAT
F	PF3.7	GTTTAAAAACGATGCGTTGTGCTCATAAT
F	PF3.8	GTTTACATAATTTAATTTTGGCTCATAAT
F	PF3.9	GTTTACTTTTATGTGTTTATGCTCATAAT
W	PW2	TGAAACCTTTTGAAACGAAGCTCGTA
W	PW2.1	TGAAACTTATTTACCCTCGTA
W	PW2.2	TGAAACCTTTTGAGCAGCTTTCGTA
W	PW2.3	TGAAACGAGCCCGGGATTTCGCGTA
W	PW2.4	TGAAACCTTTTGAAAGGATTTGCGTA
W	PW2.5	TGAAACCTTTTGAACGTTTGCACGTA
W	PW2.6	TGAAACGGAAAAATGGAGCGGGCGTA
W	PW2.7	TGAAACCGATCGTCTGCGGACGCGTA
W	PW2.8	TGAAACGCGGAAAAACGAAGCTCGTA
W	PW2.9	TGAAACGTCTCGGAGGGGTGTTCGTA"""

# Fixed sequence prepended to the variable region of every promoter-library
# member, keyed by the library table's "Sigma" column.
bervoets_promoter_lib_upstream = {
    "B": "TGTTTAAAAAAATGTCGGAGAACGT",
    "F": "GTAAAGATGCGTCCTGTTCTGCGAT",
    "W": "TGATAAACTTATTTTATAAAAAAAT",
}

# Fixed sequence appended to the variable region, keyed the same way.
bervoets_promoter_lib_downstream = {
    "B": "GTAACTTGTA",
    "F": "AGTAGAAACA",
    "W": "TACATACAGA",
}

# Leading context shared by the full-length promoter sequences; removed by the
# regex that derives the "Sequence" column of bervoets_promoters.
bervoets_promoter_insulation = "TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTA"
# Trailing context present on most full-length promoter sequences; removed by
# the same regex when it is there.
bervoets_promoter_bcd19 = "GGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG"

bervoets_promoters_tsv = """Sigma	Promoter	Source	Sequence
-	Pno	-	-
70	Plow	"proB (Davis et al., 2011)"	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTAGGTCTATGAGTGGTTGCTGGATAACTTTACGGGCATGCATAAGGCTCGTAATATATATTCAGGG
70	Pmid	"proC (Davis et al., 2011)"	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTAGGTCTATGAGTGGTTGCTGGATAACTTTACGGGCATGCATAAGGCTCGTATGATATATTCAGGG
70	Phigh	"proD (Davis et al., 2011)"	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTAGGTCTATGAGTGGTTGCTGGATAACTTTACGGGCATGCATAAGGCTCGTATAATATATTCAGGG
B	PB1	ctc	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTACTGTTCAGCTAAACCATTTTTCGAGGTTTAAATCCTTATCGTTATGGGTATTGTTTGTAATGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG
B	PB2	gspA	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTATGTTTAAAAAAATGTCGGAGAACGTGTTTATTTTTTTGAAAAAGGGTATGTAACTTGTAGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG
B	PB3	trxA	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTAGCTTCATGCCGGCGCTCTTTTTCAGGTTTTAAAACAGCTCCGGCAGGGCATGGTAAAGTACGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG
F	PF1	spoIIR	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTATTGCTAGATTTTTTTCACCCTGCACGTTTATCCCAGGCTCTCCTTGTCCATAATAGGGCTAGAAGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG
F	PF2	spoIIQ	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTACTAAAAAAGTTTTTTTGGATAGGTTGTATATATTTTCAGAAAAGTGTTCAGAATGTTGCTGAGGGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG
F	PF3	ywhE	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTAGTAAAGATGCGTCCTGTTCTGCGATGTTTAAAAACGATCTTTTTTTCTCATAATAGTAGAAACAGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG
G	PG1	yoaR	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTAATTCAAACAAACGATGGGAAGAAATACATCAAAGGATAAGCGGCTGTTCATACTAATGATTGGGAGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG
G	PG2	yozQ	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTATGGCCAAAGCGCGAATGAAAAAAGTGCATGAATACCTGCCCAACAGACAGAATAAGAAGAGTTGGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG
G	PG3	yvaB	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTATCTATAATAAAGTCTAAGAGAGACAGAATAATCATTATGCATCTGTATGATAATAATTGATGTGTGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG
M	PM1	yfnI	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTATTTTTTTATTTCTGAGAAAAAAATGTGAAACGAAATGAAGGTTTCTTTCGTCCAGTGATTGGGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG
M	PM2	rodA	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTATTCATTTGAAAAGTTTTGTGTCAATCGAAACATTTCGGTTTATGATACGTCATATTTCGTGGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG
M	PM3	divIC	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTAATCCGTTTTATCGCGAAACAATGTTTGAAACTTCTTCCTGTGAAAATGCGTCTAACTTTTAGGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG
M	PM4	ywtF	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTATTACTTGTATTTTTTTCAATGTCGCCGAAACATTTTACCTGCTGCGGCGTCCAATATAAGGGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG
W	PW1	ybfO	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTAAAGCTTTTTTTTGTTGGCAGGAAAAGGAAACTTTTTCTATATCTATCTCGTAATGACTAGAGGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG
W	PW2	sigW	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTATGATAAACTTATTTTATAAAAAAATTGAAACCTTTTGAAACGAAGCTCGTATACATACAGAGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG
W	PW3	ydjF	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTAGAAATGTCATTTTTTATTAAAAAAGTGAAACTTTTAACGATAATAAATAGTATATGTAACAAGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG
W	PW4	yfhL	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTACGAGGCTTGTCTTTTTGCCTATGCATGAAACATTTCTTCTTTCTGCACGTAACAATGAGAAGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG
X	PX1	lytR	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTAATTTTAAAGAAAAATTAAGAAACAATGAAACTTTTTTTTATAAAAAACGACTATTTTAGGAGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG
X	PX2	csbB	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTAAAAATCATGAATGTCACCATAAAATTGTAACAAAAAACAGGTTTAAACGACTTTAAAAAAAGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG
X	PX3	bcrC	TTCTAGAGCACAGCTAACACCACGTCGTCCCTATCTGCTGCCCTAATTTCAGACAATCTCTATTTTTATTTGAAACTTTTCATGAGTAAGATTAGTCTACTAAATATGGGCCCAAGTTCACTTAAAAAGGAGATCAACAATGAAAGCAATTTTCGTACTGAAACATCTTAATCATGCTATGGAGGTTTTCTAATG"""

# Fold change of each library promoter (rows) in the presence of sigma factor
# B, F or W (columns). Row labels must match the index of the promoter-library
# table, because fold changes are looked up by promoter name; six labels that
# had been transcribed as "P82.x" are corrected to "PB2.x".
bervoets_foldchange_tsv = """
Promoter	B	F	W
PB2.10	534.89	7.00	2.35
PB2.9	2.33	0.79	1.05
PB2.8	93.01	1.33	1.09
PB2.7	72.92	1.12	1.40
PB2.6	89.78	1.42	1.01
PB2.5	51.37	1.30	1.44
PB2.4	48.43	1.60	1.50
PB2.3	22.58	1.36	1.21
PB2.2	2.04	0.76	1.15
PB2.1	2.18	1.03	1.22
PB2	50.84	1.57	1.31
PF3.9	0.79	1.30	1.13
PF3.8	0.79	1.26	1.14
PF3.7	1.66	35.09	1.05
PF3.6	2.05	31.82	1.19
PF3.5	1.12	28.83	1.44
PF3.4	1.09	17.65	0.96
PF3.3	1.29	20.61	1.44
PF3.2	1.42	13.78	1.07
PF3.1	1.03	4.57	1.40
PF3	0.97	14.94	1.14
PW2.9	0.89	1.07	55.65
PW2.8	0.89	1.12	74.95
PW2.7	1.07	1.01	70.63
PW2.6	0.97	0.93	80.28
PW2.5	0.90	1.06	45.98
PW2.4	1.23	1.30	51.24
PW2.3	1.13	0.99	1.34
PW2.2	1.05	0.91	9.05
PW2.1	0.86	0.89	2.68
PW2	1.20	1.21	65.65
"""

import io

# All four tables are parsed with a whitespace separator. The pattern is a raw
# string: "\s" inside a normal string literal is an invalid escape sequence and
# triggers a warning on recent Python versions.
bervoets_primers = pd.read_csv(
    io.StringIO(bervoets_primers_tsv), sep=r"\s+", index_col=0
)
# Several primers are stored as quoted fields that wrap over two lines, so the
# parsed value carries the line break; remove all whitespace so each primer is
# one contiguous sequence.
bervoets_primers["Sequence"] = bervoets_primers["Sequence"].str.replace(
    r"\s+", "", regex=True
)

bervoets_promoter_lib = pd.read_csv(
    io.StringIO(bervoets_promoter_lib_tsv), sep=r"\s+", index_col=1
)

# The library table lists only the variable region; keep it under its own
# column and rebuild "Sequence" as upstream + variable + downstream for the
# row's sigma factor.
bervoets_promoter_lib["Sequence_variableonly"] = bervoets_promoter_lib["Sequence"]
bervoets_promoter_lib["Sequence"] = bervoets_promoter_lib.apply(
    lambda x: bervoets_promoter_lib_upstream[x["Sigma"]]
    + x["Sequence_variableonly"]
    + bervoets_promoter_lib_downstream[x["Sigma"]],
    axis=1,
)

bervoets_promoters = pd.read_csv(
    io.StringIO(bervoets_promoters_tsv), sep=r"\s+", index_col=1
)

# The promoter table lists each promoter with its surrounding context; keep
# that under "Sequence_fullcontext" and reduce "Sequence" to what lies between
# the insulation prefix and the (optional) bcd19 suffix. Values that do not
# start with the insulation prefix (e.g. "-") are left unchanged.
bervoets_promoters["Sequence_fullcontext"] = bervoets_promoters["Sequence"]

bervoets_promoters["Sequence"] = bervoets_promoters["Sequence_fullcontext"].str.replace(
    rf"^{bervoets_promoter_insulation}(.*?)(?:{bervoets_promoter_bcd19})?$",
    r"\1",
    regex=True,
)

bervoets_foldchange = pd.read_csv(
    io.StringIO(bervoets_foldchange_tsv), sep=r"\s+", index_col=0
)

# (forward, reverse) Bervoets primer names used to amplify each sigma factor.
bervoets_primers_for_sigma = {
    "sigB": ("IB0250", "IB0251"),
    "sigF": ("IB0252", "IB0253"),
    "sigG": ("IB0254", "IB0255"),
    "sigH": ("IB0256", "IB0257"),
    "sigM": ("IB0238", "IB0249"),
    "sigW": ("IB0258", "IB0259"),
    "sigX": ("IB0260", "IB0261"),
}

In [None]:
# Sanity check of the table parsing: PW2 must read the same whether it is
# recovered by trimming the full-context promoter or assembled from the
# library's upstream + variable + downstream pieces.
pw2_from_full_context = bervoets_promoters.loc["PW2", "Sequence"]
pw2_from_library = bervoets_promoter_lib.loc["PW2", "Sequence"]
pw2_from_full_context == pw2_from_library

# Bacillus sigma primers

In [None]:
import primer3plus

In [None]:
def overhangs_for(x):
    """Return the ``(upstream, downstream)`` overhang pair stored on ``x``.

    ``x`` is any record that can be indexed with the keys
    ``"Upstream overhang"`` and ``"Downstream overhang"``.
    """
    upstream = x["Upstream overhang"]
    downstream = x["Downstream overhang"]
    return upstream, downstream


def _format_seq(seq):
    """Return ``seq`` as a lower-case string after ``sequence.get_seq``."""
    # TODO: mixed bases in upper case for IDT
    normalized = sequence.get_seq(seq)
    return str(normalized).lower()


def strip(s):
    """Return ``s`` with every run of whitespace (spaces, tabs, newlines) removed."""
    whitespace_run = re.compile(r"\s+")
    return whitespace_run.sub("", s)

In [None]:
sigb = strip(
    """atgACACAAC CATCAAAAAC TACGAAACTA ACTAAAGATG AAGTCGATCG GCTCATAAGC
GATTACCAAA CAAAGCAAGA TGAACAAGCG CAGGAAACGC TTGTGCGGGT GTATACAAAT
CTGGTTGACA TGCTTGCGAA AAAATACTCA AAAGGCAAAA GCTTCCACGA GGATCTCCGC
CAGGTCGGCA TGATCGGGCT GCTAGGCGCG ATTAAGCGAT ACGATCCTGT TGTCGGCAAA
TCGTTTGAAG CTTTTGCAAT CCCGACAATC ATCGGTGAAA TTAAACGTTT CCTCAGAGAT
AAAACATGGA GCGTTCATGT GCCGAGACGA ATTAAAGAAC TCGGTCCAAG AATCAAAATG
GCGGTTGATC AGCTGACCAC TGAAACACAA AGATCGCCGA AAGTCGAAGA GATTGCCGAA
TTCCTCGATG TTTCTGAAGA AGAGGTTCTT GAAACGATGG AAATGGGCAA AAGCTATCAA
GCCTTATCCG TTGACCACAG CATTGAAGCG GATTCGGACG GAAGCACTGT CACGATTCTT
GATATCGTCG GATCACAGGA GGACGGATAT GAGCGGGTCA ACCAGCAATT GATGCTGCAA
AGCGTGCTTC ATGTCCTTTC AGACCGTGAG AAACAAATCA TAGACCTTAC GTATATTCAA
AACAAAAGCC AAAAAGAAAC TGGGGACATT CTCGGTATAT CTCAAATGCA CGTCTCGCGC
TTGCAACGCA AAGCTGTGAA GAAGCTCAGA GAGGCCTTGA TTGAAGATCC CTCGATGGAG
TTAATGtaa"""
)

rsbw = strip(
    """atgAAGAATA ATGCTGATTA CATCGAAATG AAAGTGCCGG CCCAACCTGA ATATGTGGGA
ATTATAAGAC TGACGCTGTC AGGGGTCGCA AGCAGAATGG GCTATACGTA CGATGAAATT
GAAGACTTGA AAATCGCAGT CAGTGAGGCG TGCACAAATG CGGTTCAGCA CGCTTACAAA
GAAGATAAAA ATGGGGAAGT GTCAATACGA TTCGGTGTGT TTGAAGACCG TTTAGAGGTT
ATTGTGGCGG ATGAAGGAGA CAGCTTTGAC TTTGATCAAA AGCAGCAGGA TCTAGGGCCG
TACACACCTT CGCACACAGT TGATCAATTA TCAGAAGGAG GGCTCGGTCT ATATTTAATG
GAAACGCTCA TGGATGAAGT CAGAGTGCAA AACCACTCCG GCGTCACCGT AGCGATGACA
AAGTATTTAA ATGGGGAGCG AGTTGATCAT GACACAACCA TCAAAAACTA CGAAACTAAC
taa"""
)

In [None]:
# Registry collections used below: `olt` receives new oligo rows, `lib_parts`
# supplies the storage-vector flanks, and `part_types` supplies the overhangs
# for each part type.
olt = reg[("oLT", "oligos")]
lib_parts = reg[("LIB", "parts")]
part_types = reg[("LIB", "parts", "Part types")]

In [None]:
# overhangs = overhangs_for(part_types["5UTR_2"])
# storage_flanks = ("CGTCTCGGTCTCa", "tGAGACCgGAGACG") # storage vector BsmBI flanks
# random_bases = ("GCTTCA", "TGCTAA") # to add between BsmBI recognition site and ends of oligos
# storage_flanks = workflow.concatenate_flanks(storage_flanks, random_bases)
# (prefix, suffix) sequences of the JUMP storage vector, read from the LIB
# parts collection.
storage_flanks = (
    lib_parts["JUMP_storage_vector_prefix"]["Sequence"],
    lib_parts["JUMP_storage_vector_suffix"]["Sequence"],
)

In [None]:
# Display the storage-vector flanks for inspection.
storage_flanks

In [None]:
# Display the (upstream, downstream) overhangs of the CDS_CD part type.
overhangs_for(part_types["CDS_CD"])

## Bervoets primers

In [None]:
# Display the two Bervoets primer sequences registered for sigB.
[bervoets_primers.loc[name, "Sequence"] for name in bervoets_primers_for_sigma["sigB"]]

In [None]:
# Primer/PrimerPair class, memoize thermo params

In [None]:
# TODO: order Bervoets primers in forward/reverse order

In [None]:
"fwd"[0].isupper()

In [None]:
# Registry dates are month/day/year without zero padding. The string is built
# from the datetime fields instead of strftime("%-m/%-d/%Y") because the "%-"
# flag is a platform-specific extension that is not accepted everywhere.
today = datetime.now()
date = f"{today.month}/{today.day}/{today.year}"

# Columns shared by every oligo row created in this cell.
base_row = {
    "Author": "Jacob Quinn Shenker",
    "Vendor": "IDT",
    "Type": "Primer",
    "Date": date,
    "Order date": date,
}

rows = []

name = "sigB"
template_seq = sigb

# Flanks for the CDS_CD part type combined with the storage-vector flanks, then
# adjusted against the template by workflow.smoosh_and_trim_flanks.
full_flanks = workflow.concatenate_flanks(
    overhangs_for(part_types["CDS_CD"]), storage_flanks
)
trimmed_flanks = workflow.smoosh_and_trim_flanks(template_seq, full_flanks)

# One row per published primer: keep its binding site but swap its overhang
# for our flank.
for bervoets_name, flank in zip(
    bervoets_primers_for_sigma[name],
    trimmed_flanks,
):
    primer_seq_orig = bervoets_primers.loc[bervoets_name, "Sequence"]
    # Use template_seq (not the hard-coded sigb) so the loop stays correct when
    # `name`/`template_seq` above are pointed at another gene.
    primer_seq = primers.replace_primer_overhang(template_seq, primer_seq_orig, flank)
    orientation = primers.format_primer_orientation(template_seq, primer_seq)
    primer_name = f"Bsub_{name}_{orientation}"
    description = f"Primers to amplify {name} from B. subtilis. Primer binding site from {bervoets_name} (Bervoets 2018). Overhangs for BsmBI golden gate into pLIB112."
    row = {
        "Name": primer_name,
        "Sequence": str(primer_seq).upper(),
        "Description": description,
        **base_row,
    }
    rows.append(row)

In [None]:
# Run sequence.pcr on the sigB template with the primer sequences collected in
# `rows` (presumably an in-silico PCR returning the amplicon -- confirm).
product = sequence.pcr(sigb, *[r["Sequence"] for r in rows])

In [None]:
# Display the product via its seq_lower() method.
product.seq_lower()

In [None]:
# Inspect the rows before they are written to the oligo collection.
rows

In [None]:
# Add each row to the oligo collection under the next free ID.
for row in rows:
    olt[olt.next_id()] = row

In [None]:
# Persist the rows added above.
olt.save()

## De novo primers

In [None]:
# List the attributes of primer3plus.Design whose names start with "PRIMER_".
# The bare prefix `primer3plus.Design.PRIMER_` (apparently left over from tab
# completion) is not an attribute and raised AttributeError when the cell ran.
[attr for attr in dir(primer3plus.Design) if attr.startswith("PRIMER_")]

In [None]:
# Scratch Design instance for interactive exploration of the primer3plus API.
design = primer3plus.Design()

In [None]:
# Sample primer-pair record with "PAIR", "LEFT" and "RIGHT" entries, used below
# as the `primer3=` argument when exercising the Primer class.
# NOTE(review): presumably a captured primer3plus result for sigB (the LEFT
# sequence matches the start of `sigb`) -- confirm.
primer3testresult = {
    "PAIR": {
        "PENALTY": 11.368492839936664,
        "COMPL_ANY_TH": 0.0,
        "COMPL_END_TH": 0.0,
        "PRODUCT_SIZE": 789,
    },
    "LEFT": {
        "PENALTY": 5.529429783166449,
        "SEQUENCE": "atgacacaaccatcaaaaactacga",
        "location": (0, 25),
        "TM": 59.47057021683355,
        "GC_PERCENT": 36.0,
        "SELF_ANY_TH": 0.0,
        "SELF_END_TH": 0.0,
        "HAIRPIN_TH": 0.0,
        "END_STABILITY": 3.43,
        "OVERHANG": "gcttcacgtctcggtctcaa",
    },
    "RIGHT": {
        "PENALTY": 5.839063056770215,
        "SEQUENCE": "ttacattaactccatcgagggatct",
        "location": (788, 25),
        "TM": 59.160936943229785,
        "GC_PERCENT": 40.0,
        "SELF_ANY_TH": 20.727150498313335,
        "SELF_END_TH": 12.024104015462171,
        "HAIRPIN_TH": 40.770439736531614,
        "END_STABILITY": 2.75,
        "OVERHANG": "ggttgagaccggagacgtgctaa",
    },
}

In [None]:
from dataclasses import InitVar, dataclass, field
from functools import cached_property
from typing import Optional, Union

from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

# NOTE(review): Primer is defined in the cell that follows this one, so on a
# fresh kernel this cell raises NameError. The same statements are repeated at
# the end of that cell; the two lines marked "# error" are expected to raise,
# which stops the cell before print() is reached.
a = Primer("gggg" + ("a" * 10), binding_length=10)
b = Primer("gggg" + ("a" * 10), overhang_length=4)
c = Primer("gggg", "a" * 10)
d = Primer("gggg" + ("a" * 10))
e = Primer("gggg" + ("a" * 10), template="ccccaaaacccc")
f = Primer(binding="a" * 10, overhang="gggg")
g = Primer("gggg", ("a" * 10), template="ccccaaaacccc")  # error
h = Primer(primer3=primer3testresult)
h2 = Primer(template="ggggaaaagggg", primer3=primer3testresult)  # error
print(a, b, c, d, e, f, h, h2)

In [None]:
from dataclasses import InitVar, dataclass, field
from functools import cached_property
from typing import Optional, Union

from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord


@dataclass
class Primer:
    """A primer described as a 5' overhang followed by a 3' binding region.

    Supported ways to construct one:

    * ``Primer(overhang, binding)`` -- both parts given positionally.
    * ``Primer(seq, binding_length=n)`` -- the last ``n`` bases bind.
    * ``Primer(seq, overhang_length=n)`` -- the first ``n`` bases are overhang.
    * ``Primer(seq, template=t)`` -- the binding region is the longest 3'
      suffix of ``seq`` that occurs (ignoring case) in ``t`` or in the reverse
      complement of ``t``.
    * ``Primer(seq)`` -- the whole sequence is treated as binding region.
    * ``Primer(binding=..., overhang=...)`` -- parts given by keyword.
    * ``Primer(primer3=record)`` -- the record is stored as given; no sequence
      is derived from it (TODO).

    After construction ``seq == overhang + binding`` whenever a sequence was
    supplied. ``tm``, ``mfe_monomer`` and ``mfe_homodimer`` are stored as
    given and never computed here.

    Raises:
        ValueError: for contradictory or incomplete argument combinations
            (e.g. two positional sequences plus ``template``, ``template``
            together with ``primer3``, both length arguments, a length outside
            the primer, or a primer that does not anneal to ``template``).
    """

    # The project sequence type is written as a forward reference so the class
    # can be defined without evaluating it; dataclasses never inspects the
    # inner type of an InitVar.
    seq1: InitVar["sequence.SEQ_TYPE"] = None
    seq2: InitVar["sequence.SEQ_TYPE"] = None
    template: InitVar["sequence.SEQ_TYPE"] = None
    binding_length: InitVar[int] = None
    overhang_length: InitVar[int] = None
    seq: Optional[str] = None
    binding: Optional[str] = None
    overhang: Optional[str] = None
    tm: Optional[float] = None
    mfe_monomer: Optional[float] = None
    mfe_homodimer: Optional[float] = None
    primer3: dict = field(repr=False, default_factory=dict)

    # Complement table (IUPAC codes included) used to reverse-complement the
    # lower-cased template. Not annotated, so it is not a dataclass field.
    _COMPLEMENT = str.maketrans("acgturyswkmbvdhn", "tgcaayrswmkvbhdn")

    def __post_init__(self, seq1, seq2, template, binding_length, overhang_length):
        if template is not None and self.primer3:
            raise ValueError("cannot specify both template and primer3")
        has_split_hint = any(
            hint is not None for hint in (template, binding_length, overhang_length)
        )
        if seq1 is None:
            if seq2 is not None:
                raise ValueError("cannot specify seq2 without seq1")
            if has_split_hint:
                raise ValueError(
                    "template, binding_length, and overhang_length require a positional primer sequence"
                )
            self._init_from_keywords()
            return
        if any(part is not None for part in (self.seq, self.binding, self.overhang)):
            raise ValueError(
                "cannot combine positional sequences with the seq, binding, or overhang keywords"
            )
        if seq2 is not None:
            if has_split_hint:
                raise ValueError(
                    "if binding and overhang are specified, cannot also specify template, binding_length, or overhang_length"
                )
            overhang, binding = self._as_str(seq1), self._as_str(seq2)
        else:
            full = self._as_str(seq1)
            split_at = self._overhang_end(
                full, template, binding_length, overhang_length
            )
            overhang, binding = full[:split_at], full[split_at:]
        self.overhang = overhang
        self.binding = binding
        self.seq = overhang + binding

    def _init_from_keywords(self):
        """Complete seq/binding/overhang when they were passed by keyword."""
        if self.binding is None:
            if self.overhang is not None:
                raise ValueError("cannot specify overhang without binding")
            if self.seq is not None:
                # A bare sequence has no overhang.
                self.seq = self._as_str(self.seq)
                self.binding, self.overhang = self.seq, ""
            return
        binding = self._as_str(self.binding)
        full = None if self.seq is None else self._as_str(self.seq)
        if self.overhang is not None:
            overhang = self._as_str(self.overhang)
        elif full is not None:
            if not full.endswith(binding):
                raise ValueError("seq does not end with binding")
            overhang = full[: len(full) - len(binding)]
        else:
            overhang = ""
        if full is not None and full != overhang + binding:
            raise ValueError("seq is not overhang followed by binding")
        self.overhang = overhang
        self.binding = binding
        self.seq = overhang + binding

    @classmethod
    def _overhang_end(cls, full, template, binding_length, overhang_length):
        """Return the index in ``full`` where the overhang ends and binding starts."""
        if binding_length is not None and overhang_length is not None:
            raise ValueError("cannot specify both binding_length and overhang_length")
        if template is not None and (
            binding_length is not None or overhang_length is not None
        ):
            raise ValueError(
                "cannot specify template together with binding_length or overhang_length"
            )
        if binding_length is not None:
            split_at = len(full) - binding_length
        elif overhang_length is not None:
            split_at = overhang_length
        elif template is not None:
            split_at = len(full) - cls._annealing_length(full, cls._as_str(template))
        else:
            # No hint given: the whole primer is binding region.
            split_at = 0
        if not 0 <= split_at <= len(full):
            raise ValueError("binding_length/overhang_length must lie within the primer")
        return split_at

    @classmethod
    def _annealing_length(cls, primer, template):
        """Return the length of the longest 3' suffix of ``primer`` found on either strand of ``template``."""
        primer = primer.lower()
        top = template.lower()
        bottom = top.translate(cls._COMPLEMENT)[::-1]
        for length in range(len(primer), 0, -1):
            suffix = primer[len(primer) - length :]
            if suffix in top or suffix in bottom:
                return length
        raise ValueError("the 3' end of the primer does not anneal to the template")

    @staticmethod
    def _as_str(value):
        """Return ``value`` as a plain string.

        Objects that carry their sequence in a ``.seq`` attribute are unwrapped
        first; anything else is passed to ``str`` directly.
        """
        return str(getattr(value, "seq", value))

# Exercise each supported way of constructing a Primer.
a = Primer("gggg"+("a"*10), binding_length=10)
b = Primer("gggg"+("a"*10), overhang_length=4)
c = Primer("gggg", "a"*10)
d = Primer("gggg"+("a"*10))
e = Primer("gggg"+("a"*10), template="ccccaaaacccc")
f = Primer(binding="a"*10, overhang="gggg")
h = Primer(primer3=primer3testresult)
print(a,b,c,d,e,f,h)

# These argument combinations are contradictory and are expected to be
# rejected. Catch the error so the cell still runs to completion; previously
# the first failing call aborted the cell before anything was printed.
invalid_calls = {
    "g": lambda: Primer("gggg", ("a"*10), template="ccccaaaacccc"),
    "h2": lambda: Primer(template="ggggaaaagggg", primer3=primer3testresult),
}
for label, make_primer in invalid_calls.items():
    try:
        make_primer()
    except ValueError as error:
        print(f"{label}: rejected as expected ({error})")
    else:
        print(f"{label}: accepted, but an error was expected")

In [None]:
def _format_primer3(result):
    return result


# USE CASES:
# 1) take desired product, template seq, find overhangs
# 2) take amplicon, optional overhangs
def primer3_amplicon_primers(
    template, flanks, tm=(55, 65, 72), return_explain=False, return_many=False
):
    """Design primers that amplify all of ``template`` and add flanks to it.

    Args:
        template: sequence to amplify; normalized with ``sequence.get_seq`` and
            lower-cased before it is handed to primer3plus.
        flanks: iterable of flank pairs, unpacked into
            ``workflow.concatenate_flanks``; the combined flanks are adjusted
            against the template with ``workflow.smoosh_and_trim_flanks`` and
            used as the left and right primer overhangs.
        tm: ``(min, optimal, max)`` primer melting temperatures.
        return_explain: if true, return ``(primers, explain)`` where
            ``explain`` is the second value returned by ``design.run()``.
        return_many: falsy to return the single designed pair; otherwise the
            number of pairs to request, returned as a list.

    Returns:
        One result record, or a list of records when ``return_many`` is set;
        wrapped in a tuple with ``explain`` when ``return_explain`` is true.

    Raises:
        ValueError: if ``tm`` does not have three entries, or if a single pair
            was requested and none was designed.
        NotImplementedError: if a single pair was requested but more than one
            came back.
    """
    if len(tm) != 3:
        raise ValueError("expecting (min, optimal, max) tm")
    min_tm, opt_tm, max_tm = tm
    template_seq = str(sequence.get_seq(template)).lower()
    full_flanks = workflow.concatenate_flanks(*flanks)
    trimmed_flanks = workflow.smoosh_and_trim_flanks(template_seq, full_flanks)
    design = primer3plus.Design()
    design.settings.template(template_seq)
    design.settings.as_cloning_task()
    design.settings.use_overhangs()
    # NOTE(review): the right flank is passed exactly as returned by
    # smoosh_and_trim_flanks -- confirm it is in the orientation primer3plus
    # expects for the reverse primer's overhang.
    design.settings.left_overhang(trimmed_flanks[0])
    design.settings.right_overhang(trimmed_flanks[1])
    # These assignments used to sit behind the `raise` above (so `tm` was never
    # applied) and had min and optimal swapped.
    design.params["PRIMER_MIN_TM"] = min_tm
    design.params["PRIMER_OPT_TM"] = opt_tm
    design.params["PRIMER_MAX_TM"] = max_tm
    design.settings.product_size([27, 10000], opt=0)
    num_return = return_many if return_many else 1
    design.settings.primer_num_return(num_return)
    results, explain = design.run()
    if return_many:
        designed = [_format_primer3(r) for r in results.values()]
    else:
        if len(results) == 0:
            raise ValueError("did not design primers")
        if len(results) > 1:
            raise NotImplementedError("expected a single primer pair")
        designed = _format_primer3(results[0])
    if return_explain:
        return designed, explain
    return designed


# Example call: request three candidate primer pairs for sigB with the CDS_CD
# overhangs and the storage-vector flanks.
primer3_amplicon_primers(
    sigb, [overhangs_for(part_types["CDS_CD"]), storage_flanks], return_many=3
)

In [None]:
# Gene name -> coding sequence for which de novo primers are designed below.
proteins_to_order = {"sigB": sigb, "rsbW": rsbw}

In [None]:
# Registry dates are month/day/year without zero padding; built from the
# datetime fields because strftime's "%-" flag is platform-specific.
today = datetime.now()
date = f"{today.month}/{today.day}/{today.year}"

# Columns shared by every row. "Description" is deliberately absent: it is set
# per gene below, and an entry here would both depend on a variable left over
# from an earlier cell and override the per-gene text when merged into a row.
base_row = {
    "Author": "Jacob Quinn Shenker",
    "Vendor": "IDT",
    "Type": "Primer",
    "Date": date,
    "Order date": date,
}

rows = []

for name, template_seq in proteins_to_order.items():
    # Named primer_pair so the imported `primers` module is not shadowed.
    primer_pair = primer3_amplicon_primers(
        template_seq, [overhangs_for(part_types["CDS_CD"]), storage_flanks]
    )
    description = f"Primers to amplify {name} from B. subtilis. Overhangs for BsmBI golden gate into pLIB112."
    for side, orientation in (("LEFT", "f"), ("RIGHT", "r")):
        primer = primer_pair[side]
        # The record keeps the 5' overhang and the annealing sequence apart
        # (see the sample in primer3testresult); the oligo to order is the two
        # joined. NOTE(review): confirm the RIGHT overhang is already in
        # reverse-primer orientation.
        primer_seq = primer.get("OVERHANG", "") + primer["SEQUENCE"]
        primer_name = f"Bsub_{name}_{orientation}"
        row = {
            "Name": primer_name,
            "Sequence": str(primer_seq).upper(),
            "Description": description,
            **base_row,
        }
        rows.append(row)

In [None]:
# Inspect the de novo primer rows before they are written to the oligo collection.
rows

In [None]:
# Add each de novo primer row to the oligo collection under the next free ID.
for row in rows:
    olt[olt.next_id()] = row

In [None]:
# Persist the rows added above.
olt.save()

## Bervoets promoters

In [None]:
# Promoters to order: the three parent promoters (PB2, PF3, PW2) and, for each
# sigma factor, the library variant with the highest fold change in the
# fold-change table (PB2.10, PW2.6, PF3.7).
promoter_names_to_order = ("PB2", "PF3", "PW2", "PB2.10", "PW2.6", "PF3.7")
library_sequences = bervoets_promoter_lib["Sequence"]

promoters_to_order = {}
for name in promoter_names_to_order:
    promoters_to_order[name] = library_sequences.loc[name]

In [None]:
# Display the promoter name -> sequence mapping for inspection.
promoters_to_order

In [None]:
# Registry dates are month/day/year without zero padding; built from the
# datetime fields because strftime's "%-" flag is platform-specific.
today = datetime.now()
date = f"{today.month}/{today.day}/{today.year}"

base_part_row = {"Author": "Jacob Quinn Shenker"}

# Columns shared by every oligo row created in this cell.
base_oligo_row = {
    "Author": "Jacob Quinn Shenker",
    "Vendor": "IDT",
    "Type": "Primer",
    "Date": date,
    "Order date": date,
}

# The flanks do not depend on the promoter, so build them once.
full_flanks = workflow.concatenate_flanks(
    overhangs_for(part_types["Promoter_AB"]), storage_flanks
)

oligo_rows = []

for bervoets_name, promoter_seq in promoters_to_order.items():
    # Elsewhere in this notebook smoosh_and_trim_flanks returns the adjusted
    # (left, right) flank pair, not the flanked sequence, so the insert is
    # assembled here.
    # NOTE(review): assumes both flanks are plain strings written in top-strand
    # orientation -- confirm against workflow.smoosh_and_trim_flanks.
    trimmed_flanks = workflow.smoosh_and_trim_flanks(promoter_seq, full_flanks)
    seq = trimmed_flanks[0] + promoter_seq + trimmed_flanks[1]
    # Fold change of this promoter with its own (cognate) sigma factor.
    fc = bervoets_foldchange.loc[
        bervoets_name, bervoets_promoter_lib.loc[bervoets_name, "Sigma"]
    ]
    # Two complementary oligos per promoter: sense first, then antisense.
    for antisense in (False, True):
        if antisense:
            oligo_seq = sequence.reverse_complement(seq)
        else:
            oligo_seq = seq
        sense_str = "antisense" if antisense else "sense"
        oligo_name = f"Bervoets_{bervoets_name}_{sense_str}"
        description = f"{sense_str.capitalize()} primer to anneal to make {bervoets_name} (Bervoets 2018). Flanks for BsmBI golden gate into pLIB112. Fold change with cognate sigma: {fc}."
        row = {
            "Name": oligo_name,
            "Sequence": str(oligo_seq).upper(),
            "Description": description,
            **base_oligo_row,
        }
        oligo_rows.append(row)

In [None]:
# Add the promoter oligo rows built in the previous cell. Iterating `rows`
# here would re-add the primer rows from the earlier section instead.
for row in oligo_rows:
    olt[olt.next_id()] = row

In [None]:
# Persist the rows added above.
olt.save()

# BioCyc test

In [None]:
import requests

In [None]:
# Fetch the BioCyc XML record for BSUB:BSU04730. The timeout keeps a stalled
# connection from hanging the notebook, and the response is bound to a name so
# it can be reused; it is also the cell's displayed output.
biocyc_response = requests.get(
    "https://websvc.biocyc.org/getxml?BSUB:BSU04730", timeout=30
)
biocyc_response

In [None]:
# NOTE(review): `_` is IPython's "last output" variable, so this shows the
# BioCyc response body only when run immediately after the request cell above.
_.content