In [None]:
import pandas as pd
import holoviews as hv
import hvplot.pandas
import matplotlib.pyplot as plt
import seaborn as sns
import toml
import re
import urllib
from datetime import datetime
import string
import pygsheets
import requests
from tqdm.auto import tqdm
import Bio.Restriction as Restriction
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
import benchlingapi

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import paulssonlab.api as api
import paulssonlab.api.benchling as bapi
from paulssonlab.api.util import base_url
import paulssonlab.cloning.registry as registry
import paulssonlab.cloning.workflow as workflow
import paulssonlab.cloning.sequence as sequence

In [None]:
# Select bokeh as the HoloViews plotting backend for this notebook.
hv.extension("bokeh")

# Setup

In [None]:
# Load notebook settings from config.toml (relative to the working directory).
# Later cells read config["benchling"]["api_key"], config["benchling"]["project"]
# and config["registry"]["folder"] from it.
config = toml.load("config.toml")

In [None]:
# Authorize the Google Sheets client using the service-account key file
# credentials.json (relative to the working directory).
# NOTE(review): the name `gc` would shadow the standard-library `gc` module if
# that module were ever imported in this notebook.
gc = pygsheets.authorize(service_account_file="credentials.json")

In [None]:
# Open a Benchling API session with the key stored in the config, then resolve
# the configured project through bapi.get_project_root (presumably the
# project's root folder — confirm against paulssonlab.api.benchling).
bench_session = benchlingapi.Session(config["benchling"]["api_key"])
benchling_folder = bapi.get_project_root(bench_session, config["benchling"]["project"])

In [None]:
# Build the registry object from the Sheets client, the configured registry
# folder and the Benchling folder resolved above.
reg = registry.Registry(gc, config["registry"]["folder"], benchling_folder)

In [None]:
# Fetch the strain, plasmid and part sheets from the registry; each key is a
# (prefix, type) tuple.
strain_sheet = reg.get_sheet(("LT", "strains"))
plasmid_sheet = reg.get_sheet(("pLT", "plasmids"))
part_sheet = reg.get_sheet(("LT", "parts"))
# NOTE(review): `col` is not defined in any earlier cell of this notebook, so
# the next line raises NameError on a fresh kernel (Restart & Run All).
# Presumably it came from a since-deleted cell — confirm where the "parts"
# spreadsheet key should come from.
part_type_sheet = gc.open_by_key(col["parts"]).worksheet_by_title("Part types")

In [None]:
# Reuse the Drive service attached to the plasmid sheet's client to list the
# contents of the plasmid-map folder.
drive_service = plasmid_sheet.client.drive.service
# NOTE(review): `col` is not defined in any earlier cell of this notebook, so
# this line raises NameError on a fresh kernel — confirm where the
# "plasmid_maps" folder ID should come from.
plasmid_folder = col["plasmid_maps"]
plasmid_maps = api.google.list_drive(drive_service, root=plasmid_folder)

# Test

In [None]:
# Display the registry's `registry` attribute for manual inspection.
reg.registry

In [None]:
# Smoke test: call get_sheet_by_id with a two-element (prefix, type) key.
reg.get_sheet_by_id(("LT", "parts"))

In [None]:
# Smoke test: same lookup with a third key element — presumably a worksheet
# title within the parts spreadsheet; confirm against registry.Registry.
reg.get_sheet_by_id(("LT", "parts", "Part types"))

In [None]:
# Smoke test: fetch the parts sheet through get_sheet with the same key that
# the setup cell uses.
reg.get_sheet(("LT", "parts"))

# Voigt terminators

In [None]:
# Download three supplementary spreadsheets (MOESM206-208) of the article with
# DOI 10.1038/nmeth.2515 into data/2013terminators_supp/, saved as
# supptable2-4.xlsx.
# curl flags: -L follows redirects and -o writes to the given path.
# `-b does_not_exist` names a cookie file that is not there — presumably used
# to switch on curl's cookie handling so the redirect chain succeeds; confirm.
# NOTE(review): the downloads run again on every execution of this cell.
!mkdir -p data/2013terminators_supp
!curl -b does_not_exist -Lo data/2013terminators_supp/supptable2.xlsx "https://static-content.springer.com/esm/art%3A10.1038%2Fnmeth.2515/MediaObjects/41592_2013_BFnmeth2515_MOESM206_ESM.xlsx"
!curl -b does_not_exist -Lo data/2013terminators_supp/supptable3.xlsx "https://static-content.springer.com/esm/art%3A10.1038%2Fnmeth.2515/MediaObjects/41592_2013_BFnmeth2515_MOESM207_ESM.xlsx"
!curl -b does_not_exist -Lo data/2013terminators_supp/supptable4.xlsx "https://static-content.springer.com/esm/art%3A10.1038%2Fnmeth.2515/MediaObjects/41592_2013_BFnmeth2515_MOESM208_ESM.xlsx"

# Data

In [None]:
# FROM: used https://www.adobe.com/acrobat/online/pdf-to-excel.html to convert
# Table S1 (p. 23) from https://www.embopress.org/action/downloadSupplement?doi=10.15252%2Fmsb.20209584&file=msb209584-sup-0001-AppendixFig.pdf
import io

# Raw text exactly as pasted from the converted table. Columns are separated
# by runs of tabs (empty spreadsheet cells left extra tabs behind), some
# sequences are double-quoted, and the DT42 sequence is wrapped over two lines
# inside its quotes.
voigt_bidirectional_terms_tsv = """
name	sequence	Ts_forward	Ts_reverse
DT3	"CCGGCTTATCGGTCAGTTTCACCTGATTTACGTAAAAACCCGCTTCGGCGGGTTTTTGCTTTTGGAGGGGCAGAAAGATGAATGACTGTCCACGACGCTATACCCAAAAGAAAAAAAAAAAACCCCGCCCCTGACAGGGCGGGGTTTTTTTT"		3000				120		
DT5	"TCCGGCAATTAAAAAAGCGGCTAACCACGCCGCTTTTTTTACGTCTGCACTCGGTACCAAATTCCAGAAAAGAGGCCTCCCGAAAGGGGGGCCTTTTTTCGTTTTGGTCC"		4700				50		
DT19	TTCAGCCAAAAAACTTAAGACCGCCGGTCTTGTCCACTACCTTGCAGTAATGCGGTGGACAGGATCGGCGGTTTTCTTTTCTCTTCTCAACTCGGTACCAAAGACGAACAATAAGACGCTGAAAAGCGTCTTTTTTCGTTTTGGTCC	770				1.2			
DT34	GCTGATGCCAGAAAGGGTCCTGAATTTCAGGGCCCTTTTTTTACATGGATTGCTCGGTACCAAATTCCAGAAAAGAGACGCTTTCGAGCGTCTTTTTTCGTTTTGGTCC	570				1.4			
DT36	GATCTAACTAAAAAGGCCGCTCTGCGGCCTTTTTTCTTTTCACTGTAACAACGGAAACCGGCCATTGCGCCGGTTTTTTTTGGCCT	680				3.2			
DT42	"AGTTAACCAAAAAGGGGGGATTTTATCTCCCCTTTAATTTTTCCTCGCAGATAGCAAAAAAGCGCCTTTAGGGCGCTTTTTTACATTG
GTGG"	2500				2.2			
DT54	"GGAAACACAGAAAAAAGCCCGCACCTGACAGTGCGGGCTTTTTTTTTCGACCAAAGGCTCGGTACCAAATTCCAGAAAAGACACCCGAAAGGGTGTTTTTTCGTTTTGGTCC"		1800				30		
DT56	TACCACCGTCAAAAAAAACGGCGCTTTTTAGCGCCGTTTTTATTTTTCAACCTTCCAGGCATCAAATAAAACGAAAGGCTCAGTCGAAAGACTGGGCCTTTCGTTTTATCTGTTGTTTGTCGGTGAACGCTCTC	240				11			
DT60	ACATTTAATAAAAAAAGGGCGGTCGCAAGATCGCCCTTTTTTACGTATGACACAGTGAAAAATGGCGCCCATCGGCGCCATTTTTTTATG	110				29			
DT65	TGCTCGTACCAGGCCCCTGCAATTTCAACAGGGGCCTTTTTTTATCCAATTCCATCGGGTCCGAATTTTCGGACCTTTTCTCCGC	400				1.0			
DT82	"CTTATTCCATAACAAAGCCGGGTAATTCCCGGCTTTGTTGTATCTGAACAATAAATGGATGCCCTGCGTAAGCGGGGCATTTTTCTTCCT"	170				2.8			
DT83	AGCGTCAAAAGGCCGGATTTTCCGGCCTTTTTTATTAGGCAGCATGCTGCCAGGTGATCCCCCTGGCCACCTCTTTT	600				4.4			
DT86	TAATCATTCTTAGCGTGACCGGGAAGTCGGTCACGCTACCTCTTCTGAAGAAACAGCAAACAATCCAAAACGCCGCGTTCAGCGGCGTTTTTTCTGCTTTTCT	210				0.4			
DT100	"GTGAAGTGAAAAATGGCGCACATTGTGCGCCATTTTTTTTGTCTGCCGTTTACCGCTTCTCTGAAAATCAACGGGCAGGTCACTGACTTGCCCGTTTTTTTATCCCTTCTCCACACCG"	4700				12			
DT101	"TCTTTAAAAAGAAACCTCCGCATTGCGGAGGTTTCGCCTTTTGATACTCTGTCTGAAGTAATTCTTGCCGCAGTGAAAAATGGCGCCCATCGGCGCCATTTTTTTATGCTTCCATTAGAAAGCAAAAAGCCTGCTAGAAAGCAGGCTTTTTTGAATTTGGCTCCTCTGAC"		2800				160		
DT103	"AAAGTTCTGAAAAAGGGTCACTTCGGTGGCCCTTTTTTATCGCCACGGTTTGAGCAGTGCACTTGCTTAAAATCCCGCCAGCGGCGGGATTTTTTATTGTCCGGTTTAAGACA"	790				4.0			
DT104	"GCAGACAAAAAAAATGGCGCACAATGTGCGCCATTTTTCACTTCACAGGTACTATTGTTTTGAATTGAAAAGGGCGCTTCGGCGCCCTTTTTGCATTTGTTGACGGCATATATTTGTATATCGAAGCGCCCTGATGGGCGCTTTTTTTATTTAATCGATAACCAGA"		580				101		
"""

# Parse into a frame indexed by terminator name, with columns `sequence`,
# `Ts_forward` and `Ts_reverse`.
# - The separator is a raw string: "\s" in a plain string literal is an
#   invalid escape sequence and triggers a warning on recent Python versions.
# - Splitting on whitespace runs collapses the repeated tabs, so every row
#   yields exactly four fields.
# - The line break inside the quoted DT42 field survives parsing, so all
#   whitespace is stripped from the sequences afterwards to leave contiguous
#   DNA strings.
voigt_bidirectional_terms = pd.read_csv(
    io.StringIO(voigt_bidirectional_terms_tsv), sep=r"\s+", index_col=0
).assign(
    sequence=lambda terms: terms["sequence"].str.replace(r"\s+", "", regex=True)
)

In [None]:
# Keep only the terminators whose Ts_reverse value is at least 10.
selected_terms = voigt_bidirectional_terms.loc[
    lambda terms: terms["Ts_reverse"] >= 10
]

In [None]:
# Show the terminators that passed the Ts_reverse filter.
selected_terms

## Makeshift oligo orders

In [None]:
def _format_seq(seq):
    """Return `seq` as a lowercase plain string.

    The input is first passed through `sequence.get_seq`, and the result is
    converted with `str()` before lowercasing.
    """
    plain_text = str(sequence.get_seq(seq))
    return plain_text.lower()

In [None]:
# Terminators handled separately when the order lists are printed below: they
# are left out of the single-strand listing and printed as sense/antisense
# oligo pairs instead. Presumably these cannot be ordered as gBlocks — confirm.
NO_GBLOCK = ["DT3", "DT56", "DT60"]
# Alternative: use an empty list to give every terminator the single-strand treatment.
# NO_GBLOCK = []

In [None]:
# Overhang pair handed to workflow.add_overhangs for every terminator.
overhangs = ["aggt", "gctt"]
# Bases to add between the BsmBI recognition site and the ends of the oligos.
random_bases = ("GCTTCA", "TGCTAA")
# Storage vector BsmBI flanks.
flanks = ("CGTCTCGGTCTCa", "tGAGACCgGAGACG")

# Map each selected terminator name to its lowercase sequence wrapped first in
# the overhangs and then in the flank pairs.
seqs_to_order = {}
for term_name, term_seq in selected_terms["sequence"].items():
    with_overhangs = workflow.add_overhangs(term_seq.lower(), overhangs)
    seqs_to_order[term_name] = workflow.add_flanks(
        with_overhangs, [flanks, random_bases]
    )

In [None]:
# Show the assembled sequences, keyed by terminator name.
seqs_to_order

In [None]:
prefix = "oLT"
id_num = 37  # number assigned to the first oligo printed below

# Print one tab-separated line (ID, name, lowercase sequence) for every
# terminator that is not listed in NO_GBLOCK. Only the sequence as built is
# printed for these, never its reverse complement. `id_num` keeps counting so
# that the next cell can continue the numbering.
for term_name, seq in seqs_to_order.items():
    if term_name not in NO_GBLOCK:
        print(f"{prefix}{id_num}\tVoigt_{term_name}\t{_format_seq(seq)}")
        id_num += 1

In [None]:
# Terminators listed in NO_GBLOCK get two lines each: the sequence as built
# ("sense"), then its reverse complement ("antisense"). Numbering continues
# from the `prefix` / `id_num` values left by the previous cell, so that cell
# has to run first.
for term_name, seq in seqs_to_order.items():
    if term_name in NO_GBLOCK:
        for strand_label in ("sense", "antisense"):
            if strand_label == "sense":
                oligo_seq = seq
            else:
                oligo_seq = sequence.reverse_complement(seq)
            print(
                f"{prefix}{id_num}\tVoigt_{term_name}_{strand_label}\t{_format_seq(oligo_seq)}"
            )
            id_num += 1