In [None]:
import pandas as pd
import toml
import re
import pygsheets
from tqdm.auto import tqdm
import Bio.Restriction as Restriction
from Bio.Seq import Seq
import benchlingapi

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import paulssonlab.api as api
from paulssonlab.api.util import base_url
import paulssonlab.cloning.workflow as workflow
import paulssonlab.cloning.util as cloning_util
import paulssonlab.cloning.sequence as sequence
import paulssonlab.cloning.registry as registry
import paulssonlab.cloning.enzyme as enzyme
import paulssonlab.cloning.commands.parser as cmd_parser
import paulssonlab.cloning.commands.semantics as cmd_semantics

# Setup

In [None]:
# Load notebook settings from config.toml (path is relative to the working directory).
# config["registry"]["folder"] is read below when the registry handle is built.
config = toml.load("config.toml")

In [None]:
# Authorize a pygsheets client from the service-account key file credentials.json
# (path is relative to the working directory).
# NOTE(review): `gc` is also the name of the stdlib garbage-collector module; this notebook
# does not import that module, so nothing is shadowed here.
gc = pygsheets.authorize(service_account_file="credentials.json")

In [None]:
# Build the registry handle from the pygsheets client and the folder named in the config.
# Later cells index it with tuples such as ("LIB", "parts").
reg = registry.Registry(gc, config["registry"]["folder"])

# Data

# Design

In [None]:
def _format_seq(seq):
    """Return `seq` as a lower-case string.

    The input is first passed through `sequence.get_seq`, then converted with
    `str()` and lower-cased.
    """
    # TODO: mixed bases in upper case for IDT
    extracted = sequence.get_seq(seq)
    as_text = str(extracted)
    return as_text.lower()

In [None]:
def overhangs_for(x):
    """Return the `(upstream, downstream)` overhang pair stored on `x`.

    `x` must support item lookup for the keys "Upstream overhang" and
    "Downstream overhang"; a missing key raises whatever `x[...]` raises.
    """
    upstream = x["Upstream overhang"]
    downstream = x["Downstream overhang"]
    return (upstream, downstream)

In [None]:
# Look up the "Part types" entry under ("LIB", "parts") in the registry. Later cells index it by
# part-type name and read the "Upstream overhang" / "Downstream overhang" fields of each entry.
part_types = reg[("LIB", "parts", "Part types")]

In [None]:
# Display the registry entry for the "5UTR_BC" part type (inspection only; nothing is assigned).
part_types["5UTR_BC"]

In [None]:
# Display the registry entry for the "5UTR_1" part type; its overhangs are the ones used for the
# ribozyme order further down.
part_types["5UTR_1"]

In [None]:
# Display the registry entry for the "5UTR_2" part type; its overhangs are the ones used for the
# RBS library order further down.
part_types["5UTR_2"]

In [None]:
# Prints a single literal tab character.
# NOTE(review): looks like leftover scratch output (perhaps to copy a tab for the TSV text in the
# next cell) — confirm whether this cell is still needed.
print("\t")

In [None]:
import io

# Whitespace-separated table of ribozyme insulators: one name and one sequence per row.
# A "|" inside each sequence marks a split point; everything after it is stored in the
# "Cleaved" column below (presumably the self-cleavage site — TODO confirm).
ribozymes_tsv = """
Name	Sequence
RiboJ	AGCTGTC|ACCGGATGTGCTTTCCGGTCTGATGAGTCCGTGAGGACGAAACAGCCTCTACAAATAATTTTGTTTAA
AraJ	AGTGGTC|GTGATCTGAAACTCGATCACCTGATGAGCTCAAGGCAGAGCGAAACCACCTCTACAAATAATTTTGTTTAA
BydvJ	AGGGTGTC|TCAAGGTGCGTACCTTGACTGATGAGTCCGAAAGGACGAAACACCCCTCTACAAATAATTTTGTTTAA
CchJ	AGTTCCAGTC|GAGACCTGAAGTGGGTTTCCTGATGAGGCTGTGGAGAGAGCGAAAGCTTTACTCCCGCACAAGCCGAAACTGGAACCTCTACAAATAATTTTGTTTAA
ElvJ	AGCCCCATA|GGGTGGTGTGTACCACCCCTGATGAGTCCAAAAGGACGAAATGGGGCCTCTACAAATAATTTTGTTTAA
LtsvJ	AGTACGTC|TGAGCGTGATACCCGCTCACTGAAGATGGCCCGGTAGGGCCGAAACGTACCTCTACAAATAATTTTGTTTAA
PlmJ	AGTCATAAGTC|TGGGCTAAGCCCACTGATGAGTCGCTGAAATGCGACGAAACTTATGACCTCTACAAATAATTTTGTTTAA
SarJ	AGACTGTC|GCCGGATGTGTATCCGACCTGACGATGGCCCAAAAGGGCCGAAACAGTCCTCTACAAATAATTTTGTTTAA
ScmJ	AGCGCTGTC|TGTACTTGTATCAGTACACTGACGAGTCCCTAAAGGACGAAACACCGCCTCTACAAATAATTTTGTTTAA
RiboJ10	AGCGCTC|AACGGGTGTGCTTCCCGTTCTGATGAGTCCGTGAGGACGAAAGCGCCTCTACAAATAATTTTGTTTAA
RiboJ51	AGTAGTC|ACCGGCTGTGCTTGCCGGTCTGATGAGCCTGTGAAGGCGAAACTACCTCTACAAATAATTTTGTTTAA
RiboJ53	AGCGGTC|AACGCATGTGCTTTGCGTTCTGATGAGACAGTGATGTCGAAACCGCCTCTACAAATAATTTTGTTTAA
RiboJ54	AGGGGTC|AGTTGATGTGCTTTCAACTCTGATGAGTCAGTGATGACGAAACCCCCTCTACAAATAATTTTGTTTAA
RiboJ57	AGAAGTC|AATTAATGTGCTTTTAATTCTGATGAGTCGGTGACGACGAAACTTCCTCTACAAATAATTTTGTTTAA
RiboJ60	AGTCGTC|AAGTGCTGTGCTTGCACTTCTGATGAGGCAGTGATGCCGAAACGACCTCTACAAATAATTTTGTTTAA
RiboJ64	AGGAGTC|AATTAATGTGCTTTTAATTCTGATGAGACGGTGACGTCGAAACTCCCTCTACAAATAATTTTGTTTAA
"""

# Raw string for the regex separator: "\s" in a plain string literal is an invalid escape
# sequence (DeprecationWarning, SyntaxWarning on Python 3.12+).
ribozymes = pd.read_csv(io.StringIO(ribozymes_tsv), sep=r"\s+", index_col=0)
# "Cleaved" = the part of each sequence after the "|" marker.
ribozymes["Cleaved"] = ribozymes["Sequence"].map(lambda x: x.split("|")[1])
# "Sequence" = the full sequence with the marker removed (literal replace, not a regex).
ribozymes["Sequence"] = ribozymes["Sequence"].str.replace("|", "", regex=False)

In [None]:
# The original cell passed a variable `riboj` that is not defined anywhere in this notebook, so it
# raised NameError on a fresh kernel. The RiboJ row of the `ribozymes` table is used instead,
# lower-cased as in the order-building cells below.
# NOTE(review): assumes the full RiboJ sequence (not just the "Cleaved" part) was intended — confirm.
workflow.add_overhangs(
    ribozymes.loc["RiboJ", "Sequence"].lower(),
    overhangs_for(part_types["5UTR_1"]),
)

In [None]:
# Cleaved RiboJ, followed by the 5UTR_1 downstream overhang, 35 "N" placeholders and a final "a".
(
    ribozymes.loc["RiboJ", "Cleaved"]
    + part_types["5UTR_1"]["Downstream overhang"]
    + "N" * 35
    + "a"
)

In [None]:
# Stored sequence of the "sigW" part from the LIB parts registry; sliced in the cells below.
sigw = reg[("LIB", "parts")]["sigW"]["Sequence"]

In [None]:
# Display the sigW sequence without its first position (inspection only).
sigw[1:]

# Weiss

In [None]:
# Length of the stored "B0032m_BC" part sequence (inspection only).
len(reg[("LIB", "parts")]["B0032m_BC"]["Sequence"])

In [None]:
# "B0032m_BC" part sequence followed by sigW with its first four positions dropped.
# NOTE(review): the reason for skipping exactly four positions is not visible here — confirm.
reg[("LIB", "parts")]["B0032m_BC"]["Sequence"] + sigw[4:]

In [None]:
# "BCD22" part sequence followed by sigW with its first four positions dropped.
reg[("LIB", "parts")]["BCD22"]["Sequence"] + sigw[4:]

In [None]:
# "BCD14" part sequence followed by sigW with its first four positions dropped.
reg[("LIB", "parts")]["BCD14"]["Sequence"] + sigw[4:]

In [None]:
# "BCD14" part sequence on its own; the sigW suffix is deliberately commented out here.
reg[("LIB", "parts")]["BCD14"]["Sequence"]  # + sigw[4:]

# RBS library order

In [None]:
# Full-length RBS library designs, including mixed-base letters (K, R, W, Y, S, M).
# They are kept under their own name so that the trimming below always starts from the untrimmed
# designs. Previously `seqs` was rebound from itself in a separate cell, so re-running that cell
# trimmed the sequences a second time.
rbs_lib_designs = {
    "SigW_RBSlib1_weak": "ACCGGATGTGCTTTCCGGTCTGATGAGTCCGTGAGGACGAAACAGCCTCTACAAATAATTTTGTTTAACCATKRWGGCCAYKGSGGACACAKA",
    "SigW_RBSlib1_strong": "ACCGGATGTGCTTTCCGGTCTGATGAGTCCGTGAGGACGAAACAGCCTCTACAAATAATTTTGTTTAACCATAACTSASRKARGCGTTMTYTA",
}

# Number of leading positions to drop: cleaved RiboJ plus the 5UTR_1 downstream overhang.
cleave5 = len(
    ribozymes.loc["RiboJ", "Cleaved"] + part_types["5UTR_1"]["Downstream overhang"]
)
# Number of trailing positions to drop. Must stay > 0: a slice ending at -0 would be empty.
cleave3 = 1
seqs = {name: seq[cleave5:-cleave3] for name, seq in rbs_lib_designs.items()}

In [None]:
# Build the RBS library sequences to order: each trimmed design is lower-cased, given the 5UTR_2
# overhangs, then wrapped in the flank pairs listed below.
overhangs = overhangs_for(part_types["5UTR_2"])
# Named `random_flanks`, as in the ribozyme order cell. The previous name `random_bases` collided
# with the `random_bases()` function defined later in this notebook, so re-running this cell
# replaced that function with a tuple.
random_flanks = (
    "GCTTCA",
    "TGCTAA",
)  # to add between BsmBI recognition site and ends of oligos
flanks = ("CGTCTCGGTCTCa", "tGAGACCgGAGACG")  # storage vector BsmBI flanks
seqs_to_order = {}
for name, seq in seqs.items():
    seqs_to_order[name] = workflow.add_flanks(
        workflow.add_overhangs(seq.lower(), overhangs),
        [flanks, random_flanks],
    )

In [None]:
# Print one tab-separated row (ID, name, upper-case sequence) per strand of every RBS library
# sequence. IDs count up from oLT48; each sequence yields a sense and an antisense oligo.
prefix = "oLT"
id_num = 48
for seq_name, seq in seqs_to_order.items():
    for sense in (True, False):
        oligo_seq = seq if sense else sequence.reverse_complement(seq)
        strand = "sense" if sense else "antisense"
        id_ = f"{prefix}{id_num}"
        name = f"{seq_name}_{strand}"
        formatted = _format_seq(oligo_seq).upper()
        print(f"{id_}\t{name}\t{formatted}")
        id_num += 1

# Ribozyme order

In [None]:
import random


def random_bases(n, rng=None):
    """Return a string of `n` randomly chosen bases drawn from "ATCG".

    Parameters
    ----------
    n : int
        Number of bases to generate. Zero or negative values give an empty string.
    rng : random.Random, optional
        Source of randomness. Defaults to the module-level `random` generator,
        as before. Pass a seeded `random.Random(seed)` to get reproducible padding.

    Returns
    -------
    str
        A string of length `max(n, 0)` containing only the characters A, T, C and G.
    """
    source = random if rng is None else rng
    return "".join(source.choice("ATCG") for _ in range(max(n, 0)))

In [None]:
# Map ribozyme name -> full sequence (the "Sequence" column of `ribozymes`).
# NOTE: this rebinds `seqs`, which held the RBS library sequences in the previous section.
seqs = ribozymes["Sequence"].to_dict()

In [None]:
# Build the ribozyme sequences to order: lower-case each sequence, add the 5UTR_1 overhangs, wrap
# it in the flank pairs, then append random bases until it reaches `min_length`.
min_length = 125  # for gblock
overhangs = overhangs_for(part_types["5UTR_1"])
flanks = ("CGTCTCGGTCTCa", "tGAGACCgGAGACG")  # storage vector BsmBI flanks
# to add between BsmBI recognition site and ends of oligos
random_flanks = ("GCTTCA", "TGCTAA")
seqs_to_order = {}
for name, seq in seqs.items():
    with_overhangs = workflow.add_overhangs(seq.lower(), overhangs)
    flanked = workflow.add_flanks(with_overhangs, [flanks, random_flanks])
    # random_bases() returns "" when the construct is already long enough.
    seq = flanked + random_bases(min_length - len(flanked))
    seqs_to_order[name] = seq

In [None]:
# Print one tab-separated row (ID, name, lower-case sequence) per ribozyme construct.
# IDs count up from oLT52. Only the sense strand is listed: the strand tuple holds just True.
prefix = "oLT"
id_num = 52
for seq_name, seq in seqs_to_order.items():
    for sense in (True,):
        oligo_seq = seq if sense else sequence.reverse_complement(seq)
        id_ = f"{prefix}{id_num}"
        formatted = _format_seq(oligo_seq)
        print(f"{id_}\t{seq_name}\t{formatted}")
        id_num += 1