In [None]:
import re
from pathlib import Path

import benchlingapi
import Bio.Restriction as Restriction
import pandas as pd
import pygsheets
import toml
from tqdm.auto import tqdm

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import paulssonlab.api as api
import paulssonlab.api.benchling as bapi
from paulssonlab.api.util import base_url
import paulssonlab.cloning.workflow as workflow
import paulssonlab.cloning.util as cloning_util
import paulssonlab.cloning.io as cloning_io
import paulssonlab.cloning.sequence as sequence
import paulssonlab.cloning.registry as registry
import paulssonlab.cloning.enzyme as enzyme
import paulssonlab.cloning.commands.semantics as cmd_semantics
import paulssonlab.cloning.commands.parser as cmd_parser
import paulssonlab.api.benchling as benchling

# Setup

In [None]:
# Load notebook settings from config.toml in the working directory; later cells
# read the "benchling" and "registry" tables from it.
config = toml.load("config.toml")

In [None]:
# Authorize a pygsheets client with the service-account key file credentials.json
# (read from the working directory).
gc = pygsheets.authorize(service_account_file="credentials.json")

In [None]:
# Open a Benchling API session using the API key stored in config.toml (never
# hard-coded here), then look up the root folder of the configured project.
bench_session = benchlingapi.Session(config["benchling"]["api_key"])
benchling_folder = bapi.get_project_root(bench_session, config["benchling"]["project"])

In [None]:
# Build the registry from the Sheets client, the configured registry folder and
# the Benchling project folder. Every later cell goes through `reg`.
reg = registry.Registry(gc, config["registry"]["folder"], benchling_folder)

# Rename .gb to .gbk

In [None]:
# List the Drive folder registered under ("pLIB", "maps"). The next cell iterates
# the result as a mapping of file name -> metadata dict with an "id" entry.
files = api.google.list_drive(reg.drive_service, root=reg.gdrive_ids[("pLIB", "maps")])

In [None]:
# Rename every "*.gb" file found above to "*.gbk" with a single batched Drive call.
#
# The batch is created from reg.drive_service, the same client that builds the
# update requests. The previous version called an un-imported `build(...)` to make
# a second client, which raised NameError on a fresh kernel.
# NOTE(review): assumes reg.drive_service is the root Drive v3 resource (it exposes
# .files()), which is where new_batch_http_request() lives - confirm.
batch = reg.drive_service.new_batch_http_request()
for name, file in files.items():
    if name.endswith(".gb"):
        batch.add(
            reg.drive_service.files().update(
                # Appending "k" turns the ".gb" suffix into ".gbk".
                fileId=file["id"], body={"name": f"{name}k"}
            )
        )
batch.execute()

# Plasmid map for Daniel

In [None]:
# test pcr with other primer order

In [None]:
c = reg[("LIB", "parts")]

In [None]:
cmd_semantics.eval_exprs_by_priority("oLT72=oLT73", reg.get)

In [None]:
cmd_semantics.eval_expr("pLIB122", reg.get)

In [None]:
cmd_semantics.eval_expr("JUMP_upstream_T97", reg.get)

In [None]:
# Resolve the template plasmid and the four primers through the registry and keep
# only the "_seq" entry of each result.
plib122 = cmd_semantics.eval_expr("pLIB122", reg.get)["_seq"]
olib179 = cmd_semantics.eval_expr("oLIB179", reg.get)["_seq"]
olib180 = cmd_semantics.eval_expr("oLIB180", reg.get)["_seq"]
olib181 = cmd_semantics.eval_expr("oLIB181", reg.get)["_seq"]
olib182 = cmd_semantics.eval_expr("oLIB182", reg.get)["_seq"]
# The four fragments a, b, c, d are passed, in this order, to the "@Gib(...)"
# command evaluated further down.
# NOTE: `c` is rebound here; an earlier cell bound it to reg[("LIB", "parts")].
a = cmd_semantics.eval_expr("pLIB122<oLIB182,oLIB179>", reg.get)["_seq"]
b = cmd_semantics.eval_expr("JUMP_upstream_T97", reg.get)["_seq"]
c = cmd_semantics.eval_expr("pLIB122<oLIB180,oLIB181>", reg.get)["_seq"]
d = cmd_semantics.eval_expr("JUMP_downstream_T540", reg.get)["_seq"]

In [None]:
olib180

In [None]:
olib181

In [None]:
a[:20]

In [None]:
b[:20]

In [None]:
len(a)

In [None]:
len(c)

In [None]:
len(plib122)

In [None]:
# Gibson-assemble fragments c and d, each annotated with its own label.
# NOTE(review): the result is named `cd_nocirc` but is built with circularize=True,
# and a later cell calls _assemble(None, ...) on it again - confirm whether
# circularize=False was intended here.
cd_nocirc = sequence.assemble(
    [c.annotate("c"), d.annotate("d")], method="gibson", circularize=True
)

In [None]:
cd_nocirc

In [None]:
# Call the private _assemble with None as the partner sequence.
# NOTE(review): presumably this self-circularizes the record (the same
# None-partner call appears under "Circularization test" below) - confirm.
cd_nocirc._assemble(None, method="gibson")

In [None]:
ab = sequence.assemble(
    [a.annotate("a"), b.annotate("b", type="foo")], method="gibson", circularize=False
)

In [None]:
from Bio.Seq import Seq

In [None]:
# Check how annotations survive concatenation: x is a short literal sequence
# labelled "a"; y is fragment b labelled "b" (type "foo") and sliced from index 10.
# NOTE(review): the meaning of annotation_start=0 is defined in DsSeqRecord.slice,
# which is not visible here.
x = sequence.DsSeqRecord(Seq("gagcctttcgttttatttgatg")).annotate("a")
y = b.annotate("b", type="foo").slice(10, None, annotation_start=0)
(x + y).features

In [None]:
y.features

In [None]:
ab = sequence.assemble(
    [
        sequence.DsSeqRecord(Seq("gagcctttcgttttatttgatg")).annotate("a"),
        b.annotate("b", type="foo"),
    ],
    method="gibson",
    circularize=False,
)

In [None]:
ab.features

In [None]:
bc = sequence.assemble([b.annotate("b"), c.annotate("c")], method="gibson")

In [None]:
# Export the a+b assembly as GenBank for inspection in an external viewer.
# The file goes to the current user's ~/Downloads rather than a path hard-coded
# to one machine's home directory.
with open(Path.home() / "Downloads" / "_ab2.gb", "w") as f:
    f.write(ab.format("genbank"))

In [None]:
sequence.assemble([b, c], method="gibson")

In [None]:
sequence.assemble([c, d], method="gibson")

In [None]:
sequence.assemble([d, a], method="gibson")

In [None]:
s = cmd_semantics.eval_command(
    "@Gib(pLIB122<oLIB182,oLIB179>, JUMP_upstream_T97, pLIB122<oLIB180,oLIB181>, JUMP_downstream_T540)",
    reg.get,
)

In [None]:
s = cmd_semantics.eval_command(
    "@Gib(pLIB122<oLIB182,oLIB179>, JUMP_upstream_2r2, pLIB122<oLIB180,oLIB181>, JUMP_downstream_2r2)",
    reg.get,
)

In [None]:
# Export the sequence of the last evaluated "@Gib(...)" command as GenBank.
# The file goes to the current user's ~/Downloads rather than a path hard-coded
# to one machine's home directory.
with open(Path.home() / "Downloads" / "insulated_hudson.gb", "w") as f:
    f.write(s["_seq"].format("genbank"))

In [None]:
# All four pairings of the two forward-side and two reverse-side primers on
# pLIB122; the cell displays the length of each evaluated product.
pcrs = [
    "pLIB122<oLIB182,oLIB179>",
    "pLIB122<oLIB180,oLIB181>",
    "pLIB122<oLIB180,oLIB179>",
    "pLIB122<oLIB182,oLIB181>",
]
seqs = [cmd_semantics.eval_expr(pcr_expr, reg.get) for pcr_expr in pcrs]
[len(product["_seq"]) for product in seqs]

In [None]:
reg[("oLT", "oligos")].clear_cache()

In [None]:
s = cmd_semantics.eval_command("@Gib(oLT78, oLT79)", reg.get)

In [None]:
s2 = cmd_semantics.eval_expr("JUMP_upstream_2r2", reg.get)

In [None]:
s["_seq"]

In [None]:
s2["_seq"]

In [None]:
s2["_seq"] == s["_seq"]

# Test case

In [None]:
c = reg[("pTEST", "maps")]

In [None]:
# Register the same Addgene plasmid under several keys to exercise set_from_file:
# a SnapGene (.dna) source, a GenBank (.gbk) source, a key with an unexpected
# ".bad" suffix, and a key nested inside a sub-folder.
# The source files are looked up in the current user's ~/Downloads rather than a
# path hard-coded to one machine's home directory.
downloads = Path.home() / "Downloads"
addgene_dna = str(downloads / "addgene-plasmid-59954-sequence-95195.dna")
addgene_gbk = str(downloads / "addgene-plasmid-59954-sequence-95195.gbk")

c.set_from_file("pTEST1", addgene_dna)
c.set_from_file("pTEST2", addgene_gbk)
c.set_from_file("pTEST3.bad", addgene_gbk)
c.set_from_file(("foo_dir", "pTEST3.bad"), addgene_gbk)

In [None]:
list(c.local.keys())

In [None]:
# Parse the SnapGene file into a sequence object; it is used as the payload for
# the assignments below. The file is looked up in the current user's ~/Downloads
# rather than a path hard-coded to one machine's home directory.
seq1 = cloning_io.read_file(
    str(Path.home() / "Downloads" / "addgene-plasmid-59954-sequence-95195.dna")
)

In [None]:
# Assign dicts of sequences to keys, three ways: a top-level string key, a nested
# tuple key, and a nested key through the `.content` view whose inner names carry
# an explicit ".gbk" extension.
# NOTE(review): presumably a dict value creates a folder with one file per entry -
# confirm against the collection client.
c["dir1"] = {"a": seq1, "b": seq1}
c[("b_dir", "dir2")] = {"a": seq1, "b": seq1}
c.content[("b_dir", "dir3")] = {"a.gbk": seq1, "b.gbk": seq1}

In [None]:
# Store a plain-text file through the `.raw` view (explicit MIME type plus
# content), then read the same key back through the `.bytes` view.
# The MIME type is the registered "text/plain"; the previous "plain/text" is not a
# valid media type.
c.raw[("c_dir", "c.txt")] = {"mimeType": "text/plain", "content": "blah"}
c.bytes[("c_dir", "c.txt")]

In [None]:
# Assign None through the `.raw` view and through the default view.
# NOTE(review): presumably this probes how None is handled (delete vs. error) -
# confirm against the collection client.
c.raw["test3.gb"] = None
c["test4"] = None

In [None]:
# Assign a plain string where other cells assign sequence objects.
# NOTE(review): presumably expected to be rejected or coerced - confirm.
c["test4"] = "foobar"

In [None]:
# Collect every proper, non-empty prefix of a path tuple, i.e. the chain of
# parent folders of the file named by its last component.
x = ("b_dir", "dir2", "a.gbk")
folders = {x[:depth] for depth in range(1, len(x))}

In [None]:
folders

In [None]:
set.update?

In [None]:
set(sorted(set([("b", 1), ("a", 0)])))

In [None]:
list(c.local.keys())

# Test

In [None]:
# Remove the ("pTEST", "maps") entry from reg.clients.
# NOTE(review): presumably this forces reg[("pTEST", "maps")] below to build a
# fresh client; if `clients` is a plain dict this raises KeyError when the entry
# is absent - confirm.
del reg.clients[("pTEST", "maps")]

In [None]:
# reg.duplicate_collection("LIB", "TEST")

In [None]:
reg.refresh()

In [None]:
c = reg[("pTEST", "maps")]

In [None]:
c.clear_cache()

In [None]:
c._download()

In [None]:
c._remote

In [None]:
c[()]

In [None]:
# WARNING: modifies remote state. Sets trashed=True on the Drive file with this
# hard-coded ID via files().update(); re-running affects that specific file only.
c.client.files().update(
    fileId="1QTyMeRMLF8F3Hf21Dmc3PDY_3PPhA2U1", body={"trashed": True}
).execute()

In [None]:
# WARNING: modifies remote state. Calls the project's delete_drive helper on the
# same hard-coded file ID that the previous cell moved to trash.
# NOTE(review): presumably a permanent delete - confirm in paulssonlab.api.google.
api.google.delete_drive(c.client, "1QTyMeRMLF8F3Hf21Dmc3PDY_3PPhA2U1")

In [None]:
c._remote_folders

In [None]:
sorted(c._remote_folders)

In [None]:
c[("bar", "bar1", "plib84")]

In [None]:
c.raw[("bar", "bar1", "plib84.gb")]

In [None]:
c.keys()

In [None]:
c.content["plib84.gb"]

In [None]:
c["test3"] = None

In [None]:
c._remote

In [None]:
# FIXME: this assignment was left without a right-hand side, which is a
# SyntaxError and made the cell impossible to run. It is kept as a comment until
# the intended payload is known.
# c.bytes["test3.gb"] = <bytes to store>

# Benchling test

In [None]:
# WARNING: called with overwrite=True, so this presumably replaces existing
# Benchling entries - confirm before re-running. return_data=True keeps the
# returned data in `z` for inspection.
z = reg.sync_benchling(overwrite=True, return_data=True)

In [None]:
# WARNING: called with clear=True.
# NOTE(review): presumably copies the "LIB" collection onto "LT" after emptying
# the destination - confirm before re-running.
reg.duplicate_collection("LIB", "LT", clear=True)

In [None]:
reg.registry

# PCR simulation test

In [None]:
# Hand-made sequences for the primer-binding / PCR experiments below.
# NOTE: `seq1` is rebound here; an earlier cell bound it to a parsed plasmid file.
seq1 = "TTTT"
# seq1 extended by a single 3' base (A or C).
seq1a = "TTTTA"
seq1b = "TTTTC"
# seq1b preceded by a run of C's.
seq1c = "CCCCCCCCCCCTTTTC"
# Alternating AAAA / TTTT blocks.
seq2 = "AAAATTTTAAAATTTTAAAA"
# Longer sequence containing several T runs.
seq3 = "AGTGATTTTTTTCTCCATTCTTTGTGTGTTTTTTTTGTTTTATGAATTTTTTTAACTGATACCCGTTTTTTTGGAAGGAGACCCGTTTTTTTGGAAG"
# Homopolymer blocks; seq4p is a run of G's to be located against seq4's G block.
seq4 = "TTTTTTAAAAAAAGGGGGGGGGGGGGGTTTTTTCCCCCCCCCCCCAAAAAAATTTTTTAAAAAAA"
seq4p = "GGGGGGGGGGGGGG"

In [None]:
# Locate the G-run primer against seq4.
# NOTE(review): the short sequence is passed first here, whereas the other
# find_primer_binding_site calls in this notebook pass the longer template first -
# confirm the argument order.
sequence.find_primer_binding_site(seq4p, seq4)

In [None]:
sequence.slice_seq?

In [None]:
# Locate the reverse primer on the short PCR template.
# The literals are the values the next cell binds to `seq5` and `p2`; they are
# inlined because this cell runs before those names exist on a fresh kernel
# (the previous version raised NameError under Restart & Run All).
sequence.find_primer_binding_site("AGAAGACCCCGGGGCCCTCT", "TTTTTTAGAGGG")

In [None]:
# Minimal PCR case. Each primer is a T-rich 5' tail followed by six bases that
# match one end of the template: p1 ends in "AGAAGA" (the start of seq5) and p2
# ends in "AGAGGG" (the reverse complement of seq5's last six bases, "CCCTCT").
p1 = "TTTTTAGAAGA"
p2 = "TTTTTTAGAGGG"
seq5 = "AGAAGACCCCGGGGCCCTCT"
sequence.pcr(seq5, p1, p2)

In [None]:
# Lower-case PCR case with ten-base binding regions: after the "tttttt" tail, p1
# matches the start of seq6 ("atggacaaag") and p2 is the reverse complement of
# its end ("ccccgtatcg").
# NOTE: p1 and p2 are rebound here; the cells below use these values.
p1 = "ttttttatggacaaag"
p2 = "ttttttcgatacgggg"
seq6 = "atggacaaagactttgatttcgataatattggcaaacgcaccccgtatcg"
sequence.pcr(seq6, p1, p2)

In [None]:
# Search for the reverse primer as given, with try_reverse_complement=True;
# compare with the next cell, which flips the primer by hand instead.
sequence.find_primer_binding_site(seq6, p2, try_reverse_complement=True)

In [None]:
# Counterpart of the previous cell: the primer is reverse-complemented explicitly
# and the function's own reverse-complement attempt is disabled.
sequence.find_primer_binding_site(
    seq6, sequence.reverse_complement(p2), try_reverse_complement=False
)

In [None]:
sequence.reverse_complement(seq5)

# Circularization test

In [None]:
# Synthetic record for the circularization tests. Non-DNA letters are used as
# readable placeholders, and both ends carry the same "abcdxxxx" stretch.
# NOTE(review): the sign convention of the overhang arguments is defined in
# DsSeqRecord, which is not visible here.
# NOTE: `a` is rebound here; an earlier cell bound it to a PCR product.
a = sequence.DsSeqRecord(
    "abcdxxxx" + "a" * 20 + "g" * 20 + "zabcdxxxx",
    upstream_overhang=-7,
    downstream_overhang=14,
)
a

In [None]:
sequence._assemble_gibson(a, a, max_overlap=40)

In [None]:
sequence._assemble_gibson(a, None)

In [None]:
a.assemble(None, method="gibson")

In [None]:
# Second synthetic record: the ends share only "abcd", with overhang arguments of
# -4 and 4; the cells below assemble it with method="goldengate".
# NOTE: `b` is rebound here; an earlier cell bound it to a registry sequence.
b = sequence.DsSeqRecord(
    "abcdxxxx" + "a" * 20 + "g" * 20 + "zabcd",
    upstream_overhang=-4,
    downstream_overhang=4,
)
b

In [None]:
# Print the plain-text form of the record built in the previous cell.
# It names `b` directly instead of IPython's `_` output history, which depends on
# which cell happened to run last and is empty on a fresh kernel.
print(b)

In [None]:
b.assemble(None, method="goldengate")

In [None]:
# Print the previous cell's output via IPython's `_` history.
# NOTE(review): `_` depends on execution order; consider binding the result of
# b.assemble(...) to a name in the previous cell and printing that instead.
print(_)

# Command parsing

In [None]:
# Parse a single expression: alias (=), PCR (<fwd,rev>) and digest (/enzyme).
# The parser is reached through `cmd_parser`, the alias under which
# paulssonlab.cloning.commands.parser is imported at the top; the bare name
# `commands` used previously is never bound in this notebook (NameError).
# NOTE(review): confirm expr_parser is defined in the parser module.
cmd_parser.expr_parser.parse("oLIT1=oLIT2<oLIB1,oLIB2>/BsaI")

In [None]:
# Parse two comma-separated expressions in one string.
# The parser is reached through `cmd_parser`, the alias under which
# paulssonlab.cloning.commands.parser is imported at the top; the bare name
# `commands` used previously is never bound in this notebook (NameError).
# NOTE(review): confirm expr_parser is defined in the parser module.
cmd_parser.expr_parser.parse(
    "oLIT1=oLIT2<oLIB1,oLIB2>/BsaI,oLIT1=oLIT2<oLIB1,oLIB2>/BsaI"
)

In [None]:
# Parse a command whose arguments mix bare names and full expressions.
# The parser is reached through `cmd_parser`, the alias under which
# paulssonlab.cloning.commands.parser is imported at the top; the bare name
# `commands` used previously is never bound in this notebook (NameError).
# NOTE(review): confirm command_parser is defined in the parser module.
cmd_parser.command_parser.parse(
    "@3(foo, bar, baz, oLIT1=oLIT2<oLIB1,oLIB2>/BsaI, oLIT1=oLIT2<oLIB1,oLIB2>/BsaI)"
)

In [None]:
# Parse a nested command (a Golden Gate step inside a Gibson step) and display
# the resulting tree.
# The parser is reached through `cmd_parser`, the alias under which
# paulssonlab.cloning.commands.parser is imported at the top; the bare name
# `commands` used previously is never bound in this notebook (NameError).
# NOTE(review): confirm command_parser is defined in the parser module.
command = "@Gib(@GG(UNS1, J23101, BCD11, UNS5), pLIB47<oLIB22,oLIB24>/BsaI)"
ast = cmd_parser.command_parser.parse(command)
ast

In [None]:
# command = (
#     "@3G(UNS1-J23101-BCD11-mVenus-L3S3P11-UNS5, UNS5-J23150-CFP-BCD16-L3S2P55-UNS10)"
# )
# command = "@Gib(@GG(UNS1, J23101, BCD11, UNS5), pLIB47~oLIB22,oLIB24/BsaI)"
# command = "@Gib(@GG(UNS1, J23101, BCD11, UNS5), @PCR(pLIB47, oLIB22, oLIB24)/BsaI)"
# command = (
#     "@Gib(@GG(UNS1, J23101, BCD11, UNS5), @RE(@PCR(pLIB47, oLIB22, oLIB24), BsaI))"
# )

In [None]:
# GG
# command = "@GG(J23101, BCD11, mVenus, L3S3P11, p121/BsaI)"
# 3G
# command = "@3G(@GG(UNS1, J23101, BCD11, mVenus, L3S3P11, UNS5), @GG(UNS5, J23150, CFP, BCD16, L3S2P55, UNS10), JUMP_p15a_UNS1_UNS10)"
# command = """@Gib(@GG:tLT(UNS1_A, pTac, B0032m_BC, sigW, L3S1P13, UNS3_E),
#                   @GG:tLT(UNS3_A, J23107_AB, B0034m_BC, mScarlet-I, L3S3P11, UNS4_E),
#                   @GG:tLT(UNS4_A, PsigW, B0032m_BC, sfGFP, L3S2P55, UNS5_E),
#                   @GG:tLT(UNS5_A, pTet_AB, B0034m_BC, rsiW, L3S2P11, UNS6_E),
#                   @GG:tLT(UNS6_A, PsigW, BCD16, sigW, L3S2P21, UNS10_E))"""
# TODO: pLIB213-pLIB218 maps
# TODO: BACKBONE!!!
# command = """@Gib(@GG:tLT3(UNS1_A, pTac, B0032m_BC, sfCFP, L3S1P13, UNS3_E),
#                   @GG:tLT(UNS3_A, J23107_AB, B0034m_BC, mScarlet-I, L3S3P11, UNS4_E))"""
# command = "@GG:tLT(UNS1_A, pTac, B0032m_BC, sfCFP, L3S1P13, UNS3_E)"
command = """@Gib(@GG:tLT(UNS1_A, pTac, B0032m_BC, sfCFP, L3S1P13, UNS3_E),
                  @GG:tLT(UNS3_A, J23107_AB, B0034m_BC, mScarlet-I, L3S3P11, UNS4_E),
                  @GG:tLT(UNS4_A, pTac, B0032m_BC, sfGFP, L3S2P55, UNS5_E),
                  @GG:tLT(UNS5_A, pTet_AB, B0034m_BC, sfCFP, L3S2P11, UNS6_E),
                  @GG:tLT(UNS6_A, pTac, BCD16, sfCFP, L3S2P21, UNS10_E))"""

In [None]:
# Parse whichever command string the preceding cell left in `command`.
# The parser is reached through `cmd_parser`, the alias under which
# paulssonlab.cloning.commands.parser is imported at the top; the bare name
# `commands` used previously is never bound in this notebook (NameError).
# NOTE(review): confirm command_parser is defined in the parser module.
cmd_parser.command_parser.parse(command)

In [None]:
# TODO: better error message if next id fails
# ignore columns with formulae when _get_next_empty_row

In [None]:
# Feed parse_id an ID prefix padded with leading and trailing spaces.
# NOTE(review): presumably checks that surrounding whitespace is tolerated - confirm.
workflow.parse_id(" tLIB  ")

In [None]:
# Call the private row-finding helper on the ("tLT", "tus") sheet; see the TODO
# above about ignoring formula columns.
workflow._get_next_empty_row(reg.get_sheet(("tLT", "tus")))

In [None]:
# Ask for the next collection ID on the same ("tLT", "tus") sheet; see the TODO
# above about a clearer error message when this lookup fails.
workflow.get_next_collection_id(reg.get_sheet(("tLT", "tus")))