In [None]:
import re
import string
import urllib
from datetime import datetime
from pathlib import Path

import benchlingapi
import Bio.Restriction as Restriction
import holoviews as hv
import hvplot.pandas
import matplotlib.pyplot as plt
import pandas as pd
import pygsheets
import seaborn as sns
import toml
from Bio.Seq import Seq
from tqdm.auto import tqdm

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import paulssonlab.api as api
from paulssonlab.api.util import base_url
import paulssonlab.cloning.workflow as workflow
import paulssonlab.cloning.util as cloning_util
import paulssonlab.cloning.sequence as sequence
import paulssonlab.cloning.golden_gate as golden_gate
import paulssonlab.cloning.registry as registry

In [None]:
# Select Bokeh as the HoloViews plotting backend for this notebook.
hv.extension("bokeh")

# Setup

In [None]:
# Load notebook settings from config.toml (path is relative to the working
# directory). The ["registry"]["folder"] entry is read further down.
config = toml.load("config.toml")

In [None]:
# Authorize the pygsheets client with the service-account key file
# credentials.json (path is relative to the working directory).
gc = pygsheets.authorize(service_account_file="credentials.json")

In [None]:
# session = benchlingapi.Session(config["benchling"]["api_key"])

In [None]:
# strain_sheet = gc.open_by_key(col["strains"]).worksheet()
# plasmid_sheet = gc.open_by_key(col["plasmids"]).worksheet()
# part_sheet = gc.open_by_key(col["parts"]).worksheet()
# part_sequences_sheet = gc.open_by_key(col["parts"]).worksheet_by_title("Sequences")
# oligo_sheet = gc.open_by_key(col["oligos"]).worksheet()

In [None]:
# drive_service = plasmid_sheet.client.drive.service
# plasmid_folder = col["plasmid_maps"]
# plasmid_maps = api.google.list_drive(drive_service, root=plasmid_folder)

# Registry

In [None]:
# temp: get parts, plasmid seqs
# check that GG works
# gibson
# command: primer design for plasmid -> part storage vector (oligodest=oLIT)
# PCR with flanks
# primer design for fusion parts
# commands: GG, Gib (need to specify recipient strain!)
# get parts, plasmid seqs

In [None]:
# Build the Registry object from the authorized Sheets client and the folder
# configured under ["registry"]["folder"] in config.toml.
reg = registry.Registry(gc, config["registry"]["folder"])

In [None]:
reg.registry

In [None]:
%%timeit
# NOTE(review): `df2` is not defined anywhere in the visible notebook, so this
# cell relies on leftover kernel state — confirm where it should come from.
"J23101" in df2

In [None]:
reg.get("LIB93")

In [None]:
%%time
# NOTE(review): presumably copies the "LIB" collection into a "TESTB"
# collection — confirm against Registry.duplicate_collection, since re-running
# this cell may modify the live registry.
reg.duplicate_collection("LIB", "TESTB", clear=False)

# Golden Gate (GG)

In [None]:
# gibson.assemble -> hhh

In [None]:
# Fetch the five sequences used by the assembly below, in the same order as
# before (ids 1, 82, 23, 95, 110).
# NOTE(review): `get_plib_seq` and `drive_service` are not defined in the
# visible part of this notebook (`drive_service` only appears in a
# commented-out cell) — confirm they are available before running.
seq1, seq2, seq3, seq4, seq5 = (
    get_plib_seq(drive_service, plib_number)
    for plib_number in (1, 82, 23, 95, 110)
)

In [None]:
# Fragments handed to golden_gate.assemble, in list order. Each entry carries
# a sequence, the BsaI enzyme, a label and a feature type — presumably the
# (sequence, enzyme, name, feature) layout that function expects; confirm
# against golden_gate.assemble. seq1, seq2 and seq5 are passed as reverse
# complements, seq3 and seq4 as-is.
to_join = [
    (sequence.reverse_complement(seq1), Restriction.BsaI, "Name1", "promoter"),
    (sequence.reverse_complement(seq2), Restriction.BsaI, "Name2", "RBS"),
    (seq3, Restriction.BsaI, "Name3", "CDS"),
    (seq4, Restriction.BsaI, "Name4", "terminator"),
    (sequence.reverse_complement(seq5), Restriction.BsaI, "Name5", "misc_feature"),
]

# linear=False presumably requests a circular product — TODO confirm.
assembly = golden_gate.assemble(to_join, linear=False)
# Bare last expression so the notebook displays the result.
assembly

In [None]:
# Write the assembly in "gb" format to test3.gb in the current user's
# Downloads folder. The location is derived from the home directory rather
# than a hard-coded /Users/<name>/... path so the cell also runs on other
# machines and accounts.
with open(Path.home() / "Downloads" / "test3.gb", "w") as f:
    f.write(assembly.format("gb"))

# 3G

# Command parsing

In [None]:
import tatsu

In [None]:
# Grammar directives that are prepended when the reference sub-grammar is
# compiled on its own.
grammar_preamble = """@@grammar::CLONING
@@whitespace :: //"""

# Sub-grammar for sequence references: a bare name, a PCR expression
# (template~primer1,primer2) or a restriction digest (input/enzyme).
# Raw string: the text contains the regex escape \w, which is an invalid escape
# sequence in a normal string literal and triggers a warning at compile time.
reference_grammar = r"""reference
    =
    | pcr
    | restriction_digest
#    | assembly
    | name
    ;

name = name:/\w+/ ;

pcr = template:reference '~' ~ primer1:name ',' primer2:name ;

restriction_digest = input:reference '/' ~ enzyme:name ;

#assembly = assembly+:name {'-' ~ assembly+:name}+ ;
"""

# Full command grammar; the reference sub-grammar is interpolated at the end.
# Raw f-string for the same reason as above (\s, \w, \d, \.); doubled braces
# still produce literal { } in the grammar text.
grammar = rf"""start = command $ ;

argument
    =
    | quoted_string
    | command
    | float
    | int
    | lookup
    | reference
    ;

ws = /\s*/ ;

command_name = '@' ~ @:/\w+/ ;

command_arglist = '(' ~ ws @+:argument ws {{',' ws @+:argument ws }}* ')' ;

command = command_name:command_name arguments:command_arglist ;

quoted_string = '"' ~ quoted_string:/[^"]*/ '"' ;

float = float:/\d+\.\d+/ ;

int = int:/\d+/ ;

lookup = '$' ~ name ;

{reference_grammar}
"""

In [None]:
# Compile the command grammar and parse a sample command that nests @GG
# inside @Gib and uses the `~` (pcr rule) and `/` (restriction_digest rule)
# operators from the reference sub-grammar.
parser = tatsu.compile(grammar)
command = "@Gib(@GG(UNS1, J23101, BCD11, UNS5), pLIB47~oLIB22,oLIB24/BsaI)"
ast = parser.parse(command)
# Bare last expression so the notebook displays the parse result.
ast

In [None]:
# command = (
#     "@3G(UNS1-J23101-BCD11-mVenus-L3S3P11-UNS5, UNS5-J23150-CFP-BCD16-L3S2P55-UNS10)"
# )
# command = "@Gib(@GG(UNS1, J23101, BCD11, UNS5), pLIB47~oLIB22,oLIB24/BsaI)"
# command = "@Gib(@GG(UNS1, J23101, BCD11, UNS5), @PCR(pLIB47, oLIB22, oLIB24)/BsaI)"
# command = (
#     "@Gib(@GG(UNS1, J23101, BCD11, UNS5), @RE(@PCR(pLIB47, oLIB22, oLIB24), BsaI))"
# )

In [None]:
part_sheet.sync?

In [None]:
def get_named_sequence(name, part_sheet=None, plasmid_maps=None):
    """Look up the sequence registered under ``name`` (not implemented yet).

    The intended lookup order noted by the author is plasmid, strain, part.
    ``part_sheet`` and ``plasmid_maps`` are optional so the function can be
    called with just a name, as the notebook does right after defining it;
    with both parameters required that call raised ``TypeError``.

    Args:
        name: Identifier to resolve, e.g. ``"pLIB27"``.
        part_sheet: Source to search for parts; currently unused.
        plasmid_maps: Source to search for plasmid maps; currently unused.

    Returns:
        None. The body is still a placeholder.
    """
    # TODO: try plasmid, strain, part
    return None


def goldengate(*args):
    """Placeholder Golden Gate handler: tag the received arguments with "gg".

    Returns:
        The 2-tuple ``("gg", args)``, where ``args`` is the tuple of
        positional arguments exactly as passed in.
    """
    tagged = ("gg", args)
    return tagged


def threeg(*args):
    """Placeholder 3G handler: tag the received arguments with "3g".

    Returns:
        The 2-tuple ``("3g", args)``, where ``args`` is the tuple of
        positional arguments exactly as passed in.
    """
    tagged = ("3g", args)
    return tagged


# Dispatch table: command name (the text after "@" in a command string)
# mapped to the function that handles it.
commands = {
    "GG": goldengate,
    "3G": threeg,
}

# Exercise the lookup stub with a sample name.
get_named_sequence("pLIB27")

In [None]:
# Separate parser compiled from only the grammar directives plus the
# reference sub-grammar (name, pcr and restriction_digest rules).
reference_parser = tatsu.compile(grammar_preamble + reference_grammar)


class CloningCommandSemantics(object):
    """Semantic actions applied while parsing a cloning command.

    An instance is passed as ``semantics=`` to ``parser.parse``. Each method
    receives the parsed node for the grammar rule it is named after and
    returns the value that should replace that node.

    Args:
        commands: Mapping from command name (without the leading ``@``) to
            the callable that implements the command.
    """

    def __init__(self, commands):
        self.commands = commands

    def command(self, ast):
        """Dispatch a ``command`` node to its registered handler.

        Args:
            ast: Node exposing ``command_name`` and ``arguments``.

        Returns:
            Whatever the handler returns.

        Raises:
            tatsu.semantics.SemanticError: If ``ast.command_name`` is not a
                key of ``self.commands``.
        """
        if ast.command_name not in self.commands:
            # List the commands registered on this instance rather than a
            # module-level `commands` variable that may differ or not exist.
            raise tatsu.semantics.SemanticError(
                "command must be one of: {}".format(
                    ", ".join([f"@{k}" for k in self.commands])
                )
            )
        command = self.commands[ast.command_name]
        # NOTE(review): the handler receives the argument list as one
        # positional value, while the handlers visible in this notebook are
        # declared with *args — confirm whether the list should be unpacked.
        return command(ast.arguments)

    def int_(self, ast):
        """Convert the text captured under the node's ``int`` key to an int."""
        return int(ast.int)

    def float_(self, ast):
        """Convert the text captured under the node's ``float`` key to a float."""
        return float(ast.float)

    def name(self, ast):
        """Unwrap a ``name`` node to the bare name it captured."""
        return ast.name

    def assembly(self, ast):
        """Unwrap an ``assembly`` node to the value stored under ``assembly``."""
        return ast.assembly


# parser.parse(command, semantics=CloningCommandSemantics())

In [None]:
# GG
command = "@GG(J23101, BCD11, mVenus, L3S3P11, p121/BsaI)"
# 3G
# command = "@3G(@GG(UNS1, J23101, BCD11, mVenus, L3S3P11, UNS5), @GG(UNS5, J23150, CFP, BCD16, L3S2P55, UNS10), JUMP_p15a_UNS1_UNS10)"

In [None]:
# Parse the sample command with the semantic actions attached, so command
# nodes are dispatched through the `commands` table.
parser.parse(command, semantics=CloningCommandSemantics(commands))