In [None]:
import re
import string
import urllib
from datetime import datetime
from pathlib import Path

import benchlingapi
import Bio.Restriction as Restriction
import holoviews as hv
import hvplot.pandas
import matplotlib.pyplot as plt
import pandas as pd
import pygsheets
import seaborn as sns
import toml
from Bio.Seq import Seq
from tqdm.auto import tqdm

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import paulssonlab.api as api
from paulssonlab.api.util import base_url
import paulssonlab.cloning.workflow as workflow
import paulssonlab.cloning.util as cloning_util
import paulssonlab.cloning.sequence as sequence
import paulssonlab.cloning.golden_gate as golden_gate

In [None]:
# Select Bokeh as the HoloViews plotting backend for this notebook.
hv.extension("bokeh")

# Setup

In [None]:
# Load notebook settings from config.toml (path is relative to the working
# directory). Later cells read config["benchling"]["api_key"] and
# config["registry"]["folder"] from it.
config = toml.load("config.toml")

In [None]:
# Benchling API session, authenticated with the key stored in config.toml.
session = benchlingapi.Session(config["benchling"]["api_key"])

In [None]:
# Google Sheets client, authorized from the service-account file
# credentials.json (path is relative to the working directory).
gc = pygsheets.authorize(service_account_file="credentials.json")

In [None]:
# Look up the Drive IDs belonging to the "LIB" strain collection. Later cells
# index the result by "strains", "plasmids", "parts", "oligos" and
# "plasmid_maps".
col = workflow.get_strain_collection_sheets(gc.drive.service, "LIB")
col

In [None]:
# Open the first worksheet of each collection spreadsheet (`worksheet()` with
# no arguments), plus the "Sequences" tab of the parts spreadsheet.
strain_sheet = gc.open_by_key(col["strains"]).worksheet()
plasmid_sheet = gc.open_by_key(col["plasmids"]).worksheet()
part_sheet = gc.open_by_key(col["parts"]).worksheet()
part_sequences_sheet = gc.open_by_key(col["parts"]).worksheet_by_title("Sequences")
oligo_sheet = gc.open_by_key(col["oligos"]).worksheet()

In [None]:
# Reuse the Drive service attached to the plasmid worksheet's client and list
# the contents of this collection's plasmid-maps folder.
drive_service = plasmid_sheet.client.drive.service
plasmid_folder = col["plasmid_maps"]
plasmid_maps = api.google.list_drive(drive_service, root=plasmid_folder)

# Registry

In [None]:
%%timeit
df = part_sheet.get_as_df().set_index("Name*")
df.loc["YFP_CD"]

In [None]:
%%timeit
names = part_sheet.get_col(1)[1:]
idx = names.index("YFP_CD")
part_sheet.get_row(idx + 2)

In [None]:
# oligo0_sheet = gc.open_by_key(col["oligos"]).worksheet_by_title("Special (oLIB0.x)")

In [None]:
# if clear=False, RENAME ^oLIB -> oTESTA in first column
# temp: get parts, plasmid seqs
# check that GG works
# gibson
# command: primer design for plasmid -> part storage vector (oligodest=oLIT)
# PCR with flanks
# primer design for fusion parts
# commands: GG, Gib (need to specify recipient strain!)
# get parts, plasmid seqs

In [None]:
from paulssonlab.api.google import (
    get_drive_by_path,
    ensure_drive_folder,
    make_drive_folder,
    copy_drive_file,
    list_drive,
    clear_sheet,
    FOLDER_MIMETYPE,
)
from paulssonlab.cloning.workflow import rename_ids

# Collection entry types that Registry.get_collection expects to be Drive
# folders (it passes `type_ in FOLDER_TYPES` to ensure_drive_folder).
FOLDER_TYPES = ["plasmid_maps", "sequencing"]
# Collection entries are named "<prefix>_<type>": group 1 captures the prefix,
# group 2 the entry type.
COLLECTION_REGEX = r"^(.*)_(strains|plasmids|oligos|parts|sequencing|plasmid_maps)$"
# Marker file name: Registry.get_collection returns an empty mapping for any
# folder whose listing does not contain this name.
ENABLE_AUTOMATION_FILENAME = "ENABLE_AUTOMATION.txt"


class Registry:
    """Index of the collection files found under one Google Drive registry folder.

    After ``refresh()``, ``self.registry`` maps ``(prefix, type)`` tuples to
    Drive file IDs, where ``prefix`` and ``type`` are the two groups captured
    by ``COLLECTION_REGEX`` from each entry's name.

    Parameters
    ----------
    sheets_client
        Client object exposing ``.drive.service`` and ``.open_by_key(...)``
        (the notebook passes a pygsheets client).
    registry_folder
        Drive ID of the folder that contains the collection folders.
    """

    def __init__(self, sheets_client, registry_folder):
        self.sheets_client = sheets_client
        self.registry_folder = registry_folder
        self.refresh()

    def refresh(self):
        """Rebuild ``self.registry`` by scanning every collection folder.

        Raises
        ------
        ValueError
            If two collection folders contribute the same ``(prefix, type)`` key.
        """
        collection_folders = list_drive(
            self.sheets_client.drive.service, root=self.registry_folder, folder=True
        )
        registry = {}
        for collection_folder in collection_folders.values():
            new_registry = self.get_collection(collection_folder["id"])
            duplicate_keys = registry.keys() & new_registry.keys()
            if duplicate_keys:
                raise ValueError(f"found duplicate prefixes: {list(duplicate_keys)}")
            registry.update(new_registry)
        self.registry = registry

    def get_collection(self, collection_folder):
        """Return ``{(prefix, type): file_id}`` for one collection folder.

        A folder whose listing lacks ``ENABLE_AUTOMATION_FILENAME`` is ignored
        and yields an empty dict.

        Raises
        ------
        ValueError
            If the same ``(prefix, type)`` key is seen twice in the folder.
        """
        files = list_drive(self.sheets_client.drive.service, root=collection_folder)
        if ENABLE_AUTOMATION_FILENAME not in files:
            return {}
        registry = {}
        for file in files.values():
            name = file["name"]
            match = re.match(COLLECTION_REGEX, name)
            if match:
                prefix = match.group(1)
                type_ = match.group(2)
                # Folder-typed entries must be folders, all others must not be.
                ensure_drive_folder(file, type_ in FOLDER_TYPES)
                key = (prefix, type_)
                if key in registry:
                    raise ValueError(f"found duplicate prefix: {key}")
                registry[key] = file["id"]
        return registry

    def duplicate_collection(
        self,
        source_prefix,
        dest_prefix,
        source_folder_name=None,
        dest_folder_name=None,
        clear=True,
    ):
        """Copy a collection folder to a new collection with a different prefix.

        Parameters
        ----------
        source_prefix, dest_prefix
            Prefix to replace and its replacement. Only a trailing occurrence
            of ``source_prefix`` in each entry's prefix is substituted.
        source_folder_name, dest_folder_name
            Names of the collection folders inside the registry folder;
            default to ``"<prefix>_collection"``.
        clear
            If true, every copied spreadsheet's first worksheet is passed to
            ``clear_sheet``. Otherwise the first worksheet of every copied
            spreadsheet except ``parts`` is passed to ``rename_ids``.

        Returns
        -------
        Whatever ``make_drive_folder`` returned for the new collection folder.

        Raises
        ------
        ValueError
            If the destination folder already exists or the source folder is
            missing.
        """
        # TODO: handle parts spreadsheet clearing (keep formulae)
        if source_folder_name is None:
            source_folder_name = f"{source_prefix}_collection"
        if dest_folder_name is None:
            dest_folder_name = f"{dest_prefix}_collection"
        drive_service = self.sheets_client.drive.service
        collections = list_drive(drive_service, root=self.registry_folder)
        if dest_folder_name in collections:
            raise ValueError(f"collection '{dest_folder_name}' already exists")
        if source_folder_name not in collections:
            raise ValueError(f"collection '{source_folder_name}' not found")
        source_folder = collections[source_folder_name].get("id")
        source_files = list_drive(drive_service, root=source_folder, folder=True)
        dest_folder = make_drive_folder(
            drive_service, dest_folder_name, self.registry_folder
        )
        # TODO: supply a callable that renames copied files; None = no renaming.
        name_mapper = None
        # Names of auxiliary folders already created in the destination, so the
        # shared sequencing folder is not made once for plasmids and again for
        # strains.
        created_folders = set()
        for source_file in source_files.values():
            if source_file["mimeType"] == FOLDER_MIMETYPE:
                continue
            if source_file["name"] == ENABLE_AUTOMATION_FILENAME:
                # Marker file: copied verbatim, never treated as a spreadsheet.
                # Resetting `match` here keeps a match object from a previous
                # iteration from leaking into this one.
                match = None
                dest_file_name = ENABLE_AUTOMATION_FILENAME
            else:
                match = re.match(COLLECTION_REGEX, source_file["name"])
                if not match:
                    # Not a collection entry; leave it out of the copy.
                    continue
                dest_type_prefix = re.sub(
                    f"{re.escape(source_prefix)}$", dest_prefix, match.group(1)
                )
                dest_file_name = f"{dest_type_prefix}_{match.group(2)}"
            dest_body = {"name": dest_file_name, "parents": [dest_folder]}
            dest_file = (
                drive_service.files()
                .copy(fileId=source_file["id"], body=dest_body)
                .execute()
            )
            if match is None:
                continue
            type_ = match.group(2)
            # Folder suffixes match the types in COLLECTION_REGEX so that
            # get_collection recognises the copies.
            aux_types = []
            if type_ in ("plasmids", "strains"):
                aux_types.append("sequencing")
            if type_ == "plasmids":
                aux_types.append("plasmid_maps")
            for aux_type in aux_types:
                self._copy_aux_folder(
                    drive_service,
                    source_files,
                    f"{source_prefix}_{aux_type}",
                    f"{dest_type_prefix}_{aux_type}",
                    dest_folder,
                    created_folders,
                    name_mapper,
                )
            if clear:
                # get first worksheet
                dest_sheet = self.sheets_client.open_by_key(
                    dest_file["id"]
                ).worksheet()
                clear_sheet(dest_sheet)
            elif type_ not in ("parts",):
                # get first worksheet
                dest_sheet = self.sheets_client.open_by_key(
                    dest_file["id"]
                ).worksheet()
                rename_ids(dest_sheet, source_prefix, dest_type_prefix)
        return dest_folder

    def _copy_aux_folder(
        self,
        drive_service,
        source_files,
        source_name,
        dest_name,
        dest_folder,
        created_folders,
        transform_names,
    ):
        """Create ``dest_name`` under ``dest_folder`` once and copy ``source_name`` into it."""
        if dest_name in created_folders:
            return
        created_folders.add(dest_name)
        dest_subfolder = make_drive_folder(drive_service, dest_name, dest_folder)
        if source_name in source_files:
            recursive_copy(
                drive_service,
                source_files[source_name],
                dest_subfolder,
                transform_names=transform_names,
            )

    def get_loc(self, name):
        pass
        # pull number off, try to match to prefix (plasmid, strain, oligo)
        # get spreadsheet file, worksheet index
        # return worksheet, row

    def get_sequence(self, name):
        # name = pLIB99, oLIB99, Part_Name
        # try plasmid, strain, part
        # return
        # ("plasmid", SeqRecord)
        # ("part", ("5prime", SeqRecord("AAA"), "3prime"))
        pass

In [None]:
def recursive_copy(
    service, source_folder, dest_folder, folders_only=False, transform_names=None
):
    """Placeholder for copying a Drive folder tree; not implemented yet.

    Currently does nothing and returns ``None``.

    Parameters
    ----------
    service
        Drive service object.
    source_folder, dest_folder
        Source and destination folders. ``Registry.duplicate_collection``
        passes a Drive file entry for the source and the result of
        ``make_drive_folder`` for the destination.
    folders_only
        Reserved for the future implementation; currently unused.
    transform_names
        Accepted so that ``Registry.duplicate_collection`` can pass
        ``transform_names=...`` without raising ``TypeError``; currently unused.
    """
    # TODO: implement the recursive copy.
    return None

In [None]:
# Build the registry from the Drive folder configured under
# config["registry"]["folder"]. The constructor calls refresh(), so this cell
# queries Drive.
registry = Registry(gc, config["registry"]["folder"])

In [None]:
# Copy the "LIB" collection into a new "TESTA" collection. With clear=False
# the copied sheets are not cleared; every sheet except parts is passed to
# rename_ids instead. This creates files on Drive.
registry.duplicate_collection("LIB", "TESTA", clear=False)

In [None]:
# Open the first worksheet of one specific spreadsheet by its hard-coded key.
# NOTE(review): presumably a scratch/test copy — confirm before running the
# cells below, which modify it.
w = gc.open_by_key("1ZQubxSLcMyaIbcAbk286KwCIpmrKjMh6-_ApWxjSYYI").worksheet()

In [None]:
# Manual check of rename_ids on the worksheet opened above; presumably rewrites
# IDs from the "pLIB" prefix to "pTESTA" — see workflow.rename_ids for details.
workflow.rename_ids(w, "pLIB", "pTESTA")

In [None]:
# Manual check of clear_sheet on the same worksheet. This modifies the sheet;
# see api.google.clear_sheet for exactly what is removed.
api.google.clear_sheet(w)

In [None]:
# Re-scan the registry folder so that changes made on Drive since the registry
# was built are reflected in registry.registry.
registry.refresh()

In [None]:
# Display the (prefix, type) -> Drive file ID mapping.
registry.registry

# GG

In [None]:
# gibson.assemble -> hhh

In [None]:
# Fetch the five plasmid sequences used in the Golden Gate example below.
# NOTE(review): get_plib_seq is not defined in the visible part of this
# notebook — confirm it is defined or imported before this cell runs.
seq1, seq2, seq3, seq4, seq5 = (
    get_plib_seq(drive_service, plib_number)
    for plib_number in (1, 82, 23, 95, 110)
)

In [None]:
# Inputs for the Golden Gate assembly, one tuple per fragment.
# NOTE(review): the tuple layout looks like (sequence, restriction enzyme,
# part name, feature type) — confirm against golden_gate.assemble.
# Three of the fragments are reverse-complemented before assembly.
to_join = [
    (sequence.reverse_complement(seq1), Restriction.BsaI, "Name1", "promoter"),
    (sequence.reverse_complement(seq2), Restriction.BsaI, "Name2", "RBS"),
    (seq3, Restriction.BsaI, "Name3", "CDS"),
    (seq4, Restriction.BsaI, "Name4", "terminator"),
    (sequence.reverse_complement(seq5), Restriction.BsaI, "Name5", "misc_feature"),
]

# linear=False presumably requests a circular product — TODO confirm.
assembly = golden_gate.assemble(to_join, linear=False)
assembly

In [None]:
# Write the assembled construct in GenBank format to the current user's
# Downloads folder. The path is built from the home directory instead of a
# hard-coded absolute path, so the cell is not tied to one machine.
output_path = Path.home() / "Downloads" / "test3.gb"
with open(output_path, "w") as f:
    f.write(assembly.format("gb"))

# 3G

# Command parsing

In [None]:
import tatsu

In [None]:
# TatSu grammar for cloning commands, assembled from three pieces.
# The grammar bodies are raw strings: they contain regex escapes (\w, \s, \d,
# \.) that are invalid escape sequences in ordinary string literals and
# trigger a warning on recent Python versions. The resulting text is unchanged.

# Directives prepended when the reference sub-grammar is compiled on its own.
grammar_preamble = """@@grammar::CLONING
@@whitespace :: //"""

# Rules describing a sequence reference: a bare name, a PCR product
# (template~primer1,primer2) or a restriction digest (input/enzyme).
reference_grammar = r"""reference
    =
    | pcr
    | restriction_digest
#    | assembly
    | name
    ;

name = name:/\w+/ ;

pcr = template:reference '~' ~ primer1:name ',' primer2:name ;

restriction_digest = input:reference '/' ~ enzyme:name ;

#assembly = assembly+:name {'-' ~ assembly+:name}+ ;
"""

# Full command grammar: "@Name(arg, ...)" with nested commands, literals,
# "$" lookups and references. Doubled braces are literal braces in the f-string.
grammar = rf"""start = command $ ;

argument
    =
    | quoted_string
    | command
    | float
    | int
    | lookup
    | reference
    ;

ws = /\s*/ ;

command_name = '@' ~ @:/\w+/ ;

command_arglist = '(' ~ ws @+:argument ws {{',' ws @+:argument ws }}* ')' ;

command = command_name:command_name arguments:command_arglist ;

quoted_string = '"' ~ quoted_string:/[^"]*/ '"' ;

float = float:/\d+\.\d+/ ;

int = int:/\d+/ ;

lookup = '$' ~ name ;

{reference_grammar}
"""

In [None]:
# Compile the command grammar and parse a sample nested command without any
# semantics; the last line displays the resulting AST.
parser = tatsu.compile(grammar)
command = "@Gib(@GG(UNS1, J23101, BCD11, UNS5), pLIB47~oLIB22,oLIB24/BsaI)"
ast = parser.parse(command)
ast

In [None]:
# command = (
#     "@3G(UNS1-J23101-BCD11-mVenus-L3S3P11-UNS5, UNS5-J23150-CFP-BCD16-L3S2P55-UNS10)"
# )
# command = "@Gib(@GG(UNS1, J23101, BCD11, UNS5), pLIB47~oLIB22,oLIB24/BsaI)"
# command = "@Gib(@GG(UNS1, J23101, BCD11, UNS5), @PCR(pLIB47, oLIB22, oLIB24)/BsaI)"
# command = (
#     "@Gib(@GG(UNS1, J23101, BCD11, UNS5), @RE(@PCR(pLIB47, oLIB22, oLIB24), BsaI))"
# )

In [None]:
part_sheet.sync?

In [None]:
def get_named_sequence(name, part_sheet, plasmid_maps):
    """Stub for resolving ``name`` to a sequence; currently always returns ``None``.

    Planned lookup order (not implemented): plasmid, strain, part.
    """
    return None


def goldengate(*args):
    """Placeholder Golden Gate handler: tag the received arguments with ``"gg"``."""
    tagged = ("gg", args)
    return tagged


def threeg(*args):
    """Placeholder 3G handler: tag the received arguments with ``"3g"``."""
    tagged = ("3g", args)
    return tagged


# Command name (as written after "@" in a command string) -> handler function.
commands = {"GG": goldengate, "3G": threeg}

# get_named_sequence requires the parts sheet and plasmid-map listing as well
# as the name; calling it with the name alone raises TypeError.
get_named_sequence("pLIB27", part_sheet, plasmid_maps)

In [None]:
# Stand-alone parser for the reference sub-grammar only, compiled together
# with the preamble directives.
reference_parser = tatsu.compile(grammar_preamble + reference_grammar)


class CloningCommandSemantics(object):
    """Semantic actions that evaluate a parsed cloning command.

    Parameters
    ----------
    commands
        Mapping from command name (the text after ``@``) to a callable. The
        callable receives the parsed argument list as a single positional
        argument.
    """

    def __init__(self, commands):
        self.commands = commands

    def command(self, ast):
        """Dispatch a ``command`` node to its registered handler.

        Raises
        ------
        tatsu.semantics.SemanticError
            If the command name is not a key of ``self.commands``.
        """
        if ast.command_name not in self.commands:
            # List the commands registered on this instance, not a notebook global.
            raise tatsu.semantics.SemanticError(
                "command must be one of: {}".format(
                    ", ".join(f"@{k}" for k in self.commands)
                )
            )
        handler = self.commands[ast.command_name]
        # NOTE(review): the argument list is passed as one positional argument;
        # handlers declared with *args receive it as a 1-tuple. Confirm whether
        # it should be unpacked instead.
        return handler(ast.arguments)

    def int_(self, ast):
        """Convert an ``int`` node (text stored under the ``int`` key) to ``int``."""
        return int(ast["int"])

    def float_(self, ast):
        """Convert a ``float`` node (text stored under the ``float`` key) to ``float``."""
        return float(ast["float"])

    def name(self, ast):
        """Return the matched identifier of a ``name`` node."""
        return ast.name

    def assembly(self, ast):
        """Return the collected names of an ``assembly`` node."""
        return ast.assembly


# parser.parse(command, semantics=CloningCommandSemantics())

In [None]:
# GG
command = "@GG(J23101, BCD11, mVenus, L3S3P11, p121/BsaI)"
# 3G
# command = "@3G(@GG(UNS1, J23101, BCD11, mVenus, L3S3P11, UNS5), @GG(UNS5, J23150, CFP, BCD16, L3S2P55, UNS10), JUMP_p15a_UNS1_UNS10)"

In [None]:
# Parse the sample command, dispatching each @command through the handlers
# registered in `commands`.
parser.parse(command, semantics=CloningCommandSemantics(commands))