In [None]:
import pandas as pd
import re
import toml
import pygsheets
import requests
import Bio.Restriction as Restriction
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import paulssonlab.api as api
import paulssonlab.cloning.registry as registry
import paulssonlab.cloning.workflow as workflow
import paulssonlab.cloning.sequence as sequence
import paulssonlab.cloning.enzyme as enzyme
import paulssonlab.api.geneious as geneious

# Setup

In [None]:
# Notebook settings are read from config.toml in the working directory;
# config["registry"]["folder"] is consumed when the Registry is constructed.
config = toml.load("config.toml")

In [None]:
# Authorize the pygsheets client from a service-account file expected next to
# the notebook. NOTE: despite the name, `gc` is the Sheets client, not the
# stdlib garbage-collector module.
gc = pygsheets.authorize(service_account_file="credentials.json")

In [None]:
# Registry handle built from the authorized Sheets client and the folder
# configured under [registry] folder in config.toml.
reg = registry.Registry(gc, config["registry"]["folder"])

# Config

In [None]:
# Handles to the registry sheets used below, looked up by key tuple in the
# same order as the names on the left-hand side.
(
    olib_oligos,
    olt_oligos,
    plib_plasmids,
    plib_maps,
    flib_fragments,
    part_types,
) = (
    reg[sheet_key]
    for sheet_key in (
        ("oLIB", "oligos"),
        ("oLT", "oligos"),
        ("pLIB", "plasmids"),
        ("pLIB", "maps"),
        ("fLIB", "fragments"),
        ("fLIB", "fragments", "Part types"),
    )
)

# Ingest missing parts

## Double Terminators

In [None]:
def ingest_part(id_, base_row, storage_vector_id, storage_enzyme, part_enzyme=None):
    """Fill in fragment row ``id_`` and its storage plasmid from the oLT oligo sheet.

    The fragment's existing "Name" is used to find its source oligo (the
    ``<name>_sense`` / ``<name>_antisense`` annealed pair is preferred over a
    single oligo called ``<name>``) and the plasmid whose "Names" matches.
    The fragment row, the plasmid row and the plasmid map are then updated
    locally; nothing is committed here.

    Parameters
    ----------
    id_ : key of the row in ``flib_fragments`` to fill in.
    base_row : dict of column values shared by all parts of this batch; its
        "Description" is also written to the plasmid row.
    storage_vector_id : ID of the vector used in the ``@GG`` storage command.
    storage_enzyme : enzyme used in the ``@GG`` storage command.
    part_enzyme : enzyme used to digest the part out of the oligo/plasmid.
        When omitted, the notebook-level ``part_enzyme`` global is used, which
        is how this function behaved before the parameter existed.

    Raises
    ------
    ValueError
        If the oligo, its antisense partner, or the plasmid cannot be found.
    NameError
        If ``part_enzyme`` is neither passed nor defined as a global.
    """
    if part_enzyme is None:
        # Backward-compatible fallback to the notebook global of the same name.
        try:
            part_enzyme = globals()["part_enzyme"]
        except KeyError:
            raise NameError("name 'part_enzyme' is not defined") from None
    name = flib_fragments[id_]["Name"]
    # this correctly prefers the annealed version of Voigt_DT100 (gBlock was bad)
    oligo_row = olt_oligos.find({"Name": f"{name}_sense"})
    if oligo_row is None:
        oligo_row = olt_oligos.find({"Name": name})
    if oligo_row is None:
        raise ValueError(f"cannot find oligo for '{name}'")
    plasmid_id = plib_plasmids.find_id({"Names": name})
    if plasmid_id is None:
        raise ValueError(f"cannot find plasmid for '{name}'")
    if oligo_row["Name"].endswith("_sense"):
        antisense_id = olt_oligos.find_id({"Name": f"{name}_antisense"})
        if antisense_id is None:
            # Without this check "<sense>=None" would be written to the sheets.
            raise ValueError(f"cannot find antisense oligo for '{name}'")
        storage_usage = "{}={}".format(oligo_row["ID"], antisense_id)
    else:
        storage_usage = oligo_row["ID"]
    # The part can be obtained either from the plasmid or from the oligo(s).
    usage = f"{plasmid_id}/{part_enzyme},{storage_usage}/{part_enzyme}"
    seq = workflow.re_digest_part(oligo_row["Sequence"], part_enzyme)
    flib_fragments[id_] = {
        **base_row,
        "Name": name,
        "Usage": usage,
        "Sequence": seq.seq_lower(),
    }
    command = f"@GG({storage_usage}/{storage_enzyme}, {storage_vector_id}/{storage_enzyme})"
    plib_plasmids.upsert(
        {"ID": plasmid_id, "Description": base_row["Description"], "Command": command}
    )
    plasmid_seq = reg.eval_command(command)["_seq"]
    plib_maps[plasmid_id] = plasmid_seq

In [None]:
# IDs of every fragment row whose Name starts with "Voigt_DT".
double_terminators = [
    fragment_id
    for fragment_id, fragment in flib_fragments.items()
    if fragment["Name"].startswith("Voigt_DT")
]

In [None]:
# Batch settings for the double-terminator parts. `part_enzyme` is read by
# ingest_part as a notebook global, so this cell must run before the ingest loop.
part_type = "Terminator_DE"
overhangs = workflow.overhangs_for(part_types[part_type])
part_enzyme = Restriction.BsaI
storage_enzyme = Restriction.BsmBI
storage_vector_id = "pLIB112"

# Column values shared by every fragment row written in this batch.
base_row = {
    "Upstream overhang": overhangs[0],
    "Downstream overhang": overhangs[1],
    "Type": part_type,
    "Author": "Jacob Quinn Shenker",
    "Date": workflow.date(),
    "Species/codon usage": "E. coli",
    "Description": "Voigt strong double terminator",
    "Reference": "Park, Y., Espah Borujeni, A., Gorochowski, T.E., Shin, J. and Voigt, C.A., 2020. Precision design of stable genetic circuits carried in highly‐insulated E. coli genomic landing pads. Molecular systems biology, 16(8), p.e9584.",
}

In [None]:
# Ingest every double terminator with the batch settings defined above.
# Changes stay local until the sheets are committed in the cells further down.
for id_ in double_terminators:
    ingest_part(id_, base_row, storage_vector_id, storage_enzyme)

## RiboJs

In [None]:
# Batch settings for the RiboJ parts. `part_enzyme` is read by ingest_part as
# a notebook global, so this cell must run before the ingest loop.
storage_vector_id = "pLIB112"
storage_enzyme = Restriction.BsmBI
part_enzyme = Restriction.BsaI

part_type = "5UTR_1"
overhangs = workflow.overhangs_for(part_types[part_type])

# Column values shared by every fragment row written in this batch.
base_row = {
    "Upstream overhang": overhangs[0],
    "Downstream overhang": overhangs[1],
    "Type": part_type,
    "Author": "Jacob Quinn Shenker",
    "Date": workflow.date(),
    "Species/codon usage": "E. coli",
    "Description": "Voigt ribozyme insulator",
    "Reference": "Nielsen, A.A., Der, B.S., Shin, J., Vaidyanathan, P., Paralanov, V., Strychalski, E.A., Ross, D., Densmore, D. and Voigt, C.A., 2016. Genetic circuit design automation. Science, 352(6281), p.aac7341.",
}

In [None]:
# Ingest fLIB325 through fLIB340 (the range end is exclusive) with the
# batch settings defined above.
for id_ in map("fLIB{}".format, range(325, 341)):
    ingest_part(id_, base_row, storage_vector_id, storage_enzyme)

In [None]:
flib_fragments.commit()

In [None]:
plib_plasmids.commit()

In [None]:
plib_maps.commit()

# Test

In [None]:
len(reg.get("fLIB255")["_seq"])

In [None]:
s = reg.get("mKate2_nocut")
s

In [None]:
len(s["_seq"])

In [None]:
s["_seq"]

In [None]:
s = reg.get("pLIB112")["_seq"]

In [None]:
reg.eval_expr("pLIB112/AarI/BbsI")

# NAO strains

In [None]:
# Open the NAO spreadsheet by key and load two of its worksheets as
# DataFrames: value=0 holds the transcriptional units, value=2 the strains.
nao_sheets = gc.open_by_key("1nuEeeEwU4Rj4HFjwCWKi6P9sapH2-mPDf4XSDQBiptk")

tus_sheet = nao_sheets.worksheet(value=0)
tus = tus_sheet.get_as_df()

strains_sheet = nao_sheets.worksheet(value=2)
strains = strains_sheet.get_as_df()

In [None]:
# `.remote` of each registry sheet, indexed as a DataFrame by the cells below
# (column selection, `.str` accessors).
flib_fragments_df, plib_plasmids_df, lib_strains_df = (
    reg[sheet_key].remote
    for sheet_key in (
        ("fLIB", "fragments"),
        ("pLIB", "plasmids"),
        ("LIB", "strains"),
    )
)

In [None]:
# Strains from the 3G assembly kit that carry a name, reduced to the plasmid
# and notes columns. Selecting rows and columns in one `.loc` and taking an
# explicit copy means the "Well" assignment below writes to an independent
# frame rather than to a chained-filter slice (no SettingWithCopyWarning).
plasmid_wells = lib_strains_df.loc[
    (lib_strains_df["Names"].str.len() != 0)
    & (
        lib_strains_df["Reference"]
        == "Halleran, A. D., Swaminathan, A., & Murray, R. M. (2018). Single day construction of multigene circuits with 3G assembly. ACS synthetic biology, 7(5), 1477-1480."
    ),
    ["Plasmids", "Other Notes"],
].copy()
# First "Well: <id>" occurrence in the notes; rows without one become NaN.
plasmid_wells["Well"] = plasmid_wells["Other Notes"].str.extract(
    r"Well: ([A-Z0-9]+)", expand=False
)
# Keep only the rows for which a well was found.
plasmid_wells = plasmid_wells[plasmid_wells["Well"].notna()]

In [None]:
# Lookup from well ID to the plasmid stored in that well (for a duplicated
# well the last row wins).
well_to_plasmid = dict(zip(plasmid_wells["Well"], plasmid_wells["Plasmids"]))

In [None]:
# Canonical capitalization of part names, keyed by the lowercased name so that
# lookups are case-insensitive.
name_corrections = {
    canonical.lower(): canonical for canonical in ("PsigW", "sigW", "rsiW")
}

In [None]:
def _get_part_name(reg, parts_sheet, well_to_plasmid, name_corrections, name):
    # LIB number: LIB262
    if name.startswith("LIB"):
        strain = reg.get(name)
        plasmids = re.split(r"\s*,\s*", strain["Plasmids"])
        if len(plasmids) != 1:
            raise ValueError(
                f"expecting a single plasmid for strain {name}: {plasmids}"
            )
        plasmid = plasmids[0]
        return get_part_name(
            reg, parts_sheet, well_to_plasmid, name_corrections, plasmid
        )
    elif name.startswith("pLIB"):
        part_name = (
            parts_sheet[parts_sheet["Usage"].str.contains(f"{name}/")].iloc[0].name
        )
        return part_name
    # pLIB number: 199
    elif name.isdigit():  # isdigit isn't robust, but fine for this
        return get_part_name(
            reg, parts_sheet, well_to_plasmid, name_corrections, f"pLIB{name}"
        )
    # well number: A11
    elif re.match(r"(1|2)?[A-H][0-9]{1,2}b?", name):
        well = name
        if well[-1] == "b":
            well = f"2{well[:-1]}"
        if well[0] not in ("1", "2"):
            well = f"1{well}"
        plasmid = well_to_plasmid[well]
        return get_part_name(
            reg, parts_sheet, well_to_plasmid, name_corrections, plasmid
        )
    # part capitalization: Psigw -> PsigW, SigW -> sigW
    elif name.lower() in name_corrections:
        return name_corrections[name.lower()]
    # part
    return name


def get_part_name(reg, parts_sheet, well_to_plasmid, name_corrections, name):
    """Split a "+"-joined label into components with parenthetical notes removed.

    For example ``"331 (LacI) +199+SCFP3A"`` gives ``["331", "199", "SCFP3A"]``.

    NOTE(review): ``reg``, ``parts_sheet``, ``well_to_plasmid`` and
    ``name_corrections`` are accepted but not used, and ``_get_part_name`` is
    never called from here, so components are returned unresolved. Confirm
    whether each component was meant to be passed through ``_get_part_name``.
    """
    parenthetical = re.compile(r"\s*\([^)]*\)\s*")
    components = []
    for component in re.split(r"\s*\+\s*", name):
        components.append(parenthetical.sub("", component))
    return components


# Representative labels of each supported form (plasmid numbers, wells, strain
# IDs, "+"-joined composites, annotated names) for eyeballing get_part_name.
test_names = [
    "199",
    "DT3",
    "Psigw",
    "A4b",
    "LIB254",
    "ScmJ+fLIB238",
    "mVenus",
    "SCFP3A",
    "fLIB311",
    "358 (pTet)",
    "331 (LacI) +199+SCFP3A",
    "ScmJ+fLIB314",
    "323 (PhlF)+199+mVenus",
    "RiboJ+oLT81",
    "RiboJ+PH_BsaI",
    "sigw+Deg_PH",
    "RsiW+Deg_PH",
    "Barcode PH",
    "Long Barcode PH",
]
for test_name in test_names:
    resolved = get_part_name(
        reg, flib_fragments_df, well_to_plasmid, name_corrections, test_name
    )
    print(f"{test_name} -> {resolved}")

In [None]:
# Occurrence count of every distinct value found in columns 4-7 of the TU
# sheet, printed in full (no row truncation).
nao_names = tus.iloc[:, 4:8].melt()["value"].value_counts()
with pd.option_context("display.max_rows", None):
    print(nao_names)

In [None]:
# Map every raw label from the TU sheet (as a string) to its parsed part
# name(s). The fragments DataFrame is passed, matching the test cell above and
# the DataFrame API that _get_part_name uses on `parts_sheet`; the
# `flib_fragments` sheet handle is not a DataFrame.
name_map = {
    str(raw_name): get_part_name(
        reg, flib_fragments_df, well_to_plasmid, name_corrections, str(raw_name)
    )
    for raw_name in nao_names.index
}

In [None]:
tus[
    "Description of TU function (e.g. expression strength, expressed protein, circuit it is intended for)"
]

In [None]:
def _as_command_args(parts):
    """Render a name_map value (a string or a list of strings) as command arguments.

    get_part_name returns a list, and interpolating a list into the command
    would embed its Python repr (``['331', '199']``).
    NOTE(review): multi-part slots are joined with ", " so each component
    becomes its own @GG argument -- confirm this is the intended assembly.
    """
    if isinstance(parts, str):
        return parts
    return ", ".join(parts)


# Build one @GG command per transcriptional unit, keyed by its integer tLIB number.
tu_to_command = {}
for _, row in tus.iterrows():
    # A "*" in the UNS columns is rewritten to the "_r" suffix.
    uns_a = str(row["UNS_A"]).replace("*", "_r")
    uns_e = str(row["UNS_E"]).replace("*", "_r")
    promoter, rbs, cds, term = (
        _as_command_args(name_map[str(row[column])])
        for column in (
            "Promoter (pLIB number or 3G part ID) ",
            "Ribosome binding site (pLIB number or 3G part ID) ",
            "Coding region (pLIB number or 3G part ID) ",
            "Terminator (pLIB number or 3G part ID) ",
        )
    )
    command = f"@GG(UNS{uns_a}_A, {promoter}, {rbs}, {cds}, {term}, UNS{uns_e}_E)"
    # str() so a cell that was parsed as a number does not break .replace.
    tu_to_command[int(str(row["tLIB number"]).replace("tLIB", ""))] = command

In [None]:
tu_to_command[193]

In [None]:
# Keep the columns from "TUs" onward and only the rows that list at least one
# TU. The values are cast to str first so a purely numeric TUs column still
# supports `.str`. The explicit copy lets later cells add columns without
# pandas warning about writing to a slice.
strains = strains.loc[strains["TUs"].astype(str).str.len() != 0, "TUs":].copy()

In [None]:
strains

In [None]:
strains["Plasmids"].unique()

In [None]:
# Backbone name to use in the @Gib command, keyed by the value of a strain's
# "Plasmids" column (an empty cell maps to the first entry).
backbone_pcrs = {
    "": "V37m_UNS1_UNS10",
    "p15A": "JUMP_p15a_UNS1_UNS10",
    "pSC101": "JUMP_pSC101_UNS1_UNS10",
}

In [None]:
# One @Gib command per strain: the @GG commands of its comma-separated TUs
# followed by the backbone selected by its "Plasmids" value.
strains["command"] = [
    "@Gib({}, {})".format(
        ", ".join(tu_to_command[int(tu)] for tu in str(tu_field).split(",")),
        backbone_pcrs[plasmid_field],
    )
    for tu_field, plasmid_field in zip(strains["TUs"], strains["Plasmids"])
]

In [None]:
strains["command"].tail().values

In [None]:
strains.loc[152:158]["command"].values

In [None]:
# L3S1P13, L3S3P11, L3S2P55, L3S2P11

In [None]:
def eval_command(cmd, registry, context=None):
    """Parse ``cmd`` with ``commands.command_parser`` and return the parse result.

    The parser is given a ``CommandSemantics`` built from ``commands._commands``
    and a ``CommandContext`` created from ``registry`` and ``context``.

    NOTE(review): ``commands`` is not imported in any cell visible in this
    notebook, so this raises NameError on a fresh kernel -- confirm the
    intended import. The ``registry`` parameter also shadows the
    ``paulssonlab.cloning.registry`` module inside this function.
    """
    command_context = commands.CommandContext(registry, context)
    semantics = commands.CommandSemantics(commands._commands, command_context)
    return commands.command_parser.parse(cmd, semantics=semantics)

In [None]:
res = eval_command(strains.loc[152]["command"], reg, context="pLIB")

In [None]:
res

# GG test

In [None]:
import paulssonlab.cloning.design as design

In [None]:
design.golden_gate_placeholder?

In [None]:
def _make_frag(overhang1, overhang2):
    """Return item 1 of the BbsI digest of a BsaI/BbsI Golden Gate placeholder.

    The placeholder is built by ``design.golden_gate_placeholder`` from the
    two given overhangs.
    """
    placeholder = design.golden_gate_placeholder(
        Restriction.BsaI, Restriction.BbsI, overhang1, overhang2
    )
    digest_products = enzyme.re_digest(placeholder, Restriction.BbsI)
    return digest_products[1]

In [None]:
print(_make_frag("aaaa", "gggg"))