In [None]:
import pandas as pd
import toml
import re
import pygsheets
from tqdm.auto import tqdm
import Bio.Restriction as Restriction
import benchlingapi

In [None]:
# Reload imported modules automatically before each cell executes, so edits to
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import paulssonlab.api as api
from paulssonlab.api.util import base_url
import paulssonlab.cloning.workflow as workflow
import paulssonlab.cloning.util as cloning_util
import paulssonlab.cloning.sequence as sequence
import paulssonlab.cloning.registry as registry
import paulssonlab.cloning.enzyme as enzyme

# Setup

In [None]:
# Load notebook settings from config.toml in the working directory;
# config["registry"]["folder"] is read below when the registry is built.
config = toml.load("config.toml")

In [None]:
# Authorize a pygsheets client using the service-account key file
# credentials.json from the working directory.
gc = pygsheets.authorize(service_account_file="credentials.json")

In [None]:
# Registry handle built from the Sheets client and the folder named in the
# [registry] section of config.toml.
reg = registry.Registry(gc, config["registry"]["folder"])

# NAO strain sheet parsing

In [None]:
# Open the NAO spreadsheet by key, select two of its worksheets (value=0 and
# value=2 -- presumably tab positions; confirm against pygsheets' default
# lookup property), and read each one into a DataFrame.
nao_sheets = gc.open_by_key("1nuEeeEwU4Rj4HFjwCWKi6P9sapH2-mPDf4XSDQBiptk")
tus_sheet, strains_sheet = (nao_sheets.worksheet(value=tab) for tab in (0, 2))
tus, strains = tus_sheet.get_as_df(), strains_sheet.get_as_df()

In [None]:
# NOTE(review): leftover interactive help lookup (IPython `?` syntax, not plain
# Python). Consider removing this cell before sharing the notebook.
tus_sheet.get_as_df?

In [None]:
# Fetch the LIB "parts" and "strains" registry tables (parts first, then
# strains); both are used as DataFrames in the cells below.
lib_parts, lib_strains = (
    reg.get_df_by_id(reg.registry[("LIB", table)]) for table in ("parts", "strains")
)
# plib_plasmids = reg.get_df_by_id(reg.registry[("pLIB","plasmids")])

In [None]:
# Spot-check: first parts row whose "Usage" text contains "pLIB1/".
lib_parts.loc[lib_parts["Usage"].str.contains("pLIB1/")].iloc[0]

In [None]:
# Spot-check: "Description" of the first parts row whose "Tags" contain "3g".
lib_parts.loc[lib_parts["Tags"].str.contains("3g"), "Description"].iloc[0]

In [None]:
# Table of strains that have a non-empty registry ID and "3g" in their Tags,
# reduced to the plasmid and notes columns, plus the plate well parsed from
# the "Well: <id>" annotation in "Other Notes".
plasmid_wells = lib_strains.loc[
    lib_strains["Tags"].str.contains("3g", na=False)
    & (lib_strains.index.str.len() != 0),
    ["Plasmids", "Other Notes"],
].copy()  # explicit copy: adding "Well" must not write into a slice of lib_strains
plasmid_wells["Well"] = plasmid_wells["Other Notes"].str.extract(
    r"Well: ([A-Z0-9]+)", expand=False
)
# Fail loudly (and name the offending strains) instead of crashing on
# `None.group(1)` when a row carries no well annotation.
if plasmid_wells["Well"].isna().any():
    raise ValueError(
        "no 'Well: <id>' annotation found in 'Other Notes' for strains: "
        f"{list(plasmid_wells.index[plasmid_wells['Well'].isna()])}"
    )

In [None]:
# Lookup from plate well to the plasmid recorded for that well.
well_to_plasmid = dict(zip(plasmid_wells["Well"], plasmid_wells["Plasmids"]))

In [None]:
# Canonical spellings of part names, keyed by their lower-cased form so that
# differently capitalized entries can be normalized.
name_corrections = {
    canonical.lower(): canonical for canonical in ("PsigW", "sigW", "rsiW")
}

In [None]:
def get_part_name(reg, parts_sheet, well_to_plasmid, name_corrections, name):
    """Resolve an identifier from the NAO sheets to a part name.

    ``name`` is interpreted by the first rule that applies:

    1. ``LIB...`` (strain ID, e.g. ``LIB262``): the strain is fetched with
       ``reg.get`` and its single plasmid is resolved recursively.
    2. ``pLIB...`` (plasmid ID): the index label of the first ``parts_sheet``
       row whose "Usage" text contains ``"<name>/"``.
    3. All digits (e.g. ``199``): treated as ``pLIB199``.
    4. Well (e.g. ``A11``, ``2B3``, ``C5b``): normalized to a plate-prefixed
       well (no prefix -> plate ``1``; trailing ``b`` -> plate ``2``), mapped
       to a plasmid via ``well_to_plasmid`` and resolved recursively.
    5. A key of ``name_corrections`` after lower-casing: the corrected
       spelling is returned.
    6. Anything else is returned unchanged.

    Args:
        reg: registry object; only ``reg.get(name)`` is used, and the result
            is indexed with ``"Plasmids"``.
        parts_sheet: DataFrame with a string "Usage" column, indexed by part name.
        well_to_plasmid: mapping from plate-prefixed well (e.g. ``"1A11"``) to
            plasmid ID.
        name_corrections: mapping from lower-cased part name to its canonical
            spelling.
        name: identifier string to resolve.

    Returns:
        The resolved part name.

    Raises:
        ValueError: a strain does not list exactly one plasmid.
        IndexError: no ``parts_sheet`` row mentions the plasmid in "Usage".
        KeyError: a well is not present in ``well_to_plasmid``.
    """
    # LIB number: LIB262
    if name.startswith("LIB"):
        strain = reg.get(name)
        plasmids = re.split(r"\s*,\s*", strain["Plasmids"])
        if len(plasmids) != 1:
            raise ValueError(
                f"expecting a single plasmid for strain {name}: {plasmids}"
            )
        return get_part_name(
            reg, parts_sheet, well_to_plasmid, name_corrections, plasmids[0]
        )
    # pLIB plasmid ID: pLIB199
    if name.startswith("pLIB"):
        # Literal substring match (the ID is not a regex); the trailing "/"
        # keeps pLIB1 from matching pLIB11. Missing Usage cells never match.
        usage_hits = parts_sheet["Usage"].str.contains(
            f"{name}/", regex=False, na=False
        )
        matching_parts = parts_sheet[usage_hits]
        if matching_parts.empty:
            raise IndexError(f"no part whose Usage contains {name + '/'!r}")
        return matching_parts.iloc[0].name
    # pLIB number: 199
    if name.isdigit():  # isdigit isn't robust, but fine for this
        return get_part_name(
            reg, parts_sheet, well_to_plasmid, name_corrections, f"pLIB{name}"
        )
    # well number: A11
    # fullmatch (not match): a part name that merely *starts* like a well,
    # e.g. "B0034", must fall through to the part rules below.
    if re.fullmatch(r"(1|2)?[A-H][0-9]{1,2}b?", name):
        well = name
        if well[-1] == "b":
            well = f"2{well[:-1]}"
        if well[0] not in ("1", "2"):
            well = f"1{well}"
        return get_part_name(
            reg, parts_sheet, well_to_plasmid, name_corrections, well_to_plasmid[well]
        )
    # part capitalization: Psigw -> PsigW, SigW -> sigW
    if name.lower() in name_corrections:
        return name_corrections[name.lower()]
    # part
    return name

In [None]:
# Count how often each identifier occurs in columns 4-7 of the TU sheet
# (presumably the promoter/RBS/CDS/terminator columns -- TODO confirm positions).
nao_names = tus.iloc[:, 4:8].melt()["value"].value_counts()
nao_names

In [None]:
# Map every raw identifier (as a string) to its resolved part name.
name_map = {
    raw_name: get_part_name(
        reg, lib_parts, well_to_plasmid, name_corrections, raw_name
    )
    for raw_name in map(str, nao_names.index)
}

In [None]:
# Preview the free-text TU description column.
tus.loc[
    :,
    "Description of TU function (e.g. expression strength, expressed protein, circuit it is intended for)",
]

In [None]:
# Build one "@GG(...)" command string per TU row, keyed by the integer part of
# its tLIB number. Part identifiers are translated through name_map.
tu_to_command = {}
for _, tu in tus.iterrows():
    uns_start, uns_end = tu["UNS_A"], tu["UNS_E"]
    promoter, rbs, cds, term = (
        name_map[str(tu[column])]
        for column in (
            "Promoter (pLIB number or 3G part ID) ",
            "Ribosome binding site (pLIB number or 3G part ID) ",
            "Coding region (pLIB number or 3G part ID) ",
            "Terminator (pLIB number or 3G part ID) ",
        )
    )
    gg_command = (
        f"@GG(UNS{uns_start}_A, {promoter}, {rbs}, {cds}, {term}, UNS{uns_end}_E)"
    )
    tu_number = int(tu["tLIB number"].replace("tLIB", ""))
    tu_to_command[tu_number] = gg_command

In [None]:
# For the last few strains, expand the comma-separated TU numbers into their
# command strings.
strains["TUs"].tail().map(
    lambda tu_ids: ", ".join(
        tu_to_command[int(tu_id)] for tu_id in str(tu_ids).split(",")
    )
).values

In [None]:
# Display the complete tLIB-number -> command mapping built above.
tu_to_command