In [None]:
import toml
import re
import urllib
from datetime import datetime
import pygsheets
import benchlingapi
import requests_html

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import paulssonlab.api as api
from paulssonlab.api.util import base_url
import paulssonlab.cloning.workflow as workflow

# Setup

In [None]:
# Load local settings; the Benchling API key is read from this file rather than hardcoded.
config = toml.load("config.toml")

In [None]:
# Benchling API session created from the key stored under [benchling] api_key.
session = benchlingapi.Session(config["benchling"]["api_key"])

In [None]:
# Google Sheets client authorised through a service-account credentials file.
gc = pygsheets.authorize(service_account_file="credentials.json")

In [None]:
# Look up the "LIB" strain collection; later cells read the "strains", "plasmids" and
# "plasmid_maps" entries of the result.
col = workflow.get_strain_collection_sheets(gc.drive.service, "LIB")
col

In [None]:
# Open both spreadsheets and take the worksheet returned by worksheet() with default
# arguments (presumably the first tab -- TODO confirm against pygsheets).
strain_sheet = gc.open_by_key(col["strains"]).worksheet()
plasmid_sheet = gc.open_by_key(col["plasmids"]).worksheet()

# 3G/JUMP/Densmore

In [None]:
# Addgene kit landing pages. The 3G and Densmore URLs are matched (via base_url) against
# the "Source*" column by the cells below.
threeg_kit = "https://www.addgene.org/1000000161/"
marionette_kit = "https://www.addgene.org/1000000137/"
densmore_kit = "https://www.addgene.org/1000000059/"

# Individually ordered JUMP plasmids, kept as a sorted list of Addgene URLs.
jump_plasmids = sorted(
    "https://www.addgene.org/{}/".format(addgene_id)
    for addgene_id in (
        126956, 126959, 126960, 126961, 126962, 126963, 126964, 126965,
        126966, 126967, 126973, 126974, 126975, 126976, 126991, 126996,
        127015, 127047, 127051, 127025, 127000, 126983,
    )
)

# Plate wells associated with the Densmore kit.
densmore_wells = (
    "A1 A5 A9 "
    "B1 B5 B9 "
    "C1 C5 C9 "
    "D1 D5 D6 D7 D8 D9 D10 D11 D12 "
    "E1 E2 E3 E4 E5 E6 E7"
).split()

# Densmore renaming

In [None]:
# Rewrite Densmore-kit aliases of the form "name (alt)suffix" as "namesuffix,altsuffix"
# in both the strain and the plasmid sheet.
for sheet in (strain_sheet, plasmid_sheet):
    rows = sheet.get_all_records()
    if not rows:
        # Empty sheet: nothing to rename, and rows[0] below would raise IndexError.
        continue
    # sheet.unlink()
    # 1-based column number of "Aliases*", taken from the key order of the first record.
    col_idx = list(rows[0].keys()).index("Aliases*") + 1
    # Loop-invariant, so compute it once per sheet instead of once per row.
    densmore_source = base_url(densmore_kit)
    # Records are numbered from sheet row 2 (presumably row 1 holds the headers).
    for row_number, row in enumerate(rows, start=2):
        if densmore_source not in row["Source*"]:
            continue
        new_aliases = re.sub(
            r"([^()]*)\s\(([^()]*)\)([^()]*)", r"\1\3,\2\3", row["Aliases*"]
        )
        # Skip the API round-trip when the alias is already in the target form; this also
        # makes re-running the cell cheap.
        if new_aliases != row["Aliases*"]:
            sheet.update_value((row_number, col_idx), new_aliases, parse=False)
    # sheet.link() # TODO: this gives 500 error, not sure why

# Ingest parts

In [None]:
# Snapshot of the plasmid sheet as a list of records keyed by column header.
rows = plasmid_sheet.get_all_records()

In [None]:
# Drive service object of the sheet's client, reused below to list and download plasmid maps.
service = plasmid_sheet.client.drive.service

In [None]:
def import_threeg_part(plasmid, seq_file):
    """Build a part record for a plasmid from the 3G kit.

    The part starts as the generic record produced by ``plasmid_to_part``; its
    ``"Name*"`` is then replaced, either from a table of hand-curated overrides
    or by parsing the plasmid description.

    Args:
        plasmid: Plasmid sheet record (mapping); ``"Description"`` is read here.
        seq_file: Value passed by the ingest loop for the plasmid's map file.
            Currently unused.

    Returns:
        The part dict with the adjusted ``"Name*"``.

    Raises:
        ValueError: If no override exists for the default name and the
            description contains no "<name> (" pattern to parse a name from.
    """
    part = plasmid_to_part(plasmid)
    # Hand-curated names for parts whose description does not yield a usable name.
    name_overrides = {
        "P18m": "pT7",
        "P33m": "pMutalik_med",
        "P34m": "pMutalik_weak",
        "C31m": "Bxb1",
        "C40m": "random_blank",
        "C71m": "CinR-CIDDHYRTC",
        "C95m": "T7_RNAP",
        "C114m": "Cas9_recoded",
        "UC16m": "gQi_gRNA_BD",
        "UC17m": "gV1_gRNA_BD",
        "UC20m": "gN2_gRNA_BD",
        "UCT1m": "gQi_gRNA_BE",
    }
    name = name_overrides.get(part["Name*"])
    if name is None:
        # Take the token preceding " (", skipping an optional part-type word. Example
        # description: "MoClo golden gate assembly BC part for BCD24 (low expression
        # bi-cistronic RBS, engineered for downstream context-independence; see
        # https://doi.org/10.1038/nmeth.2404)." gives "BCD24".
        match = re.search(
            r"(\S+)(?: (?:RBS|terminator|integrase|fusion|protease))? \(",
            plasmid["Description"],
        )
        if match is None:
            # Previously this surfaced as an opaque AttributeError on None.
            raise ValueError(
                "cannot derive a 3G part name for {!r} from description {!r}".format(
                    part["Name*"], plasmid["Description"]
                )
            )
        name = match.group(1)
    part["Name*"] = name
    return part


def import_densmore_part(plasmid, seq_file):
    """Build a part record for a Densmore-kit plasmid.

    For now this is exactly the generic record from ``plasmid_to_part``;
    ``seq_file`` is accepted for signature parity with the other importers
    but not used, and ``"Name*"`` is left at its default.
    """
    return plasmid_to_part(plasmid)


def import_jump_part(plasmid, seq_file):
    """Build a part record for a JUMP plasmid.

    The generic record from ``plasmid_to_part`` is renamed to the plasmid's
    ``"Names"`` value with a leading ``pJUMP<digits>-`` prefix removed.
    ``seq_file`` is not used.
    """
    jump_prefix = re.compile(r"^pJUMP\d+-")
    jump_part = plasmid_to_part(plasmid)
    jump_part["Name*"] = jump_prefix.sub("", plasmid["Names"])
    return jump_part


# accept extra columns via overrides={"Tags": "foo"}
# pass through tags from plasmid


def plasmid_to_part(plasmid):
    """Create the generic part record for a plasmid sheet record.

    Args:
        plasmid: Mapping with ``"Names"`` (comma-separated), ``"Tags"`` and
            ``"Description"`` entries.

    Returns:
        A new dict whose ``"Name*"`` is the last comma-separated name, whose
        ``"Date*"`` is the current local time, and whose tags and description
        are copied from ``plasmid``. Overhangs and sequence are placeholder
        strings for now.
    """
    *_, last_name = plasmid["Names"].split(",")
    created = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
    return {
        "Name*": last_name,
        "Tags": plasmid["Tags"],
        # "Plasmid/Oligos (Cutter)*" and "Author*" are not filled in yet.
        "Date*": created,
        "Upstream overhang*": "aaa",
        "Downstream overhang*": "bbb",
        "Sequence*": "aaaseqbbb",
        "Organism/codon usage*": "",
        "Description": plasmid["Description"],
    }


def _is_threeg_part(row):
    """True for 3G-kit records whose ``"Names"`` value does not start with "V"."""
    return (base_url(threeg_kit) in row["Source*"]) and (row["Names"][0] != "V")


def _is_densmore_part(row):
    """True for records sourced from the Densmore kit."""
    return base_url(densmore_kit) in row["Source*"]


def _is_jump_part(row):
    """True for records sourced from a listed JUMP plasmid, except empty backbones."""
    return (any(base_url(j) in row["Source*"] for j in jump_plasmids)) and (
        "(Empty Backbone)" not in row["Description"]
    )


# Ordered (predicate, importer) pairs; the ingest loop uses the first predicate that matches.
part_rules = [
    (_is_threeg_part, import_threeg_part),
    (_is_densmore_part, import_densmore_part),
    (_is_jump_part, import_jump_part),
]

In [None]:
# Drive folder registered as the collection's plasmid-map folder, and a listing of it.
plasmid_folder = col["plasmid_maps"]
plasmid_maps = api.google.list_drive(service, root=plasmid_folder)

In [None]:
# Peek at one record to see which columns are available.
rows[0]

In [None]:
# Dry run of the ingest: for every plasmid record, apply the first matching rule and print
# the plasmid names next to the derived part name.
for row in rows:
    for predicate, rule in part_rules:
        if predicate(row):
            # Escape the ID and require a literal "." after it, the same way the pLIB1
            # lookup further down does, so that e.g. "pLIB1" cannot also match "pLIB10".
            # NOTE(review): assumes map entries are keyed "<ID>.<ext>" -- confirm.
            seq_file = api.util.regex_key(
                plasmid_maps, re.escape(row["ID*"]) + r"\.", check_duplicates=True
            )["id"]
            part = rule(row, seq_file)
            print(row["Names"], part["Name*"])
            break

# Part cutting

In [None]:
# Download the Drive file whose plasmid-map key matches r"pLIB1\.", decode it as UTF-8
# text and parse it with api.read_sequence.
seq = api.read_sequence(
    service.files()
    .get_media(
        fileId=api.util.regex_key(plasmid_maps, r"pLIB1\.", check_duplicates=True)["id"]
    )
    .execute()
    .decode("utf8")
)

In [None]:
# Display the parsed pLIB1 record.
seq

In [None]:
from Bio import Restriction

In [None]:
# Scratch: reverse complement of the raw sequence.
seq.seq.reverse_complement()

In [None]:
# NOTE(review): `f` is first assigned in a later cell (`f = seq.features[1]`), so this cell
# and the next one only work with leftover kernel state. `??` is IPython source introspection.
f.location._start._shift??

In [None]:
f.location._shift??

In [None]:
# Scratch: compare a direct slice with two adjacent slices joined by `+`, first the
# records themselves, then their features.
seq[10:15]

In [None]:
seq[10:20]

In [None]:
seq[10:15] + seq[15:20]

In [None]:
seq[10:20].features

In [None]:
(seq[10:15] + seq[15:20]).features

In [None]:
# Location of the first annotated feature.
seq.features[0].location

In [None]:
# No earlier cell binds the name `Bio` (only names imported *from* it), so import the
# submodule here; otherwise this cell raises NameError on a fresh kernel.
import Bio.SeqFeature

Bio.SeqFeature.SeqFeature

In [None]:
# All features annotated on the record.
seq.features

In [None]:
from Bio.SeqFeature import SeqFeature, FeatureLocation, ExactPosition

In [None]:
# Hand-made features for experimenting with slice_feature: test_f spans 0..5 on strand 1.
test_f = SeqFeature(FeatureLocation(0, 5, strand=1), type="foo")
# test_f2 has its start (3) after its end (1).
# NOTE(review): Biopython may reject such a location with ValueError, depending on the
# installed version -- confirm before relying on this cell.
test_f2 = SeqFeature(
    FeatureLocation(ExactPosition(3), ExactPosition(1), strand=1), type="foo2"
)

In [None]:
test_f

In [None]:
def _move_feature(feature, start, end):
    """Return a new SeqFeature like ``feature`` but located at ``start``..``end``.

    The new location uses exact positions and keeps the strand of the original
    location; ``type``, ``id`` and ``qualifiers`` are passed through from
    ``feature``.
    """
    relocated = FeatureLocation(
        ExactPosition(start),
        ExactPosition(end),
        strand=feature.location.strand,
    )
    passthrough = dict(type=feature.type, id=feature.id, qualifiers=feature.qualifiers)
    return SeqFeature(relocated, **passthrough)


def slice_feature(feature, start, end):
    """Clip ``feature`` to the window ``start``..``end``.

    Args:
        feature: SeqFeature whose location start/end are converted with ``int()``.
        start: Window start.
        end: Window end; ``end < start`` is treated as the opposite orientation
            from a feature whose end is not before its start.

    Returns:
        A list of zero or more new features (built by ``_move_feature``), still
        expressed in the coordinates of the original sequence, i.e. not shifted
        by ``start``.
    """
    # int(position) replaces the nofuzzy_start/nofuzzy_end accessors, which newer
    # Biopython releases deprecate; for the usual position types it gives the same value.
    f_start = int(feature.location.start)
    f_end = int(feature.location.end)
    feature_reversed = f_end < f_start
    window_reversed = end < start
    # TODO: check equality
    if feature_reversed == window_reversed:
        if feature_reversed:
            # Both reversed: keep the original "middle two endpoints" behaviour.
            es = (sorted([f_start, f_end, start, end])[1:3],)
        else:
            # Plain interval intersection. For overlapping ranges this equals the middle
            # two sorted endpoints; for disjoint ranges it is empty and is filtered out
            # below, where the old code returned a feature covering the gap between them.
            es = ((max(f_start, start), min(f_end, end)),)
    else:
        endpoints = sorted([f_start, f_end, start, end])
        es = (endpoints[:2], endpoints[2:])
    return [_move_feature(feature, e1, e2) for e1, e2 in es if e1 < e2]


slice_feature(test_f, -2, 3)

In [None]:
import Bio


def slice_seq(seq, start, end):
    """Slice ``seq`` from ``start`` to ``end``, wrapping around when ``end < start``.

    Args:
        seq: Any sliceable sequence. If it has a ``features`` attribute, the
            overlapping features are carried over to the result.
        start: Start index, or ``None`` for the beginning. Negative indices are
            not specially handled.
        end: End index (exclusive), or ``None`` for the end of ``seq``.

    Returns:
        ``seq[start:end]``, or ``seq[start:] + seq[:end]`` when ``end < start``.
        Each kept feature is shifted into the slice's coordinates, clipped to
        the slice and given a single ``FeatureLocation``.
    """
    if start is None:
        start = 0
    if end is None:
        end = len(seq)
    # TODO: handle circular slices
    if end < start:
        # Wrap-around: tail of the sequence followed by its head.
        # TODO: copy letter annotations
        return slice_seq(seq, start, None) + slice_seq(seq, None, end)
    # Slicing silently truncates at len(seq); clamp `end` the same way so clipped feature
    # ends never point past the returned sequence.
    end = min(end, len(seq))
    new_seq = seq[start:end]
    if hasattr(seq, "features"):
        features = []
        for feature in seq.features:
            # int(position) replaces the nofuzzy_start/nofuzzy_end accessors, which newer
            # Biopython releases deprecate.
            f_start = int(feature.location.start)
            f_end = int(feature.location.end)
            if end <= f_start or start >= f_end:
                # Feature lies entirely outside the slice.
                continue
            new_feature = feature._shift(-start)
            start_loc = new_feature.location.start
            end_loc = new_feature.location.end
            if start > f_start:
                # Feature begins before the slice: clip to the slice start.
                start_loc = ExactPosition(0)
            if end < f_end:
                # Feature runs past the slice: clip to the slice end.
                end_loc = ExactPosition(end - start)
            new_feature.location = FeatureLocation(
                start_loc, end_loc, strand=new_feature.location.strand
            )
            features.append(new_feature)
        new_seq.features = features
    return new_seq

In [None]:
# Sorted BsaI cut positions found on the sequence treated as circular, with the first cut
# repeated at the end so consecutive pairs also cover the stretch across the origin.
cuts = sorted(Restriction.BsaI.search(seq.seq, linear=False))
cuts = cuts + cuts[:1]

In [None]:
# One slice per consecutive pair of cuts; each position is shifted by one before slicing
# (presumably converting 1-based cut positions to 0-based indices -- TODO confirm).
# NOTE(review): with a single cut site both indices are equal and slice_seq returns an
# empty slice rather than the whole linearised plasmid.
fragments = [slice_seq(seq, x1 - 1, x2 - 1) for x1, x2 in zip(cuts[:-1], cuts[1:])]

In [None]:
# TODO: include overhangs

In [None]:
# Scratch: IPython introspection (`?` / `??`) and attribute lookups on the BsaI enzyme
# class; these cells are exploratory and produce no values used later.
Restriction.BsaI.characteristic?

In [None]:
Restriction.BsaI.fst5

In [None]:
Restriction.BsaI.elucidate??

In [None]:
Restriction.BsaI._search??

In [None]:
Restriction.BsaI.elucidate()

In [None]:
Restriction.BsaI.charac

In [None]:
# Inspect the first fragment produced from the cut list.
fragments[0]

In [None]:
len(seq)

In [None]:
seq

In [None]:
# Wrap-around slice (end < start): features of the tail from 2100 joined with the first 5 positions.
slice_seq(seq, 2100, 5).features

In [None]:
# Features kept by Biopython's own record slicing, for comparison.
seq[40:80].features

In [None]:
# `f` is the second feature of the record; earlier scratch cells also refer to `f`.
f = seq.features[1]

In [None]:
f.location.nofuzzy_start

In [None]:
seq.features

In [None]:
seq[1:80].features

In [None]:
def catalyse(d, r, linear=True):
    """Split ``d`` into fragments at the cut positions ``r``.

    Args:
        d: Sliceable sequence. Index 0 is never included in the output: every
            fragment that starts at the beginning starts at ``d[1]``.
        r: Ordered cut positions used directly as slice indices into ``d``.
            If empty, the result is the single fragment ``d[1:]``.
        linear: If true, return start-to-first-cut, the pieces between
            consecutive cuts, and last-cut-to-end. If false, the sequence is
            considered circular: the first fragment bridges the last cut to
            the first one and is followed by the pieces between consecutive
            cuts.

    Returns:
        A tuple of slices of ``d``.
    """
    if not r:
        return (d[1:],)
    head = d[1 : r[0]]
    tail = d[r[-1] :]
    # Pieces between each pair of neighbouring cut positions (none for a single cut).
    between = [d[left:right] for left, right in zip(r[:-1], r[1:])]
    if linear:
        return (head, *between, tail)
    return (tail + head, *between)

In [None]:
# Scratch: first four bases, for checking fragment boundaries by eye.
seq.seq[0:4]

In [None]:
# BsaI cut positions on the sequence treated as circular.
Restriction.BsaI.search(seq.seq, linear=False)

In [None]:
# Try the local catalyse() with hand-picked cut positions in circular mode.
catalyse(seq.seq, [0, 10], linear=False)

In [None]:
# NOTE(review): the positions come from a circular search (linear=False) but catalyse() is
# called here with its default linear=True.
catalyse(seq.seq, sorted(Restriction.BsaI.search(seq.seq, linear=False)))

In [None]:
# Compare with Biopython's own implementation (IPython source introspection).
Restriction.BsaI.catalyze??

In [None]:
Restriction.FormattedSeq(seq.seq)

In [None]:
Restriction.BsaI.catalyze(Restriction.FormattedSeq(seq.seq, linear=False), linear=False)

In [None]:
Restriction.BsaI.catalyze(seq.seq.reverse_complement(), linear=False)

In [None]:
Restriction.BsaI.catalyze(seq.seq, linear=True)

In [None]:
# Fragments from Biopython's circular digest, kept for inspection below.
frags = Restriction.BsaI.catalyze(seq.seq, linear=False)

In [None]:
len(frags[0])

In [None]:
frags[1]