In [None]:
import io
import re
import urllib
import urllib.parse

import benchlingapi
import requests_html
import toml
from Bio import SeqIO

# Code

In [None]:
s.Folder.list(
    project_id=s.Project.find_by_name("Library projects").id, parentFolderId="NO_PARENT"
)

In [None]:
[a.name for a in _]

In [None]:
def get_folder(session, path=None, project=None, project_id=None):
    """Resolve a Benchling folder from a project and an optional folder path.

    Parameters
    ----------
    session
        Object exposing ``Project.find_by_name``, ``Folder.list`` and
        ``Folder.find_by_name`` (a ``benchlingapi`` session in this notebook).
    path : str, optional
        Folder names separated by ``/``, resolved one level at a time starting
        below the project's single top-level folder. Empty components
        (leading, trailing or doubled slashes) are skipped. When omitted, the
        top-level folder itself is returned.
    project : str or benchlingapi.models.Project, optional
        Project name (looked up with ``find_by_name``) or project object.
    project_id : optional
        Project id, as an alternative to ``project``.

    Returns
    -------
    The top-level folder, or whatever ``Folder.find_by_name`` returns for the
    last path component.

    Raises
    ------
    ValueError
        If both ``project`` and ``project_id`` are given.
    LookupError
        If the project name, or a folder that still has path components below
        it, cannot be found.
    Exception
        If the project does not have exactly one top-level folder.
    """
    if project and project_id:
        raise ValueError("cannot specify both project and project_id")
    if project:
        if not isinstance(project, benchlingapi.models.Project):
            project_name = project
            project = session.Project.find_by_name(project_name)
            if project is None:
                raise LookupError(f"no project named {project_name!r}")
        project_id = project.id
    top_level_folders = session.Folder.list(
        parent_folder_id="NO_PARENT", project_id=project_id
    )
    if len(top_level_folders) != 1:
        raise Exception("expecting only one top level folder")
    folder = top_level_folders[0]
    if not path:
        return folder
    resolved = []
    for component in path.split("/"):
        if not component:
            continue
        if folder is None:
            # The previous lookup came back empty; fail with a readable message
            # instead of an AttributeError on ``None.id``.
            raise LookupError(
                f"folder {'/'.join(resolved)!r} not found while resolving {path!r}"
            )
        folder = session.Folder.find_by_name(
            component, parent_folder_id=folder.id, project_id=project_id
        )
        resolved.append(component)
    return folder

In [None]:
def parse_addgene_well(s):
    """Split an Addgene well label such as ``"Plate 1 / A / 3"`` into its parts.

    Returns
    -------
    tuple of str
        ``(plate, row, column)``; the numeric parts are returned as strings,
        exactly as captured by the pattern.

    Raises
    ------
    ValueError
        If ``s`` does not start with the expected ``Plate N / R / C`` pattern
        (rows are limited to ``A``-``H``).
    """
    m = re.match(r"Plate (\d+) / ([A-H]) / (\d+)", s)
    if m is None:
        raise ValueError(f"unrecognized Addgene well label: {s!r}")
    return m.groups()


def format_well_name(plate, row, column):
    """Build a compact well name, omitting the plate prefix for plate 1.

    ``plate`` may be a number or a numeric string (``parse_addgene_well``
    returns strings), so both ``1`` and ``"1"`` count as the first plate.
    """
    is_first_plate = plate == 1 or str(plate).strip() == "1"
    prefix = "" if is_first_plate else plate
    return f"{prefix}{row}{column}"


def _parse_table_row(column_names, row):
    url = None
    for name, td in zip(column_names, row.find("td")):
        link = td.find("a", first=True)
        if link is not None and not link.attrs["href"].startswith("#"):
            url = urllib.parse.urljoin(link.base_url, link.attrs["href"])
    d = {name: td.text for name, td in zip(column_names, row.find("td"))}
    if url is not None:
        d["url"] = url
    return d


def addgene_supplemental_urls(url, session=None):
    """Fetch ``url`` and return the supplemental-document links found on it.

    A new ``requests_html.HTMLSession`` is created when ``session`` is not
    supplied (or is falsy); the page's parsed HTML is handed to
    ``_addgene_supplemental_urls``.
    """
    http = session if session else requests_html.HTMLSession()
    page = http.get(url)
    return _addgene_supplemental_urls(page.html)


def _addgene_supplemental_urls(html):
    urls = html.find(
        "div.field-label:contains('Supplemental') + ul.addgene-document-list"
    )[0].links
    return urls


def addgene_sequences(url, session=None):
    """Fetch the ``sequences`` sub-page of a plasmid URL and list its GenBank links.

    Parameters
    ----------
    url : str
        Plasmid page URL, with or without a trailing slash. Any query string
        or fragment is dropped when building the sub-page URL.
    session : optional
        Object with a ``get(url)`` method returning a response that has
        ``.html``; a new ``requests_html.HTMLSession`` is created when omitted.

    Returns
    -------
    Whatever ``_addgene_sequences`` returns for the fetched page.
    """
    if not session:
        session = requests_html.HTMLSession()
    # urljoin() replaces the last path segment when the base lacks a trailing
    # slash (".../108537" + "sequences" -> ".../sequences"), so normalise the
    # path explicitly before appending.
    parts = urllib.parse.urlsplit(url)
    path = parts.path if parts.path.endswith("/") else parts.path + "/"
    sequences_url = urllib.parse.urlunsplit(
        (parts.scheme, parts.netloc, path + "sequences", "", "")
    )
    res = session.get(sequences_url)
    return _addgene_sequences(res.html)


def _addgene_sequences(html):
    seqs = {}
    for key in [
        "addgene_full",
        "depositor_full",
        "addgene_partial",
        "depositor_partial",
    ]:
        links = html.find(f"section#{key.replace('_', '-')} a.genbank-file-download")
        seq_urls = [link.attrs["href"] for link in links]
        seqs[key] = seq_urls
    return seqs


def parse_addgene_kit_table(
    url, include_sequences=True, include_supplemental=True, session=None
):
    """Parse the inventory table of an Addgene kit page into one dict per well.

    Parameters
    ----------
    url : str
        Kit page URL (for example
        ``"http://www.addgene.org/kits/murray-cidar-moclo-v1/"``).
    include_sequences : bool
        When true, each well that has a plasmid link gets ``"sequence_urls"``
        from ``addgene_sequences``.
    include_supplemental : bool
        When true, each well that has a plasmid link gets
        ``"supplemental_urls"`` from ``addgene_supplemental_urls``.
    session : optional
        Object with a ``get(url)`` method, reused for every request; a new
        ``requests_html.HTMLSession`` is created when omitted.

    Returns
    -------
    list of dict
        One entry per ``tbody`` row, as produced by ``_parse_table_row``, plus
        the optional keys described above. Rows without a ``"url"`` entry are
        returned without the extra keys.
    """
    if not session:
        session = requests_html.HTMLSession()
    res = session.get(url)
    table = res.html.find("table.kit-inventory-table")[0]
    header = table.find("thead", first=True)
    column_names = [th.text for th in header.find("th")]
    wells = []
    for row in table.find("tbody tr"):
        well = _parse_table_row(column_names, row)
        # The plasmid link lives in the parsed dict, not on the HTML element.
        plasmid_url = well.get("url")
        if plasmid_url is not None:
            if include_sequences:
                well["sequence_urls"] = addgene_sequences(
                    plasmid_url, session=session
                )
            if include_supplemental:
                well["supplemental_urls"] = addgene_supplemental_urls(
                    plasmid_url, session=session
                )
        wells.append(well)
    return wells


def get_genbank(url, session=None):
    """Download a GenBank file and parse it into a list of records.

    Parameters
    ----------
    url : str
        Location of the GenBank file.
    session : optional
        Object with a ``get(url)`` method returning a response that has
        ``.content`` (bytes) and ``raise_for_status()``; a new
        ``requests_html.HTMLSession`` is created when omitted, matching the
        other download helpers in this notebook.

    Returns
    -------
    list
        Every record yielded by ``SeqIO.parse(..., "genbank")``.

    Raises
    ------
    Whatever ``raise_for_status()`` raises for an unsuccessful response.
    """
    if not session:
        session = requests_html.HTMLSession()
    res = session.get(url)
    # Fail on HTTP errors instead of feeding an error page to the parser.
    res.raise_for_status()
    buf = io.StringIO(res.content.decode("utf8"))
    return list(SeqIO.parse(buf, "genbank"))

In [None]:
addgene_sequences("https://www.addgene.org/108537/")

In [None]:
# gb = get_genbank(addgene_sequences('http://www.addgene.org/121010/')['depositor_full'][0])[0]
gb = get_genbank(
    addgene_sequences("https://www.addgene.org/108537/")["addgene_full"][0]
)[0]

In [None]:
gb.features[1].qualifiers

In [None]:
print(
    requests.get(
        "https://media.addgene.org/snapgene-media/v1.6.2-0-g4b4ed87/sequences/02/82/210282/addgene-plasmid-108537-sequence-210282.gbk"
    ).content.decode()
)

In [None]:
gb.features

In [None]:
gb.annotations

In [None]:
gb.description

In [None]:
gb.name

In [None]:
gb.annotations["references"]

# Setup

In [None]:
# Read settings from config.toml (path is relative to the working directory).
config = toml.load("config.toml")

In [None]:
# Benchling API session; the key comes from the [benchling] table of the
# loaded config rather than being written into the notebook.
s = benchlingapi.Session(config["benchling"]["api_key"])

# Reference

In [None]:
f = get_folder(s, project="Test")

In [None]:
bl = s.DNASequence.find_by_name("V37m", folder_id=f.id)

In [None]:
p = bl.primers[0]

In [None]:
p2 = {**p}

In [None]:
p2["start"] = p2["bindPosition"] = 2700

In [None]:
bl.primers = [p, p2]

In [None]:
pp = bl.primers

In [None]:
pp

In [None]:
s.Oligo.find("oJQS4")

In [None]:
z2 = [a for a in s.DNASequence.list(project_id=proj.id)][1]

In [None]:
o = s.Oligo.get(id=z.id)

In [None]:
o.raw

In [None]:
s.Oligo.get(z.id).raw

In [None]:
z.raw

In [None]:
bl.primers = [{"start": 20, "end": 40, "oligoId": z.id}]

In [None]:
bl.update()

In [None]:
bl.primers

In [None]:
bl.raw

In [None]:
p1m.raw.keys()

In [None]:
print(gb)

In [None]:
# dna.<TAB> -- leftover from interactive attribute completion; the bare
# "dna." was a syntax error, so this cell is intentionally a no-op.

In [None]:
dna.description

In [None]:
gb.annotations

In [None]:
{
    "ORGANISM": {"value": "synthetic DNA construct"},
    "SOURCE": {"value": "synthetic DNA construct"},
    "accession": {"value": "addgene_121010_236173"},
},

In [None]:
# s.<TAB> -- leftover from interactive attribute completion; the bare "s."
# was a syntax error, so this cell is intentionally a no-op.

In [None]:
COLORS = [""]

In [None]:
s.Annotation()

In [None]:
print(gb.features[5])

In [None]:
gb.features

In [None]:
gb.features[3].strand

In [None]:
gb.features[3].qualifiers

In [None]:
gb.features[0].extract

In [None]:
{
    "color": "#C7B0E3",
    "end": 1514,
    "name": "pENTR-R",
    "start": 1494,
    "strand": -1,
    "type": "primer_bind",
}

In [None]:
gb.features[1].qualifiers

In [None]:
[(q.type, dict(q.qualifiers)) for q in gb.features]

In [None]:

            "start": 6,
            "end": 24,
            "strand": 1,
            "aminoAcids": "RIFVKE",
            "regions": [
              {
                "start": 6,
                "end": 24,
              }

In [None]:
gb.features[1].location.parts

In [None]:
gb.annotations[0]

In [None]:
def genbank_to_benchling(
    session,
    gb,
    name,
    folder_id,
    accession=None,
    long_annotations=True,
    custom_fields=None,
    sep=" / ",
    include_translations=True,
):
    """Build a Benchling DNA sequence object from a parsed GenBank record.

    Parameters
    ----------
    session
        Object whose ``DNASequence(**fields)`` builds the sequence model.
    gb
        Parsed GenBank record exposing ``seq``, ``description``, ``features``
        and an ``annotations`` dict (a Biopython ``SeqRecord`` in this
        notebook).
    name : str
        Name for the new sequence.
    folder_id
        Id of the destination folder.
    accession : str, optional
        Value for the ``accession`` custom field. When omitted, the record's
        own accessions are joined with ``,`` and used unless the result is
        empty or ``"."``.
    long_annotations : bool
        When true, annotation names are the feature label followed by the
        note, gene and product qualifiers in parentheses; otherwise the name
        is just the label (falling back to the note, then the feature type).
    custom_fields : dict, optional
        Extra ``{field: value}`` pairs; these override the derived fields.
    sep : str
        Separator used to join multi-valued qualifiers.
    include_translations : bool
        When true, every ``CDS`` feature that carries a ``translation``
        qualifier is also emitted as a translation. Pass ``False`` to send no
        translations.

    Returns
    -------
    The object returned by ``session.DNASequence(...)``; this function does
    not call ``save()`` on it.

    Raises
    ------
    ValueError
        If ``molecule_type`` is not ``"ds-DNA"``, if ``topology`` is neither
        ``"circular"`` nor ``"linear"``, or if a ``CDS`` has
        ``codon_start != 1`` while translations are requested.
    """
    molecule_type = gb.annotations.get("molecule_type")
    if molecule_type != "ds-DNA":
        raise ValueError(f"unexpected value for molecule_type: {molecule_type}")
    topology = gb.annotations.get("topology")
    if topology not in ("circular", "linear"):
        raise ValueError(f"unexpected value for topology: {topology}")

    annotations = []
    translations = []
    for feature in gb.features:
        location = feature.location
        parts = list(location.parts)
        feature_name = _genbank_feature_name(feature, long_annotations, sep)
        if len(parts) > 1:
            # Multi-part features get one annotation per part plus an
            # "[all]" annotation spanning the whole feature.
            for part in parts:
                start = int(part.start)
                end = int(part.end)
                annotations.append(
                    {
                        "start": start,
                        "end": end,
                        "strand": part.strand,
                        "name": feature_name + f" [{start}-{end}]",
                        "type": feature.type,
                    }
                )
            feature_name += " [all]"
        annotation = {
            "start": int(location.start),
            "end": int(location.end),
            "strand": location.strand,
            "name": feature_name,
            "type": feature.type,
        }
        annotations.append(annotation)
        # TODO: are there any more CDS-like types?
        if include_translations and feature.type == "CDS":
            translation = _genbank_translation(feature, annotation, parts)
            if translation is not None:
                translations.append(translation)

    fields = _genbank_custom_fields(gb, accession)
    if custom_fields:
        fields.update(custom_fields)
    return session.DNASequence(
        name=name,
        folder_id=folder_id,
        # Send plain text rather than the record's sequence object.
        bases=str(gb.seq),
        annotations=annotations,
        translations=translations,
        is_circular=(topology == "circular"),
        custom_fields={key: {"value": value} for key, value in fields.items()},
    )


def _join_qualifier(value, sep):
    """Return a qualifier value (missing, a string, or a list of strings) as one string."""
    if value is None:
        return ""
    if isinstance(value, str):
        return value
    return sep.join(value)


def _genbank_feature_name(feature, long_annotations, sep):
    """Derive the annotation name for one GenBank feature."""
    if feature.type == "source":
        return "source"
    qualifiers = feature.qualifiers
    label = _join_qualifier(qualifiers.get("label"), sep)
    note = _join_qualifier(qualifiers.get("note"), sep)
    if not long_annotations:
        return label or note or feature.type
    # Unlabelled features fall back to their type so they still get a name.
    feature_name = label or feature.type
    if note:
        feature_name += f" ({note})"
    gene = _join_qualifier(qualifiers.get("gene"), sep)
    if gene:
        feature_name += f" (gene: {gene})"
    product = _join_qualifier(qualifiers.get("product"), sep)
    if product:
        feature_name += f" (product: {product})"
    return feature_name


def _genbank_translation(feature, annotation, parts):
    """Build the translation entry for a CDS feature, or None if it has no translation."""
    qualifiers = feature.qualifiers
    codon_start = qualifiers.get("codon_start") or ["1"]  # absent means frame 1
    if isinstance(codon_start, (list, tuple)):
        codon_start = codon_start[0]
    if int(codon_start) != 1:
        raise ValueError("cannot handle codon_start != 1")
    # The payload example kept in this notebook shows "aminoAcids" as a plain
    # string, so the qualifier list is flattened.
    amino_acids = _join_qualifier(qualifiers.get("translation"), "")
    if not amino_acids:
        return None
    return {
        "start": annotation["start"],
        "end": annotation["end"],
        "strand": annotation["strand"],
        "aminoAcids": amino_acids,
        "regions": [
            {"start": int(part.start), "end": int(part.end)} for part in parts
        ],
    }


def _genbank_custom_fields(gb, accession):
    """Collect the plain custom-field values derived from the record's header."""
    header = gb.annotations
    fields = {}
    for field, key in (
        ("organism", "organism"),
        ("source", "source"),
        ("division", "data_file_division"),
    ):
        value = header.get(key)
        if value is not None:
            fields[field] = value
    keywords = header.get("keywords")
    if keywords is not None:
        fields["keywords"] = ",".join(keywords)
    fields["definition"] = gb.description
    if accession is None:
        accession = ",".join(header.get("accessions", []))
        if accession and accession != ".":
            fields["accession"] = accession
    else:
        fields["accession"] = accession
    return fields

In [None]:
# Convert the parsed GenBank record `gb` into a Benchling sequence named "seq2"
# in folder `f`, then save it.
# NOTE(review): save() presumably creates the sequence on the Benchling side, so
# re-running this cell may create duplicates -- confirm before Run All.
dna = genbank_to_benchling(s, gb, "seq2", f.id, accession="addgene_blah")
dna.save()

In [None]:
gb.features[1].location

In [None]:
p1m.raw

# Test

In [None]:
res = requests.get(
    "https://media.addgene.org/data/plasmids/120/120934/120934-attachment_aMW6xjP7-DYm.gb"
)

In [None]:
gb = res.content

In [None]:
import os
import io
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio import SeqIO

# get all sequence records for the specified genbank file
# recs = [rec for rec in SeqIO.parse("genbank_file.gbk", "genbank")]

In [None]:
buf = io.StringIO(gb.decode("utf8"))
dna = SeqIO.parse(buf, "genbank")

In [None]:
list(dna)

In [None]:
a = _

In [None]:
a[0].annotations

In [None]:
a[0].features

In [None]:
list(s.Project.all())

In [None]:
z = _

In [None]:
z[0].name

In [None]:
a = s.Folder.find_by_name("Test2")

In [None]:
proj = s.Project.list()[0]

In [None]:
proj

In [None]:
f = get_folder(s, "Test/Test2", project="Test")

In [None]:
f.name

In [None]:
proj.id

In [None]:
s.Folder.list(parentFolderId="NO_PARENT", projectId=None)  # [1].name