Skip to content

Commit

Permalink
Update drugcentral.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
cthoyt committed Nov 17, 2022
1 parent b825925 commit 589a164
Showing 1 changed file with 56 additions and 14 deletions.
70 changes: 56 additions & 14 deletions src/pyobo/sources/drugcentral.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
"""Get DrugCentral as OBO."""

import logging
from typing import Iterable
from collections import defaultdict
from contextlib import closing
from typing import DefaultDict, Iterable, List

import pandas as pd
import bioregistry
import psycopg2
from tqdm.auto import tqdm

from pyobo.struct import Obo, Reference, Term
from pyobo.utils.path import ensure_df
from pyobo.struct import Obo, Reference, Synonym, Term

__all__ = [
"DrugCentralGetter",
Expand All @@ -18,6 +21,13 @@

# Prefix used for every DrugCentral term reference created in this module.
PREFIX = "drugcentral"

# Connection settings for the DrugCentral PostgreSQL server.
# NOTE(review): the credentials are hard-coded in source; presumably this is the
# public read-only account for the database - confirm before reusing elsewhere.
DBNAME = "drugcentral"
USER = "drugman"
PASSWORD = "dosage"
HOST = "unmtid-dbs.net"
PORT = 5433

# Keyword arguments unpacked into ``psycopg2.connect`` by ``iter_terms``.
PARAMS = {
    "dbname": DBNAME,
    "user": USER,
    "password": PASSWORD,
    "host": HOST,
    "port": PORT,
}


class DrugCentralGetter(Obo):
"""An ontology representation of the DrugCentral database."""
Expand All @@ -26,27 +36,59 @@ class DrugCentralGetter(Obo):

def iter_terms(self, force: bool = False) -> Iterable[Term]:
    """Iterate over terms in the ontology.

    :param force: Kept so existing callers can still pass it; it is not forwarded
        because the module-level ``iter_terms`` takes no arguments.
    :returns: The iterable of terms produced by the module-level ``iter_terms``.
    """
    # Only the argument-free call matches the module-level function's signature;
    # the earlier ``version=..., force=...`` call was a stale pre-commit line.
    return iter_terms()


def get_obo(force: bool = False) -> Obo:
    """Build the DrugCentral ontology getter.

    :param force: Passed through unchanged as the ``force`` keyword of
        :class:`DrugCentralGetter`.
    :returns: A newly constructed :class:`DrugCentralGetter`.
    """
    getter = DrugCentralGetter(force=force)
    return getter


def iter_terms() -> Iterable[Term]:
    """Iterate over DrugCentral terms.

    Opens a single connection to the DrugCentral PostgreSQL database using ``PARAMS``,
    reads the ``public.structures``, ``public.identifier`` and ``public.synonyms``
    tables in full, closes the connection, and then builds one term per structure row.

    :yields: One :class:`Term` per row of ``public.structures``, carrying its definition,
        synonyms and cross-references, plus InChIKey, SMILES, InChI and CAS annotations
        for whichever of those values are present on the row.
    """
    with closing(psycopg2.connect(**PARAMS)) as conn:
        with closing(conn.cursor()) as cur:
            cur.execute(
                "SELECT cd_id, name, cas_reg_no, mrdef, inchi, smiles, inchikey FROM public.structures"
            )
            structures = cur.fetchall()

        # Cross-references grouped by the stringified structure identifier, so that
        # lookups agree with the ``str(drugcentral_id)`` key used when building terms.
        xrefs: DefaultDict[str, List[Reference]] = defaultdict(list)
        with closing(conn.cursor()) as cur:
            cur.execute("SELECT struct_id, id_type, identifier FROM public.identifier")
            rows = cur.fetchall()
            for drugcentral_id, prefix, identifier in tqdm(
                rows, unit_scale=True, desc="loading xrefs"
            ):
                if not identifier or not prefix:
                    continue
                # This label is rewritten by hand before asking bioregistry to normalize it.
                if prefix == "ChEMBL_ID":
                    prefix = "chembl.compound"
                xref_prefix_norm = bioregistry.normalize_prefix(prefix)
                if xref_prefix_norm is None:
                    # tqdm.write keeps the message from clobbering the progress bar.
                    tqdm.write(f"did not normalize {prefix}:{identifier}")
                    continue
                xrefs[str(drugcentral_id)].append(
                    Reference(prefix=xref_prefix_norm, identifier=identifier)
                )

        # Synonyms grouped by the same stringified structure identifier.
        synonyms: DefaultDict[str, List[Synonym]] = defaultdict(list)
        with closing(conn.cursor()) as cur:
            cur.execute("SELECT id, name FROM public.synonyms")
            for drugcentral_id, synonym in cur.fetchall():
                synonyms[str(drugcentral_id)].append(Synonym(name=synonym))

    # Everything needed is in memory now, so terms are built after the connection is closed.
    for drugcentral_id, name, cas, definition, inchi, smiles, inchi_key in structures:
        drugcentral_id = str(drugcentral_id)
        term = Term(
            reference=Reference(prefix=PREFIX, identifier=drugcentral_id, name=name),
            definition=definition,
            synonyms=synonyms.get(drugcentral_id, []),
            xrefs=xrefs.get(drugcentral_id, []),
        )
        # Structure rows may lack chemical descriptors; attach only the values that
        # exist instead of creating references or properties with an empty value.
        if inchi_key:
            term.append_xref(Reference(prefix="inchikey", identifier=inchi_key))
        if smiles:
            term.append_property("smiles", smiles)
        if inchi:
            term.append_property("inchi", inchi)
        if cas:
            term.append_xref(Reference(prefix="cas", identifier=cas))
        yield term

Expand Down

0 comments on commit 589a164

Please sign in to comment.