# Update ontology sources

In [None]:
# Shell escape: connect the lamin CLI to the laminlabs/bionty-assets instance
# before any of the lamindb/bionty code in the cells below runs.
!lamin connect laminlabs/bionty-assets

In [None]:
import bionty as bt
import lamindb as ln
from bionty.core._source import register_source_in_bionty_assets
from lamin_utils import logger

# Start tracking this notebook run; the string is the identifier passed to ln.track
# (presumably the notebook's lamindb transform uid — confirm before changing it).
ln.track("7extigZj6QNG")

All entities that are not listed in the following list must be curated manually because they require manual intervention.
Consult https://bionty-assets-gczz.netlify.app/ingest/ for guidance.

In [None]:
# For every (entity, source, organism) triple below:
#   1. look up the latest upstream ontology version,
#   2. build the DataFrame for that version,
#   3. sanity-check that it did not shrink compared to the currently used version,
#   4. add the new source record and register its parquet + ontology files in bionty-assets.
# Imported once here instead of on every loop iteration.
from bionty.base._ontology_url import get_ontology_url

for entity, source_name, organism in [
    ("Disease", "mondo", "all"),
    ("CellType", "cl", "all"),
    ("Tissue", "uberon", "all"),
    ("Disease", "doid", "human"),
    ("ExperimentalFactor", "efo", "all"),
    ("Phenotype", "pato", "all"),
    ("Phenotype", "hp", "human"),
    ("Pathway", "go", "all"),
    # ("Pathway", "pw", "all"),  Currently leads to a URL error - upstream issue
    ("DevelopmentalStage", "hsapdv", "human"),
    ("DevelopmentalStage", "mmusdv", "mouse"),
    ("Ethnicity", "hancestro", "human"),
    # ("Drug", "dron", "all"),  Not a Bionty entity (yet)
]:
    _, latest_version = get_ontology_url(prefix=source_name)
    logger.info(
        f"Processing...    {entity:<20} {source_name:<10} {latest_version:<12} {organism}"
    )

    new_df = getattr(bt.base, entity)(source=source_name, version=latest_version).df()

    # Load the currently used version (if any) for a row-count comparison.
    # Only the lookup lives inside the try block so that the regression error
    # raised further down cannot be swallowed by this except clause.
    current_version_df = None
    try:
        currently_used_source = bt.Source.filter(
            entity=f"bionty.{entity}",
            name=source_name,
            organism=organism,
            currently_used=True,
        ).one_or_none()
        if currently_used_source:
            current_version_df = getattr(bt.base, entity)(
                source=currently_used_source
            ).df()
    except ValueError as e:
        # This occurs during testing in local instances where older versions are not registered.
        # Any other ValueError is unexpected and must not be hidden.
        if "No source url is available" not in str(e):
            raise

    if current_version_df is not None:
        n_new = new_df.shape[0]
        n_current = current_version_df.shape[0]
        # A new ontology release with fewer rows than the one in use is suspicious:
        # stop before anything gets registered.
        if n_new < n_current:
            raise ValueError(
                f"The new version {latest_version} of {source_name} has fewer rows "
                f"than the currently used version: {n_new} < {n_current}"
            )

    source_rec = getattr(bt, entity).add_source(
        source=source_name, version=latest_version
    )

    try:
        # Register the DataFrame (parquet) and the ontology file for the new source record.
        register_source_in_bionty_assets(
            f"bionty/base/_dynamic/df_{organism}__{source_name}__{latest_version}__{entity}.parquet",
            source=source_rec,
            is_dataframe=True,
        )
        register_source_in_bionty_assets(
            f"bionty/base/_dynamic/ontology_{organism}__{source_name}__{latest_version}__{entity}",
            source=source_rec,
            is_dataframe=False,
        )
        logger.info(
            f"Registered a new version {latest_version} of {entity}. Ensure that it is the latest version in the `source.yaml` file."
        )
    except ValueError as e:
        # Only the "already registered" case is safe to skip; re-raise everything else
        # instead of silently dropping it.
        if "artifact already exists" not in str(e):
            raise
        logger.warning(
            f"Entity {entity} using source {source_name} of version {latest_version} and organism {organism} is already registered. Skipping..."
        )
    except FileNotFoundError:
        logger.warning(
            f"Entity {entity} using source {source_name} of version {latest_version} and organism {organism} file cannot be found. "
            "This can happen if the ontology was previously registered and the pronto ontology file did not get recreated. Skipping..."
        )

In [None]:
# Close out the run that was started with ln.track() at the top of this notebook
# (presumably marks the tracked run as finished in lamindb — confirm).
ln.finish()