# Initialize `laminlabs/bionty-assets` instance

In [None]:
!lamin load laminlabs/bionty-assets

In [None]:
import lamindb as ln
import bionty as bt
from lamin_utils import logger


# Pin this notebook's transform identity (stem uid and version) on the
# lamindb settings object; both are assigned before `ln.track()` is called.
ln.settings.transform.stem_uid = "b8kF9q3jL3Ty"
ln.settings.transform.version = "1"
ln.track()

In [None]:
# Preview the first rows of the `bt.Source` registry as a DataFrame.
bt.Source.df().head()

In [None]:
# Show the tree of the instance's storage root (the files iterated over below).
ln.settings.storage.root.view_tree()

## Register DataFrames as artifacts

In [None]:
from bionty._bionty import list_biorecord_models

# Entities contained in this collection are looked up on `bt.Source` under a
# "bionty." prefix (see `_find_source`).
# NOTE(review): presumably the names of bionty's record models — confirm.
bt_models = list_biorecord_models(bt)

# Files the loop below did not register, grouped by the reason:
#   "no_source"      - filename parsed, but no matching `bt.Source` record
#   "not_recognized" - filename does not follow any known naming scheme
not_registered = {"no_source": [], "not_recognized": []}


def _find_source(organism, source, version, entity):
    """Return the `bt.Source` record matching the filename fields, or None."""
    if entity in bt_models:
        entity = f"bionty.{entity}"
    return bt.Source.filter(
        organism=organism, name=source, version=version, entity=entity
    ).one_or_none()


for file in ln.settings.storage.root.glob("**/*"):
    filepath = file.path
    filestem = file.stem

    if file.is_dir():
        continue
    # Skip everything under the `.lamindb` folder of the storage root.
    if filepath.startswith("bionty-assets/.lamindb"):
        continue

    if file.suffix == ".parquet":
        if filestem.startswith("df_"):
            # new format: df_<organism>__<source>__<version>__<entity>
            parts = filestem.removeprefix("df_").split("__")
        elif filestem.endswith("_lookup"):
            # old format: <organism>_<source>_<version>_<entity>_lookup
            parts = filestem.removesuffix("_lookup").split("_")
        else:
            parts = []
        # A name that does not yield exactly four fields cannot be mapped to
        # a source; record it and move on instead of aborting the whole loop
        # with a ValueError on unpacking.
        if len(parts) != 4:
            not_registered["not_recognized"].append(file)
            logger.warning(f"Not recognized parquet file! {file}")
            continue
        organism, source, version, entity = parts

        source_record = _find_source(organism, source, version, entity)
        if source_record is not None:
            artifact = ln.Artifact(file).save()
            # A parquet file is linked as the source's single dataframe artifact.
            source_record.dataframe_artifact = artifact
            source_record.save()
            logger.print(f"Registered parquet file: {file}")
        else:
            not_registered["no_source"].append(file)
            logger.warning(f"Source not found for parquet file! {file}")
    elif filestem.startswith("ontology_"):
        # ontology_<organism>__<source>__<version>__<entity>
        # NOTE(review): parsed from the full name rather than the stem,
        # presumably because these files carry no extension and a version may
        # contain dots — confirm.
        parts = file.name.removeprefix("ontology_").split("__")
        if len(parts) != 4:
            not_registered["not_recognized"].append(file)
            logger.warning(f"Not recognized file! {file}")
            continue
        organism, source, version, entity = parts

        source_record = _find_source(organism, source, version, entity)
        if source_record is not None:
            artifact = ln.Artifact(file).save()
            # Ontology files are added to the source's `artifacts` relation.
            source_record.artifacts.add(artifact)
            logger.print(f"Registered ontology file: {file}")
        else:
            not_registered["no_source"].append(file)
            logger.warning(f"Source not found for ontology file! {file}")
    else:
        not_registered["not_recognized"].append(file)
        logger.warning(f"Not recognized file! {file}")

## Non-registered files

In [None]:
# Display the files skipped during registration, keyed by reason
# ("no_source" / "not_recognized").
not_registered

## BFXPipeline currently has no linked dataframe

Requires: https://github.com/laminlabs/bionty/issues/83

In [None]:
# List the `bt.Source` records that have no dataframe artifact linked.
bt.Source.filter(dataframe_artifact=None).df()

In [None]:
# Counterpart to the `ln.track()` call at the top of the notebook.
ln.finish()