# Data provenance and hub

In [None]:
import lamindb as ln

# Call lamindb's notebook header helper first
# (presumably sets up notebook metadata/tracking — TODO confirm).
ln.nb.header()

In [None]:
!lndb login test-user2

## Data objects

In [None]:
# Print and display every non-empty table whose entity name is an
# attribute of `ln.schema.core`.
for entity in ln.schema.list_entities():
    if not hasattr(ln.schema.core, entity):
        continue  # not a core entity; shown in the next section instead
    df = ln.db.query.table_as_df(entity)
    if df.shape[0] > 0:
        print(entity)
        display(df)

## Biological entities and wetlab

In [None]:
# Print and display every non-empty table whose entity name is NOT an
# attribute of `ln.schema.core`.
for entity in ln.schema.list_entities():
    if hasattr(ln.schema.core, entity):
        continue  # core entities are shown in the previous section
    df = ln.db.query.table_as_df(entity)
    if df.shape[0] > 0:
        print(entity)
        display(df)

## Query data with biological entities

You can query any field of interest in the feature tables:

In [None]:
# Query dobjects through the "gene" entity, filtering on its ncbi_gene_id field.
ln.db.query.dobject(entity_name="gene", ncbi_gene_id=9535)

In [None]:
# Query dobjects through the "gene" entity, filtering on its symbol field.
ln.db.query.dobject(entity_name="gene", symbol="IFNG")

In [None]:
from sqlalchemy import inspect
from lndb_setup import settings


def _get_all_foreign_keys(engine):
    """Map every table that has foreign keys to those foreign keys.

    For each foreign-key constraint, only the first constrained column and
    the first referred column are recorded. Tables without any foreign key
    are left out of the result.

    Example result: {'biosample': {'tissue_id': ('tissue', 'id')}}
    """
    db_inspector = inspect(engine)

    all_foreign_keys = {}
    for table in db_inspector.get_table_names():
        table_keys = {}
        for constraint in db_inspector.get_foreign_keys(table):
            local_column = constraint["constrained_columns"][0]
            table_keys[local_column] = (
                constraint["referred_table"],
                constraint["referred_columns"][0],
            )
        if len(table_keys) > 0:
            all_foreign_keys[table] = table_keys

    return all_foreign_keys


def _backpopulate_foreign_keys(foreign_keys):
    """Result {'tissue': {'id': {'biosample': 'tissue_id'}}}"""

    foreign_keys_backpop = {}

    for module_name, keys in foreign_keys.items():
        for key, (module, ref_key) in keys.items():
            if foreign_keys_backpop.get(module) is None:
                foreign_keys_backpop[module] = {}
            if foreign_keys_backpop[module].get(ref_key) is None:
                foreign_keys_backpop[module][ref_key] = {}
            foreign_keys_backpop[module][ref_key][module_name] = key

    return foreign_keys_backpop


def _get_meta_table_results(
    entity_name, link_tables, foreign_keys_backpop, **entity_kwargs
):
    """Follow reverse foreign keys from an entity towards a link table.

    Starts from `ln.db.query.<entity_name>(**entity_kwargs)` and repeatedly
    queries the tables that reference the current table's `id` column (as
    listed in `foreign_keys_backpop`) until the current table is one of
    `link_tables`.

    Args:
        entity_name: Name of the query function on `ln.db.query` to start from.
        link_tables: Table names at which the walk stops.
        foreign_keys_backpop: Reverse foreign-key map shaped like
            {'tissue': {'id': {'biosample': 'tissue_id'}}}.
        **entity_kwargs: Keyword filters passed to the initial query.

    Returns:
        The results of the last query made. If the current table has no
        `id` entry in `foreign_keys_backpop`, the results gathered so far
        are returned instead.
    """
    results = getattr(ln.db.query, entity_name)(**entity_kwargs)
    results_ids = [i.id for i in results]
    module_name = entity_name
    while module_name not in link_tables:
        # NOTE(review): raises KeyError when module_name has no entry at all
        # in foreign_keys_backpop (i.e. no table references it).
        if "id" not in foreign_keys_backpop[module_name]:
            return results
        # {referencing_table: referencing_column} for the current table's id.
        parents = foreign_keys_backpop[module_name]["id"]
        for table_name, table_ref_id in parents.items():
            # results is reset for each referencing table, so only the rows
            # of the last table in `parents` survive this loop.
            results = []
            for result_id in results_ids:
                results += getattr(ln.db.query, table_name)(**{table_ref_id: result_id})
            # NOTE(review): results_ids is replaced mid-loop, so later tables
            # in `parents` are queried with ids from this table — confirm intended.
            # Link-table rows are not reduced to ids
            # (presumably they have no `id` attribute — TODO confirm).
            if table_name not in link_tables:
                results_ids = [i.id for i in results]
        # Continue the walk from the last referencing table iterated above.
        module_name = table_name
    return results


def query_dobject_from_metadata(entity_name, **entity_kwargs):
    """Return the dobjects linked to metadata rows that match a query.

    Args:
        entity_name: Name of the metadata entity to query (e.g. "tissue").
        **entity_kwargs: Field filters forwarded to the entity query.

    Returns:
        A list with the results of `ln.db.query.dobject(id=...)` for each
        distinct `dobject_id` found; an empty list if nothing matched.
    """
    engine = settings.instance.db_engine()
    foreign_keys = _get_all_foreign_keys(engine)
    foreign_keys_backpop = _backpopulate_foreign_keys(foreign_keys)
    link_tables = [i for i in ln.schema.list_entities() if i.startswith("dobject_")]
    # The comma after `foreign_keys_backpop` is essential: without it the
    # expression becomes `foreign_keys_backpop ** entity_kwargs` (dict ** dict),
    # which raises TypeError instead of forwarding the filters.
    meta_results = _get_meta_table_results(
        entity_name=entity_name,
        link_tables=link_tables,
        foreign_keys_backpop=foreign_keys_backpop,
        **entity_kwargs,
    )
    # Deduplicate so each dobject is looked up only once.
    dobject_ids = {dobject.dobject_id for dobject in meta_results}
    dobjects = []
    for dobject_id in dobject_ids:
        dobjects += ln.db.query.dobject(id=dobject_id)
    return dobjects

In [None]:
# Find the dobjects linked to tissue rows whose ontology_id is "CL:2000001".
dobjects = query_dobject_from_metadata(entity_name="tissue", ontology_id="CL:2000001")

In [None]:
# Repeat of the previous cell's tissue query (identical arguments).
dobjects = query_dobject_from_metadata(entity_name="tissue", ontology_id="CL:2000001")

## Collaborate on Lamin Hub

In [None]:
# ln.db.hub.push_instance()

You can visualize your instance on Lamin Hub: https://lamin.ai/test-user2/instances