# `Organism`: ncbitaxon, 2025-03-13

In [1]:
import bionty as bt
from bionty.base._ontology import Ontology

In [6]:
# Open the local NCBITaxon ontology file (2025-03-13 release) for `Organism`.
# NOTE(review): the path is absolute and machine-specific, so this cell only
# re-runs on a checkout that lives at exactly this location.
onto = Ontology(
    "/home/lukas/code/bionty/bionty/base/_dynamic/ontology_all__ncbitaxon__2025-03-13__Organism"
)

In [7]:
# Display the ontology's repr to confirm which file was loaded.
onto

Ontology('/home/lukas/code/bionty/bionty/base/_dynamic/ontology_all__ncbitaxon__2025-03-13__Organism', timeout=100)

In [8]:
# Spot check: look up a single term by its ontology id ...
term = onto["NCBITaxon:9606"]
# ... and display it.
term

Term('NCBITaxon:9606', name='Homo sapiens')

In [9]:
# Spot check: descriptions of the EXACT-scope synonyms of the term looked up above.
[syn.description for syn in term.synonyms if syn.scope == "EXACT"]

['human']

In [10]:
# Flatten every ontology term into one record:
# (ontology_id, common name, scientific name, definition, synonyms, parents).
df_values = []
# The loop variable is `onto_term`, not `term`, so that running this cell does not
# overwrite the `term` inspected in the earlier spot-check cells.
for onto_term in onto.terms():
    # Definition text, passed through `.title()` exactly as before.
    # NOTE(review): if `definition` is string-like this title-cases every word;
    # confirm that is intended.
    definition = (
        None if onto_term.definition is None else onto_term.definition.title()
    )

    # Descriptions of the EXACT-scope synonyms, sorted. pronto exposes `synonyms`
    # as an unordered set, so without sorting the choice of common name below
    # (and the synonym order) could differ between runs.
    exact_synonyms = sorted(
        syn.description for syn in onto_term.synonyms if syn.scope == "EXACT"
    )
    # The first EXACT synonym becomes the common name; with none available,
    # fall back to the term's own (scientific) name.
    if exact_synonyms:
        common_name, *remaining_synonyms = exact_synonyms
    else:
        common_name, remaining_synonyms = onto_term.name, []
    # Pipe-join whatever synonyms are left; store None instead of an empty string.
    synonyms = "|".join(remaining_synonyms) or None

    # Ids of the direct (distance 1) NCBITaxon superclasses, sorted so the
    # stored parent lists are stable across runs.
    superclasses = sorted(
        parent.id
        for parent in onto_term.superclasses(distance=1, with_self=False).to_set()
        if parent.id.startswith("NCBITaxon")
    )

    df_values.append(
        (
            onto_term.id,
            common_name,
            onto_term.name,
            definition,
            synonyms,
            superclasses,
        )
    )

In [11]:
# Number of term records collected by the loop above.
len(df_values)

2650584

In [12]:
import pandas as pd

# Turn the collected records into a table with one named column per tuple field ...
df = pd.DataFrame(
    df_values,
    columns=[
        "ontology_id",
        "name",
        "scientific_name",
        "definition",
        "synonyms",
        "parents",
    ],
)
# ... then key the table by ontology id.
df = df.set_index("ontology_id")

In [13]:
# Preview the assembled table (the notebook display truncates the middle rows).
df

Unnamed: 0_level_0,name,scientific_name,definition,synonyms,parents
ontology_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
NCBITaxon:1,root,root,,,[]
NCBITaxon:10,Cellvibrio,Cellvibrio,,,[NCBITaxon:1706371]
NCBITaxon:100,Ancylobacter aquaticus,Ancylobacter aquaticus,,,[NCBITaxon:99]
NCBITaxon:100000,Herbaspirillum sp. BA12,Herbaspirillum sp. BA12,,,[NCBITaxon:2624150]
NCBITaxon:1000000,Microbacterium sp. 6.11-VPa,Microbacterium sp. 6.11-VPa,,,[NCBITaxon:2609290]
...,...,...,...,...,...
NCBITaxon:tribe,tribe,tribe,,,[]
NCBITaxon:varietas,varietas,varietas,,,[]
http://purl.obolibrary.org/obo/NCBITaxon#_species_group,species group,species group,,,[]
http://purl.obolibrary.org/obo/NCBITaxon#_species_subgroup,species subgroup,species subgroup,,,[]


In [14]:
# Lower-case the common-name column. Bracket indexing is used instead of
# attribute access (`df.name = ...`): attribute assignment only targets a column
# when that column already exists and is easy to confuse with a plain attribute.
df["name"] = df["name"].str.lower()

In [15]:
# Spot check: show the row for a single ontology id after the lower-casing step.
df.loc["NCBITaxon:9606"]

name                          human
scientific_name        Homo sapiens
definition                     None
synonyms                       None
parents            [NCBITaxon:9605]
Name: NCBITaxon:9606, dtype: object

In [16]:
# Persist the table as a parquet file.
# NOTE(review): the target path is absolute and machine-specific; the write
# only succeeds where this directory exists.
df.to_parquet(
    "/home/lukas/code/bionty/bionty/base/_dynamic/df_all__ncbitaxon__2025-03-13__Organism.parquet"
)