In [None]:
import pandas as pd
import pronto

from pronto import LiteralPropertyValue

In [None]:
# Load the lipid table from the tab-separated file (pandas infers the gzip
# compression from the ".gz" extension by default).
# NOTE(review): latin-1 can decode any byte sequence, so a wrong encoding would
# go unnoticed here — confirm the file's actual encoding.
df_lipids = pd.read_table(
    filepath_or_buffer="../data/in/lipids.tsv.gz",
    encoding="latin-1",
)

In [None]:
# Summarize the columns, their dtypes and their non-null counts.
df_lipids.info()

In [None]:
# Preview the first 50 rows of the table as loaded.
df_lipids.head(50)

In [None]:
def _sanitize_column_names(columns):
    """Return identifier-safe, unique names for ``columns``.

    Every character that is neither alphanumeric nor ``_`` is replaced by
    ``_``. Because distinct headers can collapse to the same text (for
    example names that differ only in ``+`` versus ``-``), a numeric suffix
    (``_2``, ``_3``, ...) is appended to later duplicates so that the
    resulting column names stay unique.

    Args:
        columns: Iterable of column labels; each label is converted with ``str``.

    Returns:
        list[str]: The sanitized names, in the same order as ``columns``.
    """
    taken = set()
    sanitized = []
    for column in columns:
        base = "".join(ch if ch.isalnum() or ch == "_" else "_" for ch in str(column))
        name = base
        suffix = 1
        # Only names that collide with an earlier one receive a suffix.
        while name in taken:
            suffix += 1
            name = f"{base}_{suffix}"
        taken.add(name)
        sanitized.append(name)
    return sanitized


# Re-running this cell is harmless: names that are already sanitized and unique are kept.
df_lipids.columns = _sanitize_column_names(df_lipids.columns)

In [None]:
# Show the column names after sanitization.
df_lipids.columns

In [None]:
# Preview the first 50 rows again, now with the sanitized column names.
df_lipids.head(50)

In [None]:
# Look up the row(s) of a single lipid by its identifier.
df_lipids[df_lipids["Lipid_ID"] == 'SLM:000508823']

In [None]:
# Ontology-level metadata. Each key/value pair is set as an attribute on
# ``ontology.metadata`` by ``add_ontology_metadata``.
# NOTE(review): these key names are set verbatim; whether pronto writes
# attributes such as "title" or "license" to the OBO header is not visible
# here — confirm against the generated file.
METADATA_ONTOLOGY = {
    "title": "SwissLipids Ontology",
    "description": (
        "Ontology representing SwissLipids data, including lipid IDs, classes, and parent relationships."
    ),
    "version": "1.0.0",
    "creators": ["SIB Swiss Institute of Bioinformatics."],
    "license": "CC-BY 4.0",
    "created": "2025-08-29",
}


# Names of the (sanitized) table columns that hold the term identifier, the
# parent identifier(s) and the class identifier(s); consumed by
# ``generate_ontology_from_table``.
ID_COLUMNS = {
    "term_id": "Lipid_ID",
    "parent_id": "Parent",
    "class_id": "Lipid_class_"
}

def to_obo(file_path, ontology):
    """Write ``ontology`` to ``file_path`` in OBO format.

    Args:
        file_path: Destination path; the file is opened in binary write mode,
            so an existing file is overwritten.
        ontology: Object exposing ``dump(file, format=...)``.
    """
    with open(file_path, mode="wb") as obo_file:
        ontology.dump(obo_file, format="obo")


def add_ontology_metadata(ontology, metadata):
    """Set every key/value pair of ``metadata`` as an attribute of ``ontology.metadata``.

    Args:
        ontology: Object exposing a ``metadata`` attribute.
        metadata: Mapping of attribute name to value.

    NOTE(review): keys are applied verbatim with ``setattr``; whether the
    ontology library serializes attributes it does not define itself is not
    visible here — confirm.
    """
    for key in metadata:
        setattr(ontology.metadata, key, metadata[key])


def generate_ontology_from_table(dataset: pd.DataFrame, id_columns: dict, metadata: dict) -> "pronto.Ontology":
    """Build a pronto ontology from a table of terms and their parent/class links.

    Every identifier found in the term, parent and class columns becomes a
    term of the ontology. The term of each row is registered as a subclass of
    each of its parents and classes, and receives the row's ``Name`` value as
    its name.

    Args:
        dataset: Table with one row per term. It must contain a ``Name``
            column and the columns referenced by ``id_columns``.
        id_columns: Maps the keys ``"term_id"``, ``"parent_id"`` and
            ``"class_id"`` to column names of ``dataset``. Parent and class
            cells may hold several identifiers separated by ``|``.
        metadata: Key/value pairs forwarded to ``add_ontology_metadata``.

    Returns:
        The populated ontology.

    Raises:
        KeyError: If ``id_columns`` lacks one of the required keys or
            ``dataset`` lacks one of the referenced columns.
    """
    term_column = id_columns["term_id"]
    parent_column = id_columns["parent_id"]
    class_column = id_columns["class_id"]

    # Create the ontology and attach its metadata
    ontology = pronto.Ontology()
    add_ontology_metadata(ontology=ontology, metadata=metadata)

    # Terms created so far, keyed by identifier. The ontology starts empty, so
    # this cache is complete and avoids repeated lookups in the ontology.
    created_terms = {}

    def _split_ids(cell):
        """Return the stripped, non-empty identifiers of a ``|``-separated cell."""
        if pd.isna(cell):
            return []
        return [part.strip() for part in str(cell).split("|") if part.strip()]

    def _get_or_create(identifier):
        """Return the term for ``identifier``, creating it on first use."""
        term = created_terms.get(identifier)
        if term is None:
            term = ontology.create_term(identifier)
            created_terms[identifier] = term
        return term

    # Iterate over the column values directly so the column names do not have
    # to be valid Python identifiers (as attribute access on row tuples would require).
    rows = zip(
        dataset[term_column],
        dataset[parent_column],
        dataset[class_column],
        dataset["Name"],
    )
    for raw_term_id, raw_parents, raw_classes, raw_name in rows:
        # A row without an identifier cannot be turned into a term.
        if pd.isna(raw_term_id):
            continue
        term_id = str(raw_term_id).strip()
        if not term_id:
            continue

        term = _get_or_create(term_id)

        # Only set a name when the table provides one; a missing value would
        # otherwise be stored as (or rejected for being) a float NaN.
        if pd.notna(raw_name):
            term.name = str(raw_name)

        # Parents and classes are both multi-valued and both expressed as
        # subclass links; ``dict.fromkeys`` removes repeats while keeping order.
        linked_ids = dict.fromkeys(_split_ids(raw_parents) + _split_ids(raw_classes))
        for linked_id in linked_ids:
            _get_or_create(linked_id).subclasses().add(term)

    return ontology

In [None]:
# Build the ontology from the full lipid table using the column mapping and
# metadata defined above.
swissontology = generate_ontology_from_table(dataset=df_lipids,
                                             id_columns=ID_COLUMNS,
                                             metadata=METADATA_ONTOLOGY)

In [None]:
# Number of terms in the generated ontology.
len(list(swissontology.terms()))

In [None]:
# Write the generated ontology to disk in OBO format.
to_obo(file_path="../data/out/lipids.obo", ontology=swissontology)

In [None]:
def generate_mapping_file_from_table(dataset):
    """Placeholder that is not implemented yet: does nothing and returns ``None``.

    TODO: implement the mapping-file generation or remove this stub.
    """
    
    pass

In [None]:
# Check that the description set by add_ontology_metadata is present on the metadata object.
swissontology.metadata.description

In [None]:
# Create Pronto Ontology
# NOTE(review): `subset` is not defined in any visible cell; it is assumed to be
# a DataFrame that still carries the raw column names ("Lipid ID",
# "Lipid class*", ...) — confirm where it comes from.
swisslipids = pronto.Ontology()


# Create nodes
for _, row in subset.iterrows():
    term_id = str(row["Lipid ID"])

    if term_id not in swisslipids:
        term = swisslipids.create_term(term_id)
        # Add name
        term.name = row.get("Name", "")
        # Add properties as property/value annotation objects rather than
        # plain "key=value" strings, so that consumers such as get_properties
        # can read `.property` and `.literal` from every annotation.
        term.annotations.add(LiteralPropertyValue("Level", str(row.get("Level", ""))))
        term.annotations.add(LiteralPropertyValue("Class", str(row.get("Lipid class*", ""))))


# Create relationships (parent-child relationship)
# This runs as a second pass so that every term exists before it is linked.
for _, row in subset.iterrows():
    term_id = str(row["Lipid ID"])
    parent_id = str(row["Parent"]) if pd.notna(row["Parent"]) else None

    if parent_id:
        term = swisslipids[term_id]
        swisslipids[parent_id].subclasses().add(term)

In [None]:
def get_properties(term):
    """Return a dict mapping each annotation's ``property`` to its ``literal``.

    When several annotations of ``term`` share the same property, the value
    of the last one iterated wins.
    """
    properties = {}
    for annotation in term.annotations:
        properties[annotation.property] = annotation.literal
    return properties

In [None]:
# Start from an empty ontology.
# NOTE(review): `subset` is not defined in any visible cell — confirm where it comes from.
swisslipids = pronto.Ontology()


# Create one term per distinct lipid ID; repeated IDs are skipped.
for _, row in subset.iterrows():
    term_id = str(row["Lipid ID"])
    if term_id in swisslipids:
        continue

    term = swisslipids.create_term(term_id)
    term.name = row.get("Name", "")

    # Attach level and class as explicit property/value annotation objects.
    level = str(row.get("Level", ""))
    term_class = str(row.get("Lipid class*", ""))
    term.annotations.add(LiteralPropertyValue("Level", level))
    term.annotations.add(LiteralPropertyValue("Class", term_class))

In [None]:
# Create relationships (parent-child relationship)
# Rows without a parent are skipped; every other row's term is registered as a
# subclass of its parent term.
for _, row in subset.iterrows():
    term_id = str(row["Lipid ID"])
    parent_id = None if not pd.notna(row["Parent"]) else str(row["Parent"])

    if not parent_id:
        continue

    term = swisslipids[term_id]
    swisslipids[parent_id].subclasses().add(term)

In [None]:
# Fetch a single term by identifier for inspection.
my_term = swisslipids["SLM:000000084"]

In [None]:
# Print every term, followed by one tab-indented line per annotation
# showing its property and literal value.
for term in swisslipids.terms():
    print(term)
    for annotation in term.annotations:
        print("\t", annotation.property, annotation.literal)

In [None]:
# Rebind `my_term` to another term of the ontology.
my_term = swisslipids["SLM:000780586"]

In [None]:
# Read the "Level" annotation of a single term through get_properties.
get_properties(swisslipids["SLM:000780583"])["Level"]

In [None]:
# Write the ontology built above to the working directory, reusing the
# to_obo helper defined earlier in this notebook.
to_obo(file_path="lipids.obo", ontology=swisslipids)

In [None]:
from ontograph.client import ClientOntology

In [None]:
# Create an ontograph client used to explore the exported ontology.
swiss_client = ClientOntology()

In [None]:
# Load the "lipids.obo" file written to the working directory by the dump cell above.
swiss_client.load(file_path_ontology="lipids.obo")

In [None]:
# Ask the client for the ontology root (presumably the terms without a parent — confirm against ontograph).
swiss_client.get_root()

In [None]:
# Look up a single lipid by identifier. The ID column is named "Lipid_ID"
# once the column names have been sanitized earlier in the notebook.
df_lipids[df_lipids["Lipid_ID"] == "SLM:000399814"]

In [None]:
# Rebuild the ontology from scratch (terms only, no relationships).
# NOTE(review): `subset` is not defined in any visible cell — confirm where it comes from.
swisslipids = pronto.Ontology()

# Create one term per distinct lipid ID; repeated IDs are skipped.
for _, row in subset.iterrows():
    term_id = str(row["Lipid ID"])
    if term_id in swisslipids:
        continue

    term = swisslipids.create_term(term_id)
    term.name = row.get("Name", "")

    # Attach level and class as explicit property/value annotation objects.
    level = str(row.get("Level", ""))
    term_class = str(row.get("Lipid class*", ""))
    term.annotations.add(LiteralPropertyValue("Level", level))
    term.annotations.add(LiteralPropertyValue("Class", term_class))

In [None]:
# Count the terms of the ontology by iterating over them, then print the count.
total = 0
for term in swisslipids.terms():
    total += 1
print(total)

In [None]:
# Print every element of `classes`, then how many there are.
# NOTE(review): `classes` is not defined in any visible cell, so this cell
# fails on a fresh kernel unless it is defined elsewhere — confirm.
for item in classes:
    print(item)

print(len(classes))

In [None]:
# Rows that have no parent identifier.
df_lipids[(df_lipids["Parent"].isna())]

In [None]:
# Number of rows per lipid class value (whitespace-stripped). The class column
# is named "Lipid_class_" once the column names have been sanitized.
df_lipids.groupby(df_lipids["Lipid_class_"].str.strip()).size()

In [None]:
# Look up a single lipid by identifier, using the sanitized ID column name.
df_lipids[df_lipids["Lipid_ID"] == "SLM:000001080"]

In [None]:
# Rows whose class cell is exactly this identifier, using the sanitized class column name.
df_lipids[df_lipids["Lipid_class_"] == "SLM:000399814"]

In [None]:
# Distinct values of the class column (sanitized column name).
df_lipids["Lipid_class_"].unique()

In [None]:
# Distinct lipid identifiers (sanitized column name).
df_lipids["Lipid_ID"].unique()

In [None]:
# Rows without any class identifier (sanitized class column name); preview the first ones.
missing_rows = df_lipids[df_lipids["Lipid_class_"].isna()]
missing_rows.head()

In [None]:
# Distinct values of the "Level" column.
df_lipids["Level"].unique()

In [None]:
# Rows whose level is "Category".
df_lipids[(df_lipids["Level"] == "Category")]

In [None]:
# Rows that have neither a parent nor a class identifier (sanitized class column name).
df_lipids[df_lipids["Parent"].isna() & df_lipids["Lipid_class_"].isna()]

In [None]:
# Rows that have neither a parent nor a class identifier (sanitized class column name).
df_lipids[df_lipids["Parent"].isna() & df_lipids["Lipid_class_"].isna()]

In [None]:
# Debug listing: print the stripped ID, class and parent of every row.
# No terms are created in this cell.
# NOTE(review): `subset` is not defined in any visible cell; it is assumed to
# still carry the raw column names ("Lipid ID", "Lipid class*") — confirm.
# Missing values are printed as the string "nan" because of the str() conversion.
for idx, row in subset.iterrows():
    term_id = str(row["Lipid ID"]).strip()
    lipid_class = str(row["Lipid class*"]).strip()
    parent_id = str(row["Parent"]).strip()

    print(f"\nLipid ID: {term_id}")
    print(f"Lipid class: {lipid_class}")
    print(f"Parent ID: {parent_id}")

In [None]:
# Rows that do have a class identifier (sanitized class column name).
df_lipids[df_lipids["Lipid_class_"].notna()]

In [None]:
# Sanity check: splitting an identifier that contains no "|" yields a one-element list.
"SLM:000501265".split("|")

In [None]:
from ontograph.client import ClientOntology

In [None]:
# Create a fresh client and load the OBO file written by to_obo earlier in the notebook.
swiss_client = ClientOntology()

swiss_client.load(file_path_ontology="../data/out/lipids.obo")

In [None]:
# Ask the client for the trajectories from the root to the given term
# (exact return structure is defined by ontograph and not visible here).
traj = swiss_client.get_trajectories_from_root(term_id="SLM:000782225")

In [None]:
# Render the trajectories computed above as a tree.
swiss_client.print_term_trajectories_tree(traj)