In [None]:
import pandas as pd
import pronto

from pronto import LiteralPropertyValue

In [None]:
# Read the gzipped, tab-separated lipid table, decoding the text as latin-1.
df_lipids = pd.read_table("../data/in/lipids.tsv.gz", encoding="latin-1")

In [None]:
# Print a summary of the loaded frame (columns, non-null counts, dtypes).
df_lipids.info()

In [None]:
# Preview the first 20 rows of the raw table.
df_lipids.head(20)

In [None]:
# Columns used to reconstruct the graph.
columns_graph = ["Name", "Level", "Lipid ID", "Lipid class*", "Parent", "Components*"]

# Keep only those columns from the raw table and preview the result.
subset = df_lipids.loc[:, columns_graph]
subset.head()

In [None]:
# Number of rows that have a parent; count() already ignores missing values.
subset["Parent"].count()

In [None]:
# Show the non-missing entries of the "Components*" column.
subset["Components*"].dropna()

In [None]:
# Minimal two-term ontology used to try out the pronto API.
ontology = pronto.Ontology()

# Create both terms first, then label them.
term_a = ontology.create_term("LIPID:0001")
term_b = ontology.create_term("LIPID:0002")
term_a.name = "Example Lipid A"
term_b.name = "Example Lipid B"

# Hierarchical relationship (is_a): term_a becomes a superclass of term_b.
term_b.superclasses().add(term_a)

In [None]:
# Print every term of the toy ontology, one per line.
for term in ontology.terms():
    print(term)

In [None]:
# Create Pronto Ontology
swisslipids = pronto.Ontology()


# Create nodes: one term per Lipid ID; IDs already in the ontology are skipped.
for _, row in subset.iterrows():
    lipid_id = str(row["Lipid ID"])

    if lipid_id not in swisslipids:
        term = swisslipids.create_term(lipid_id)
        # Add name
        term.name = row.get("Name", "")
        # Add properties as property/value annotation objects rather than bare
        # "key=value" strings, so they expose `.property` / `.literal` like the
        # annotations read back elsewhere in this notebook.
        level = str(row.get("Level", ""))
        term_class = str(row.get("Lipid class*", ""))
        term.annotations.add(LiteralPropertyValue("Level", level))
        term.annotations.add(LiteralPropertyValue("Class", term_class))


# Create relationships (parent-child relationship)
for _, row in subset.iterrows():
    lipid_id = str(row["Lipid ID"])
    parent_id = str(row["Parent"]) if pd.notna(row["Parent"]) else None

    # Rows with no parent contribute no edge.
    if parent_id:
        term = swisslipids[lipid_id]
        swisslipids[parent_id].subclasses().add(term)

In [None]:
def get_properties(term):
    """Collect a term's annotations into a dictionary.

    Args:
        term: Object whose ``annotations`` attribute is an iterable of items
            exposing ``property`` and ``literal`` attributes.

    Returns:
        dict mapping each annotation's ``property`` to its ``literal``. If
        several annotations share a property, the one iterated last wins.
    """
    properties = {}
    for annotation in term.annotations:
        properties[annotation.property] = annotation.literal
    return properties

In [None]:
# Show the raw row(s) whose Lipid ID is SLM:000392021.
df_lipids.loc[df_lipids["Lipid ID"].eq("SLM:000392021")]

In [None]:
swisslipids = pronto.Ontology()


# Create terms: one per Lipid ID that is not in the ontology yet.
for _, row in subset.iterrows():
    lipid_id = str(row["Lipid ID"])
    if lipid_id in swisslipids:
        continue

    term = swisslipids.create_term(lipid_id)
    term.name = row.get("Name", "")

    # Attach level and class as explicit literal annotations.
    level = str(row.get("Level", ""))
    term_class = str(row.get("Lipid class*", ""))
    term.annotations.add(LiteralPropertyValue("Level", level))
    term.annotations.add(LiteralPropertyValue("Class", term_class))

In [None]:
# Create relationships (parent-child): register each lipid as a subclass of
# the term named in its "Parent" column.
for _, row in subset.iterrows():
    lipid_id = str(row["Lipid ID"])
    parent_id = None
    if pd.notna(row["Parent"]):
        parent_id = str(row["Parent"])

    # Rows without a usable parent add no edge.
    if not parent_id:
        continue

    term = swisslipids[lipid_id]
    swisslipids[parent_id].subclasses().add(term)

In [None]:
# Fetch a single term from the ontology by its identifier.
my_term = swisslipids["SLM:000000084"]

In [None]:
# Print each term, followed by one tab-prefixed line per annotation showing
# its property and literal value.
for term in swisslipids.terms():
    print(term)
    for annotation in term.annotations:
        print("\t", annotation.property, annotation.literal)

In [None]:
# Rebind my_term to a different term, looked up by identifier.
my_term = swisslipids["SLM:000780586"]

In [None]:
# Read the "Level" annotation of one term through the get_properties helper.
get_properties(swisslipids["SLM:000780583"])["Level"]

In [None]:
# Serialise the ontology to lipids.obo in OBO format; the file is opened in
# binary mode for the dump.
with open("lipids.obo", mode="wb") as f:
    swisslipids.dump(f, format="obo")

In [None]:
from ontograph.client import ClientOntology

In [None]:
# Instantiate the ontograph client with its default arguments.
swiss_client = ClientOntology()

In [None]:
# Point the client at the OBO file written by the dump cell.
swiss_client.load(file_path_ontology="lipids.obo")

In [None]:
# NOTE(review): presumably returns the root term(s) of the loaded ontology — confirm against ontograph.
swiss_client.get_root()

In [None]:
# Show the raw row(s) whose Lipid ID is SLM:000399814.
df_lipids.loc[df_lipids["Lipid ID"].eq("SLM:000399814")]

In [None]:
swisslipids = pronto.Ontology()

# Create terms: one per Lipid ID that is not in the ontology yet.
for _, row in subset.iterrows():
    lipid_id = str(row["Lipid ID"])

    if lipid_id in swisslipids:
        continue

    term = swisslipids.create_term(lipid_id)
    term.name = row.get("Name", "")

    # Attach level and class as explicit literal annotations.
    level = str(row.get("Level", ""))
    term_class = str(row.get("Lipid class*", ""))
    term.annotations.add(LiteralPropertyValue("Level", level))
    term.annotations.add(LiteralPropertyValue("Class", term_class))

In [None]:
# Create relationships (class-member): register each lipid as a subclass of
# the term named in its "Lipid class*" column.
# NOTE(review): assumes every non-missing class value is a single Lipid ID that
# already exists in `swisslipids`; otherwise the lookup below fails — TODO confirm.
for _, row in subset.iterrows():
    lipid_id = str(row["Lipid ID"])
    class_id = str(row["Lipid class*"]) if pd.notna(row["Lipid class*"]) else None

    # Rows without a class contribute no edge.
    if class_id:
        term = swisslipids[lipid_id]
        swisslipids[class_id].subclasses().add(term)

In [None]:
# Show the raw rows whose "Lipid class*" value is exactly SLM:000399814.
df_lipids.loc[df_lipids["Lipid class*"].eq("SLM:000399814")]

In [None]:
# Distinct values found in the "Lipid class*" column.
df_lipids["Lipid class*"].unique()

In [None]:
# Distinct values found in the "Lipid ID" column.
df_lipids["Lipid ID"].unique()

In [None]:
# Rows of the raw table that have no "Lipid class*" value; preview the first few.
missing_rows = df_lipids.loc[df_lipids["Lipid class*"].isna()]
missing_rows.head()

In [None]:
# Distinct values found in the "Level" column.
df_lipids["Level"].unique()

In [None]:
# Show the raw rows whose Level is "Category".
df_lipids.loc[df_lipids["Level"].eq("Category")]