In [None]:
import pandas as pd
import pronto

from pronto import LiteralPropertyValue

In [None]:
# Read the lipid table from its tab-separated source file.
df_lipids = pd.read_table("../data/in/lipids.tsv.gz", encoding="latin-1")

In [None]:
# Summarise the loaded table: columns, dtypes and non-null counts.
df_lipids.info()

In [None]:
# Preview the first 50 rows of the raw table.
df_lipids.head(50)

In [None]:
# Columns needed to rebuild the lipid hierarchy as a graph.
columns_graph = ["Lipid ID", "Lipid class*", "Parent", "Name", "Level", "Components*"]

# Narrow the full table to those columns and preview the first rows.
subset = df_lipids.loc[:, columns_graph]
subset.head(50)

In [None]:
# Number of rows that have a parent (`count` already ignores missing values).
subset["Parent"].count()

In [None]:
# Non-missing values of the "Components*" column.
subset["Components*"].dropna()

In [None]:
# Best time: 30 seconds
def _get_or_create_term(ontology, term_id):
    """Return the term called ``term_id`` in ``ontology``, creating it if absent."""
    if term_id in ontology:
        return ontology.get_term(term_id)
    return ontology.create_term(term_id)


# Create Ontology
swissontology = pronto.Ontology()


# Extract all terms (nodes) and link each one to its parent and class terms.
for _, row in subset.iterrows():
    # A row without an ID cannot produce a term. Skip it entirely so the
    # relationships below are never attached to the term of a previous row
    # (or fail with a NameError when the very first row has no ID).
    if pd.isna(row["Lipid ID"]):
        continue

    lipid_id = str(row["Lipid ID"]).strip()
    term = _get_or_create_term(swissontology, lipid_id)

    # Only assign a name when the table has one, instead of storing NaN.
    lipid_name = row.get("Name", "")
    if pd.notna(lipid_name):
        term.name = lipid_name

    # Add parent relationship
    if pd.notna(row["Parent"]):
        lipid_parent = str(row["Parent"]).strip()
        _get_or_create_term(swissontology, lipid_parent).subclasses().add(term)

    # Add class relationship; the cell may list several IDs separated by "|".
    if pd.notna(row["Lipid class*"]):
        for item in str(row["Lipid class*"]).split("|"):
            lipid_class = item.strip()
            # Ignore empty fragments such as those left by a trailing "|".
            if not lipid_class:
                continue
            _get_or_create_term(swissontology, lipid_class).subclasses().add(term)

In [None]:
# Serialise the ontology to an OBO file under the output data directory.
with open("../data/out/lipids.obo", "wb") as obo_file:
    swissontology.dump(obo_file, format="obo")

In [None]:
# Distinct values found in the "Level" column.
df_lipids["Level"].unique()

In [None]:
# Rows whose level is "Category".
is_category = df_lipids["Level"] == "Category"
df_lipids.loc[is_category]

In [None]:
# Look up the table row for a single lipid ID.
df_lipids[df_lipids["Lipid ID"] == "SLM:000782223"]

In [None]:
# Create Pronto Ontology
swisslipids = pronto.Ontology()


# Create nodes: one term per distinct lipid ID.
for _, row in subset.iterrows():
    lipid_id = str(row["Lipid ID"])

    if lipid_id not in swisslipids:
        term = swisslipids.create_term(lipid_id)
        # Add name
        term.name = row.get("Name", "")
        # Add properties as annotation objects rather than bare "key=value"
        # strings, so every annotation exposes `.property` and `.literal`.
        level = str(row.get("Level", ""))
        lipid_class = str(row.get("Lipid class*", ""))
        term.annotations.add(LiteralPropertyValue("Level", level))
        term.annotations.add(LiteralPropertyValue("Class", lipid_class))


# Create relationships (parent-child relationship)
for _, row in subset.iterrows():
    lipid_id = str(row["Lipid ID"])
    parent_id = str(row["Parent"]) if pd.notna(row["Parent"]) else None

    if parent_id:
        term = swisslipids[lipid_id]
        # Indexing the ontology fails if the parent ID was never created as a term.
        swisslipids[parent_id].subclasses().add(term)

In [None]:
def get_properties(term):
    """Collect a term's annotations into a ``{property: value}`` dictionary.

    Args:
        term: Object exposing an iterable ``annotations`` attribute whose items
            have a ``property`` attribute and either a ``literal`` or a
            ``resource`` attribute.

    Returns:
        dict: Maps each annotation's ``property`` to its ``literal`` value, or
        to its ``resource`` value when the annotation has no literal (``None``
        if it has neither). If several annotations share a property, only one
        value is kept.
    """
    properties = {}
    for annotation in term.annotations:
        if hasattr(annotation, "literal"):
            value = annotation.literal
        else:
            # Resource-valued annotations carry `.resource` instead of `.literal`.
            value = getattr(annotation, "resource", None)
        properties[annotation.property] = value
    return properties

In [None]:
# Fresh, empty ontology to populate.
swisslipids = pronto.Ontology()


# Create one term per distinct lipid ID, annotated with its level and class.
for _, row in subset.iterrows():
    lipid_id = str(row["Lipid ID"])
    if lipid_id in swisslipids:
        # Already created from an earlier row.
        continue

    term = swisslipids.create_term(lipid_id)
    term.name = row.get("Name", "")

    # Store level and class as literal annotations (stringified values).
    level = str(row.get("Level", ""))
    term_class = str(row.get("Lipid class*", ""))
    term.annotations.add(LiteralPropertyValue("Level", level))
    term.annotations.add(LiteralPropertyValue("Class", term_class))

In [None]:
# Link every lipid that has a parent to that parent term (parent-child edges).
for _, row in subset.iterrows():
    raw_parent = row["Parent"]
    # Rows without a usable parent value contribute no edge.
    if pd.isna(raw_parent) or not str(raw_parent):
        continue

    term = swisslipids[str(row["Lipid ID"])]
    parent_term = swisslipids[str(raw_parent)]
    parent_term.subclasses().add(term)

In [None]:
# Fetch a single term from the ontology by its ID.
my_term = swisslipids["SLM:000000084"]

In [None]:
# Preview the annotations of the first few terms only; printing every term
# of the ontology would flood the cell output.
max_terms_shown = 20  # raise this to inspect more terms

for shown, term in enumerate(swisslipids.terms()):
    if shown >= max_terms_shown:
        break
    print(term)
    for annotation in term.annotations:
        print("\t", annotation.property, annotation.literal)

In [None]:
# Fetch a single term from the ontology by its ID.
my_term = swisslipids["SLM:000780586"]

In [None]:
# Read the "Level" annotation of one term through the helper defined earlier.
get_properties(swisslipids["SLM:000780583"])["Level"]

In [None]:
# Write the annotated ontology to an OBO file in the working directory.
with open("lipids.obo", "wb") as obo_file:
    swisslipids.dump(obo_file, format="obo")

In [None]:
from ontograph.client import ClientOntology

In [None]:
# Create the ontograph client used to query the exported ontology.
swiss_client = ClientOntology()

In [None]:
# Load "lipids.obo" from the working directory into the client.
swiss_client.load(file_path_ontology="lipids.obo")

In [None]:
# Presumably returns the root term(s) of the loaded ontology — TODO confirm
# against the ontograph documentation.
swiss_client.get_root()

In [None]:
# Look up the table row for a single lipid ID.
df_lipids[df_lipids["Lipid ID"] == "SLM:000399814"]

In [None]:
# Fresh, empty ontology to populate (terms only, no relationships).
swisslipids = pronto.Ontology()

# Create one term per distinct lipid ID, annotated with its level and class.
for _, row in subset.iterrows():
    lipid_id = str(row["Lipid ID"])
    if lipid_id in swisslipids:
        # Already created from an earlier row.
        continue

    term = swisslipids.create_term(lipid_id)
    term.name = row.get("Name", "")

    # Store level and class as literal annotations (stringified values).
    level = str(row.get("Level", ""))
    term_class = str(row.get("Lipid class*", ""))
    term.annotations.add(LiteralPropertyValue("Level", level))
    term.annotations.add(LiteralPropertyValue("Class", term_class))

In [None]:
# Count the terms currently held by the ontology.
total = sum(1 for _ in swisslipids.terms())
print(total)

In [None]:
# NOTE(review): `classes` is not assigned in any cell visible in this notebook,
# so on a fresh kernel this cell raises NameError — define it (or remove this
# cell) before relying on Restart & Run All.
for item in classes:
    print(item)

print(len(classes))

In [None]:
# Rows that have no parent.
has_no_parent = df_lipids["Parent"].isna()
df_lipids.loc[has_no_parent]

In [None]:
# Number of rows per whitespace-stripped "Lipid class*" value.
df_lipids.groupby(df_lipids["Lipid class*"].str.strip()).size()

In [None]:
# Look up the table row for a single lipid ID.
df_lipids[df_lipids["Lipid ID"] == "SLM:000001080"]

In [None]:
# Rows whose class cell is exactly this one ID. A cell listing several IDs
# joined with "|" would not match this equality test.
df_lipids[df_lipids["Lipid class*"] == "SLM:000399814"]

In [None]:
# Distinct values found in the "Lipid class*" column.
df_lipids["Lipid class*"].unique()

In [None]:
# Distinct values found in the "Lipid ID" column.
df_lipids["Lipid ID"].unique()

In [None]:
# Rows with no lipid class assigned; preview the first few.
has_no_class = df_lipids["Lipid class*"].isna()
missing_rows = df_lipids.loc[has_no_class]
missing_rows.head()

In [None]:
# Distinct values found in the "Level" column (same query as an earlier cell).
df_lipids["Level"].unique()

In [None]:
# Rows whose level is "Category".
is_category = df_lipids["Level"] == "Category"
df_lipids.loc[is_category]

In [None]:
# Rows that have neither a parent nor a lipid class.
both_missing = df_lipids[["Parent", "Lipid class*"]].isna().all(axis=1)
df_lipids[both_missing]

In [None]:
# Rows that have neither a parent nor a lipid class.
both_missing = df_lipids[["Parent", "Lipid class*"]].isna().all(axis=1)
df_lipids[both_missing]

In [None]:
# Print the ID, class and parent of the first few rows only; printing every
# row of the table would flood the cell output. No terms are created here.
max_rows_shown = 20  # raise this to inspect more rows

for _, row in subset.head(max_rows_shown).iterrows():
    lipid_id = str(row["Lipid ID"]).strip()
    lipid_class = str(row["Lipid class*"]).strip()
    parent_id = str(row["Parent"]).strip()

    print(f"\nLipid ID: {lipid_id}")
    print(f"Lipid class: {lipid_class}")
    print(f"Parent ID: {parent_id}")

In [None]:
# Rows that do have a lipid class assigned (`notna()` replaces `isna() == False`).
df_lipids[df_lipids["Lipid class*"].notna()]

In [None]:
"SLM:000501265".split("|")

In [None]:
from ontograph.client import ClientOntology

In [None]:
# Create a client and load the OBO file from the output data directory.
swiss_client = ClientOntology()

swiss_client.load(file_path_ontology="../data/out/lipids.obo")

In [None]:
# Presumably collects the paths leading from the ontology root to this term —
# TODO confirm against the ontograph documentation.
traj = swiss_client.get_trajectories_from_root(term_id="SLM:000782225")

In [None]:
# Display the trajectories computed in the previous cell (the tree layout is
# implied by the method name — verify against ontograph).
swiss_client.print_term_trajectories_tree(traj)