# Jupyter Template for Tutorials

[//]: # (------------------------------------------    DO NOT MODIFY THIS    ------------------------------------------)
<style type="text/css">
.tg  {border-collapse:collapse;
      border-spacing:0;
     }
.tg td{border-color:black;
       border-style:solid;
       border-width:1px;
       font-family:Arial, sans-serif;
       font-size:14px;
       overflow:hidden;
       padding:10px 5px;
       word-break:normal;
      }
.tg th{border-color:black;
       border-style:solid;
       border-width:1px;
       font-family:Arial, sans-serif;
       font-size:14px;
       font-weight:normal;
       overflow:hidden;
       padding:10px 5px;
       word-break:normal;
      }
.tg .tg-fymr{border-color:inherit;
             font-weight:bold;
             text-align:left;
             vertical-align:top
            }
.tg .tg-0pky{border-color:inherit;
             text-align:left;
             vertical-align:top
            }
[//]: # (--------------------------------------------------------------------------------------------------------------)
[//]: # (-------------------------------------    FILL THIS OUT WITH YOUR DATA    -------------------------------------)
[//]: # (--------------------------------------------------------------------------------------------------------------)
</style>
<table class="tg">
    <tbody>
      <tr>
        <td class="tg-fymr" style="font-weight: bold">Title:</td>
        <td class="tg-0pky">Jupyter Template for Tutorials</td>
      </tr>
      <tr>
        <td class="tg-fymr" style="font-weight: bold">Authors:</td>
        <td class="tg-0pky">
            <a href="https://github.com/ecarrenolozano" target="_blank" rel="noopener noreferrer">Edwin Carreño</a>,
            <a href="" target="_blank" rel="noopener noreferrer">AUTHOR 2</a>,
            <a href="" target="_blank" rel="noopener noreferrer">AUTHOR 3</a>
        </td>
      </tr>
      <tr>
        <td class="tg-fymr" style="font-weight: bold">Affiliations:</td>
        <td class="tg-0pky">
            <a href="https://www.ssc.uni-heidelberg.de/en" target="_blank" rel="noopener noreferrer">Scientific Software Center</a>,
            <a href="https://saezlab.org/" target="_blank" rel="noopener noreferrer">Saez-Rodriguez Group</a>,
            <a href="" target="_blank" rel="noopener noreferrer">AFFILIATION 3</a>
        </td>
      </tr>
      <tr>
        <td class="tg-fymr" style="font-weight: bold">Date Created:</td>
        <td class="tg-0pky">30.10.2024</td>
      </tr>
      <tr>
        <td class="tg-fymr" style="font-weight: bold">Description:</td>
        <td class="tg-0pky">Minimal format for a professional Jupyter Notebook for improved clarity and maintenance</td>
      </tr>
    </tbody>
</table>

[//]: # (--------------------------------------------------------------------------------------------------------------)

## Overview

This Jupyter Notebook explores the capabilities of the Pronto library as a potential core component for developing a custom ontology library within Saezlab. We aim to complete the following:

- [ ] Download ontologies from standard ontology services (OBO Foundry and OLS)
- [ ] Use the Pronto library to read an ontology and extract its metadata

## How to Use This Notebook

1. Run cells sequentially from top to bottom.
2. If using an environment manager (e.g., `conda`, `venv`), activate it before running.
3. To reset, click `Kernel -> Restart & Run All`.

## Importing Libraries

In [None]:
# 1. Standard library imports
import os
import sys
from collections import defaultdict
from types import NoneType

# Make the local `ontograph` package importable without a user-specific
# absolute path. Assumes the kernel's working directory is a direct sub-folder
# of the repository root (e.g. `notebooks/`) -- adjust if the notebook is
# launched from elsewhere.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
if PROJECT_ROOT not in sys.path:
    # Guarded so that re-running this cell does not add duplicate entries.
    sys.path.append(PROJECT_ROOT)

# 2. Related third party imports
import pronto

# 3. Local application/library specific imports
from ontograph._downloader import download_file
import ontograph._constants as _constants

## Download the Gene Ontology

In [None]:
# Relative directory name handed to `download_file` as its `cache_dir`.
CACHE_DIRECTORY = "cache"

# URL of the Gene Ontology in OBO format.
url_go_obo = "https://purl.obolibrary.org/obo/go.obo"

# Download and cache the Gene Ontology OBO file.
# NOTE(review): presumably `download_file` returns the local path of the
# cached file (it is printed below and later passed to pronto) -- verify
# against `ontograph._downloader`.
gene_ontology_path = download_file(
    url=url_go_obo,
    fname="gene_ontology.obo",
    cache_dir=CACHE_DIRECTORY,
)

print(f"Gene Ontology OBO file cached at: {gene_ontology_path}")

## Read Gene Ontology



In [None]:
# Build a pronto Ontology object from the downloaded file path.
gene_ontology = pronto.Ontology(gene_ontology_path)

### Extract Metadata

In [None]:
def extract_annotations(pronto_ontology):
    """Group the header annotations of an ontology by property name.

    Args:
        pronto_ontology: Object exposing ``metadata.annotations`` as an
            iterable of property values (presumably a ``pronto.Ontology``).

    Returns:
        dict | None: ``None`` when ``metadata.annotations`` is empty/falsy.
        Otherwise a dict mapping each annotation property to its value: the
        ``resource`` of a ``pronto.ResourcePropertyValue`` or the ``literal``
        of a ``pronto.LiteralPropertyValue``. A property seen once maps to the
        bare value; a property seen several times maps to the list of values.
        Items of any other type are ignored.
    """
    raw_annotations = pronto_ontology.metadata.annotations
    if not raw_annotations:
        return None

    grouped = defaultdict(list)
    for entry in raw_annotations:
        if isinstance(entry, pronto.ResourcePropertyValue):
            grouped[entry.property].append(entry.resource)
        elif isinstance(entry, pronto.LiteralPropertyValue):
            grouped[entry.property].append(entry.literal)

    # Unwrap properties that carry exactly one value; keep lists otherwise.
    collapsed = {}
    for prop, collected in grouped.items():
        collapsed[prop] = collected if len(collected) != 1 else collected[0]
    return collapsed


def extract_metadata(pronto_ontology):
    """Collect the header metadata of an ontology into a plain dictionary.

    Args:
        pronto_ontology: Object exposing a ``metadata`` attribute with
            ``format_version``, ``data_version``, ``ontology`` and ``date``
            (plus the ``annotations`` read by ``extract_annotations``);
            presumably a ``pronto.Ontology``.

    Returns:
        dict: Always contains ``Format_version``, ``Data_version``,
        ``Ontology_identifier`` and ``Last_update``. When the ontology has
        annotations, it also contains ``Name``, ``Description``, ``License``
        and ``Roots``; each of these is ``None`` if the corresponding
        annotation property is not declared by the ontology.
    """
    header = pronto_ontology.metadata
    metadata = {
        "Format_version": header.format_version,
        "Data_version": header.data_version,
        "Ontology_identifier": header.ontology,
        "Last_update": header.date,
    }

    annotations = extract_annotations(pronto_ontology)

    # Identity check instead of isinstance(..., types.NoneType): NoneType is
    # only importable from `types` on Python 3.10+.
    if annotations is not None:
        # .get(): an ontology may carry annotations without declaring every
        # one of these properties; indexing would raise KeyError.
        metadata["Name"] = annotations.get("dc:title")
        metadata["Description"] = annotations.get("dc:description")
        metadata["License"] = annotations.get("terms:license")
        metadata["Roots"] = annotations.get("has_ontology_root_term")

    return metadata


def print_metadata(pronto_ontology):
    """Print each metadata field of an ontology on its own labelled entry.

    Every key/value pair returned by ``extract_metadata`` is printed as the
    key followed by a colon, then the value on the next line behind a tab.

    Args:
        pronto_ontology: Ontology object forwarded to ``extract_metadata``.
    """
    metadata = extract_metadata(pronto_ontology)
    for field in metadata:
        print(f"{field}: \n\t{metadata[field]}")

In [None]:
# Print the metadata fields extracted from the loaded Gene Ontology.
print_metadata(gene_ontology)

## References and Further Reading

- [Pronto Documentation](https://pronto.readthedocs.io/)
- [NetworkX Guide](https://networkx.org/documentation/stable/tutorial.html)

Include citations for any external content or suggested resources for deeper learning.

In [None]:
# NOTE(review): `sys` is already imported in the "Importing Libraries" cell;
# ideally all imports live there.
import sys
import os

# Get the absolute path to the parent folder (where both 'notebooks' and 'ontograph' live)
# This resolves the parent of the kernel's current working directory.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))

# Insert at front of sys.path so it's prioritized
# (no membership check: re-running this cell inserts the same entry again).
sys.path.insert(0, project_root)

print("Added to sys.path:", project_root)

Added to sys.path: /home/edwin/SSC-Projects/b_REPOSITORIES/ontograph


In [3]:
# NOTE(review): this import sits mid-notebook; consider moving it to the
# "Importing Libraries" cell.
from ontograph.api.client import OntologyClient

client = OntologyClient()
# NOTE(review): this rebinds `gene_ontology`, which an earlier cell bound to a
# `pronto.Ontology`; later cells now see whatever `client.load` returns.
gene_ontology = client.load(name_id="go", format="obo")

print(gene_ontology.metadata())
# FIXME: the saved output of this cell ends with
# "AttributeError: 'Ontology' object has no attribute 'roots'", i.e. the call
# below failed on the recorded run -- confirm the intended accessor for the
# root terms.
print([root.id for root in gene_ontology.roots()])

Downloading data from 'http://purl.obolibrary.org/obo/go.obo' to file '/home/edwin/.ontograph_cache/go.obo'.
SHA256 hash of downloaded file: 0908af70601dd41f2c55f664ca5d5959e939162362e9a5573c6794b5d850e601
Use this value as the 'known_hash' argument of 'pooch.retrieve' to ensure that the file hasn't changed if it is downloaded again in the future.


{'name': 'go', 'version': 'releases/2025-06-01', 'format': '1.2'}


AttributeError: 'Ontology' object has no attribute 'roots'

In [None]:
# Debug aid: list the contents of the first entry on sys.path.
print(os.listdir(sys.path[0]))

In [None]:
import ontograph.api.client


# Display the API sub-package. The previous version of this line ended in a
# dangling attribute access (a trailing dot), which is a SyntaxError and
# aborts "Restart & Run All".
ontograph.api