In [1]:
import requests
import csv
from datetime import datetime
from dotenv import dotenv_values

In [2]:
# Root URL that every API endpoint path in this notebook is appended to
BASE_URL = "https://data.bioontology.org"

# Destination TSV file for the per-ontology class counts
output_file = "bioportal_ontology_class_counts.tsv"

In [3]:
# Location of the .env file (a relative path, so it is resolved against
# the current working directory)
env_path = '../../local/.env'

In [4]:
# Read the variables defined in the .env file at env_path into a dictionary
env_vars = dotenv_values(env_path)

In [5]:
# Fail fast with a descriptive message when the key is absent or blank,
# rather than a bare KeyError here or authentication failures on every
# request later on.
API_KEY = env_vars.get('BIOPORTAL_API_KEY')
if not API_KEY:
    raise KeyError(f"BIOPORTAL_API_KEY is missing or empty in {env_path}")

In [6]:
# Function to get the list of ontologies
def get_ontologies(timeout=60):
    """Fetch the list of ontologies from the API.

    Sends a GET request to ``{BASE_URL}/ontologies`` with ``API_KEY``
    passed as the ``apikey`` query parameter.

    Args:
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body on HTTP 200. An empty list if the request
        fails at the network level, times out, or returns any other
        status code (an error message is printed in those cases).
    """
    url = f'{BASE_URL}/ontologies'
    try:
        response = requests.get(url, params={'apikey': API_KEY}, timeout=timeout)
    except requests.RequestException as exc:
        # Connection failures and timeouts are reported the same way as
        # HTTP errors, so callers only have to handle the empty-list case.
        print(f'Error fetching ontologies: {exc}')
        return []

    if response.status_code == 200:
        return response.json()

    print(f'Error fetching ontologies: {response.status_code}')
    return []

In [7]:
# Function to get metrics for a specific ontology
def get_ontology_metrics(ontology_acronym, timeout=30):
    """Fetch the metrics record for a single ontology.

    Sends a GET request to
    ``{BASE_URL}/ontologies/{ontology_acronym}/metrics`` with ``API_KEY``
    passed as the ``apikey`` query parameter.

    Args:
        ontology_acronym: Acronym identifying the ontology in the URL path.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON body on HTTP 200. ``None`` if the request fails
        at the network level, times out, or returns any other status code
        (an error message is printed in those cases).
    """
    url = f'{BASE_URL}/ontologies/{ontology_acronym}/metrics'
    try:
        response = requests.get(url, params={'apikey': API_KEY}, timeout=timeout)
    except requests.RequestException as exc:
        # A single transient network failure should not abort a long loop
        # over many ontologies; report it and let the caller record None.
        print(f'Error fetching metrics for {ontology_acronym}: {exc}')
        return None

    if response.status_code == 200:
        return response.json()

    print(f'Error fetching metrics for {ontology_acronym}: {response.status_code}')
    return None


In [8]:
# Main script: fetch the list of ontologies from the API
# (takes roughly 15 seconds)
ontologies = get_ontologies()


In [9]:
# Order the ontologies by acronym; list.sort() reorders the existing
# list object instead of returning a new one
ontologies.sort(key=lambda entry: entry["acronym"])

In [10]:
ontology_class_counts = {}
ontology_summaries = []

# Build one summary record per ontology, looking up its metrics on the way
for ontology in ontologies:
    acronym, ontoname = ontology.get('acronym'), ontology.get('name')

    # A failed (or empty) metrics lookup leaves the class count as None
    metrics = get_ontology_metrics(acronym)
    class_count = metrics.get('classes') if metrics else None

    # Progress line: current timestamp in ISO 8601 format, then the acronym
    iso_timestamp = datetime.now().isoformat()
    print(f"{iso_timestamp} {acronym}")

    temp_dict = dict(acronym=acronym, name=ontoname, class_count=class_count)
    ontology_summaries.append(temp_dict)


2025-02-25T20:33:28.032566 ABA-AMB
2025-02-25T20:33:28.489920 ABD
2025-02-25T20:33:28.936831 ACESO
2025-02-25T20:33:29.314332 ACGT-MO
2025-02-25T20:33:29.670613 ACVD_ONTOLOGY
2025-02-25T20:33:30.099917 AD-DROP
Error fetching metrics for ADALAB: 404
2025-02-25T20:33:30.604820 ADALAB
Error fetching metrics for ADALAB-META: 404
2025-02-25T20:33:31.022180 ADALAB-META
2025-02-25T20:33:31.453427 ADAR
2025-02-25T20:33:31.959765 ADCAD
2025-02-25T20:33:32.318301 ADHER_INTCARE_EN
2025-02-25T20:33:32.671245 ADHER_INTCARE_SP
Error fetching metrics for ADMF: 404
2025-02-25T20:33:33.025577 ADMF
Error fetching metrics for ADMIN: 404
2025-02-25T20:33:33.793508 ADMIN
2025-02-25T20:33:34.341299 ADMO
2025-02-25T20:33:34.766414 ADO
2025-02-25T20:33:35.127531 ADW
2025-02-25T20:33:35.490495 AEO
2025-02-25T20:33:35.848456 AERO
2025-02-25T20:33:36.208156 AFO
2025-02-25T20:33:37.003359 AFPO
2025-02-25T20:33:37.444347 AGRO
2025-02-25T20:33:38.893172 AGROCYMAC
2025-02-25T20:33:39.248149 AGROMOP
2025-02-25T20:33:

In [11]:
# Write the list of dictionaries to a TSV file
with open(output_file, mode='w', encoding='utf-8', newline='') as f:
    # Columns come from the keys of the first record. When there are no
    # records, fall back to the known column names so the file still gets
    # a proper header row instead of a blank line.
    if ontology_summaries:
        fieldnames = list(ontology_summaries[0].keys())
    else:
        fieldnames = ['acronym', 'name', 'class_count']

    # Tab-delimited writer keyed by column name
    writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter='\t')

    # Header row first, then one row per summary record
    writer.writeheader()
    writer.writerows(ontology_summaries)

print(f"List of dictionaries successfully written to {output_file}")


List of dictionaries successfully written to bioportal_ontology_class_counts.tsv
