In [17]:
from datetime import datetime
import sys
import time
from SPARQLWrapper import SPARQLWrapper, JSON
import requests
import html


# MediaWiki API endpoint; get_glossary() passes it to fetch_mediawiki_data().
api_url = "https://climatekg.semanticclimate.net/api.php"

def fetch_mediawiki_data(api_url, params, timeout=30):
    """Send a GET request to a MediaWiki API endpoint and return the decoded JSON.

    Args:
        api_url: URL of the API endpoint.
        params: Mapping of query-string parameters sent with the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON payload, or ``None`` if the request failed, the
        server answered with an HTTP error status, or the body was not valid
        JSON. In the failure case a message is printed.
    """
    try:
        response = requests.get(api_url, params=params, timeout=timeout)
        response.raise_for_status()  # Raise an exception for HTTP errors
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decode failures on requests versions whose
        # decode error does not derive from RequestException.
        print(f"Error fetching data from MediaWiki API: {e}")
        return None

def _first(values, default=None):
    """Return the first element of a non-empty list/tuple, else *default*."""
    if isinstance(values, (list, tuple)) and values:
        return values[0]
    return default


def extract_items(data):
    """Flatten an ``ask`` query response into a list of glossary item dicts.

    Args:
        data: Decoded API response. Expected to hold a mapping at
            ``data["query"]["results"]`` keyed by item id, where each value
            carries a ``"printouts"`` mapping with the keys ``term``,
            ``definition`` and ``subcategoryOf``. ``None`` (a failed fetch)
            and responses without usable results are accepted.

    Returns:
        A list of dicts with the keys ``id``, ``term``, ``definition`` and
        ``subCategoryOf``. Missing or empty printouts yield ``None`` for
        ``term`` and ``definition``. The list is empty when there is nothing
        to extract.
    """
    if not isinstance(data, dict):
        return []
    query = data.get("query")
    if not isinstance(query, dict):
        return []
    results = query.get("results")
    # NOTE(review): the API appears to be able to return a non-mapping
    # (e.g. an empty list) when nothing matches -- treat that as "no results".
    if not isinstance(results, dict):
        return []

    items = []
    for item_id, item_data in results.items():
        printouts = item_data.get("printouts") or {}

        # The definition printout is a nested record: {"Text": {"item": [...]}}.
        # Anything that is not a mapping is kept as-is instead of crashing.
        definition = _first(printouts.get("definition"))
        if isinstance(definition, dict):
            definition = _first((definition.get("Text") or {}).get("item"))

        items.append({
            "id": item_id,
            "term": _first(printouts.get("term")),
            "definition": definition,
            # Historical behaviour kept for callers: an absent key gives
            # None, while a present-but-empty list gives "".
            "subCategoryOf": _first(printouts.get("subcategoryOf", [None]), ""),
        })

    return items

def generate_html_table(items):
    """Render glossary items as a single-line HTML ``<table>`` string.

    Args:
        items: Iterable of dicts with the keys ``id``, ``term``,
            ``definition`` and ``subCategoryOf``.

    Returns:
        The table markup: one header row followed by one row per item.
        Cell values are HTML-escaped so characters such as ``<``, ``>`` and
        ``&`` in the data cannot break the markup, and ``None`` is rendered
        as an empty cell.
    """
    def cell(value):
        # Escape data before embedding it; quotes are harmless in element text.
        text = "" if value is None else str(value)
        return f"<td>{html.escape(text, quote=False)}</td>"

    # Collect fragments and join once; this also avoids shadowing the
    # imported ``html`` module with a local string variable.
    parts = [
        '<table border="1">',
        # Header
        "<tr>",
        "<th>ID</th>",
        "<th>Term</th>",
        "<th>Definition</th>",
        "<th>SubCategory Of</th>",
        "</tr>",
    ]

    # Rows
    for item in items:
        parts.append("<tr>")
        parts.append(cell(item["id"]))
        parts.append(cell(item["term"]))
        parts.append(cell(item["definition"]))
        parts.append(cell(item["subCategoryOf"]))
        parts.append("</tr>")

    parts.append("</table>")
    return "".join(parts)



def get_glossary():
    """Query the wiki for glossary items and print them as an HTML table.

    Sends an ``ask`` query to ``api_url`` that requests three printouts,
    labelled ``definition``, ``term`` and ``subcategoryOf``, converts the
    response with ``extract_items`` and prints the table produced by
    ``generate_html_table``. Nothing is printed when the request fails
    (beyond the error already reported by ``fetch_mediawiki_data``) or when
    no items are returned.
    """
    params = {
        'action': 'ask',
        'query': '[[Item:+]] [[has subobject::+]] [[P1::Term]] |?Wikibase description=definition |?P3=term |?P10.P3=subcategoryOf',
        'format': 'json'
    }

    response = fetch_mediawiki_data(api_url, params)
    if response is None:
        # The fetch failed and has already printed why; there is nothing to
        # extract, and passing None on would raise inside extract_items.
        return

    items = extract_items(response)
    if items:
        # Generate the HTML for the table
        html_table = generate_html_table(items)
        print(html_table)





In [18]:
# Run the glossary query; prints the HTML table when items are returned.
get_glossary()

<table border="1"><tr><th>ID</th><th>Term</th><th>Definition</th><th>SubCategory Of</th></tr><tr><td>Item:Q10</td><td>more likely than not</td><td>>50–100% probability (Indicates the assessed likelihood of an outcome or a result)</td><td>qualifiers</td></tr><tr><td>Item:Q100</td><td>biochemical oxygen demand</td><td>The amount of oxygen required to decompose organic material in water, an indicator of water quality.</td><td>terms</td></tr><tr><td>Item:Q1000</td><td>AQ</td><td>air quality</td><td>acronyms</td></tr><tr><td>Item:Q1001</td><td>AR4</td><td>Fourth Assessment Report of the Intergovernmental Panel on Climate Change</td><td>acronyms</td></tr><tr><td>Item:Q1002</td><td>AR5</td><td>Fifth Assessment Report of the Intergovernmental Panel on Climate Change</td><td>acronyms</td></tr><tr><td>Item:Q1003</td><td>AR6</td><td>Sixth Assessment Report of the Intergovernmental Panel on Climate Change</td><td>acronyms</td></tr><tr><td>Item:Q1004</td><td>AR7</td><td>Seventh Assessment Cycle of 