# In [None]:  (Jupyter cell marker left over from the notebook export)
import requests
import pandas as pd
from functools import lru_cache
import asyncio
import aiohttp
import nest_asyncio

# Caching page info results to avoid repeated requests
@lru_cache(maxsize=1000)
def fetch_wikipedia_page_info(title):
    """Fetch the page ID, title and URL of an English Wikipedia page.

    Results are memoised per ``title`` by ``lru_cache``; the dict returned
    for a cached title is the same object on every call, so callers should
    treat it as read-only.

    Args:
        title: Page title to look up; must be hashable because it is the
            cache key.

    Returns:
        A dict with the keys ``'id'``, ``'title'`` and ``'url'``. When the
        API entry carries no ``pageid`` (presumably a missing or invalid
        title), ``'id'`` and ``'url'`` are ``None``. The dict is empty if
        the response lists no pages.

    Raises:
        requests.RequestException: On connection problems, a timeout, or an
            HTTP error status.
        KeyError: If the response has no ``query``/``pages`` section.
    """
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "prop": "info",
        "format": "json",
        "titles": title,
        # NOTE(review): 'id' may not be a recognised inprop value -- confirm
        # against the MediaWiki API docs.
        "inprop": "url|id"
    }
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, params=params, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error body; exceptions are
    # not cached by lru_cache, so a later retry can still succeed.
    response.raise_for_status()
    data = response.json()

    page_info = {}
    for page in data['query']['pages'].values():
        page_id = page.get('pageid')
        page_info['id'] = page_id
        page_info['title'] = page.get('title')
        # Same rule as fetch_wikipedia_page_info_async: no ID means no URL,
        # rather than a bogus "...?curid=None" link.
        page_info['url'] = f"https://en.wikipedia.org/?curid={page_id}" if page_id else None
    return page_info

# Asynchronous functions
async def fetch_wikipedia_page_info_async(session, title):
    """Asynchronously fetch the page ID, title and URL of a Wikipedia page.

    Args:
        session: Object whose ``get(url, params=...)`` returns an async
            context manager yielding a response with ``raise_for_status()``
            and an awaitable ``json()`` (e.g. ``aiohttp.ClientSession``).
        title: Page title to look up.

    Returns:
        A dict with the keys ``'id'``, ``'title'`` and ``'url'``. ``'url'``
        is ``None`` when the API entry carries no ``pageid``. The dict is
        empty if the response lists no pages.

    Raises:
        KeyError: If the response has no ``query``/``pages`` section.
        Exception: Whatever ``response.raise_for_status()`` raises for an
            HTTP error status.
    """
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "prop": "info",
        "format": "json",
        "titles": title,
        # NOTE(review): 'id' may not be a recognised inprop value -- confirm
        # against the MediaWiki API docs.
        "inprop": "url|id"
    }
    async with session.get(url, params=params) as response:
        # Surface HTTP errors (e.g. rate limiting) as exceptions instead of
        # trying to parse an error body as a successful result.
        response.raise_for_status()
        data = await response.json()

    page_info = {}
    for page in data['query']['pages'].values():
        page_id = page.get('pageid')
        page_info['id'] = page_id
        page_info['title'] = page.get('title')
        page_info['url'] = f"https://en.wikipedia.org/?curid={page_id}" if page_id else None
    return page_info

async def fetch_wikipedia_links_async(session, title):
    """Asynchronously collect the titles of all pages linked from a page.

    The API returns links in batches; whenever a response contains a
    ``continue`` block, its values are sent back with the next request until
    no such block is returned, so the result is not cut off after the first
    batch.

    Args:
        session: Object whose ``get(url, params=...)`` returns an async
            context manager yielding a response with ``raise_for_status()``
            and an awaitable ``json()`` (e.g. ``aiohttp.ClientSession``).
        title: Title of the page whose outgoing links are wanted.

    Returns:
        A list of ``{'title': <linked page title>}`` dicts in the order the
        API returned them; empty if the page reports no links.

    Raises:
        KeyError: If a response has no ``query``/``pages`` section.
        Exception: Whatever ``response.raise_for_status()`` raises for an
            HTTP error status.
    """
    url = "https://en.wikipedia.org/w/api.php"
    base_params = {
        "action": "query",
        "prop": "links",
        "format": "json",
        "titles": title,
        "pllimit": "max"
    }

    links = []
    request_params = dict(base_params)
    while True:
        async with session.get(url, params=request_params) as response:
            response.raise_for_status()
            data = await response.json()

        for page in data['query']['pages'].values():
            links.extend({'title': link['title']} for link in page.get('links', []))

        continuation = data.get('continue')
        if not continuation:
            break
        # Always merge into the *original* parameters so stale continuation
        # values from an earlier batch are never re-sent.
        request_params = {**base_params, **continuation}
    return links

# Async batch fetching of page IDs using aiohttp
async def fetch_all_link_ids_async(links):
    """Look up page info for every link concurrently over one shared session.

    Args:
        links: Iterable of dicts that each have a ``'title'`` key.

    Returns:
        A list of page-info dicts, one per input link and in the same order.
        A lookup that raised is represented by
        ``{'id': None, 'title': <requested title>, 'url': None}`` so that
        every entry is a dict and the link's title is not lost.
    """
    import logging

    links = list(links)
    if not links:
        # Nothing to fetch: avoid opening an HTTP session at all.
        return []

    async with aiohttp.ClientSession() as session:
        tasks = [fetch_wikipedia_page_info_async(session, link['title']) for link in links]
        # return_exceptions=True keeps one failed lookup from discarding the
        # results of all the others.
        outcomes = await asyncio.gather(*tasks, return_exceptions=True)

    results = []
    failures = 0
    for link, outcome in zip(links, outcomes):
        if isinstance(outcome, BaseException):
            failures += 1
            outcome = {'id': None, 'title': link['title'], 'url': None}
        results.append(outcome)

    if failures:
        logging.getLogger(__name__).warning(
            "%d of %d page-info lookups failed", failures, len(links)
        )
    return results

# Function to save data to CSV
def save_to_csv(base_page_info, links_with_ids):
    """Write one CSV row per linked page into ``<base title>.csv``.

    The file is created in the current working directory. Its name is the
    base page title with spaces and characters that are reserved in file
    paths (``\\ / : * ? " < > |``) replaced by underscores.

    Args:
        base_page_info: Dict with the keys ``'id'``, ``'title'`` and
            ``'url'`` describing the base page.
        links_with_ids: Iterable of page-info dicts for the linked pages.
            Entries that are not dicts (for example exception objects left
            in the list by ``asyncio.gather(..., return_exceptions=True)``)
            are skipped and counted instead of aborting the export.
    """
    import re

    base_title = base_page_info['title']
    # A raw title such as "AC/DC" would otherwise be read as a directory path.
    safe_title = re.sub(r'[\\/:*?"<>| ]', '_', base_title)
    filename = f"{safe_title}.csv"

    rows = []
    skipped = 0
    for link in links_with_ids:
        if not isinstance(link, dict):
            skipped += 1
            continue
        rows.append([
            base_page_info['id'],
            base_page_info['title'],
            base_page_info['url'],
            link.get('title'),
            link.get('id'),
        ])

    df = pd.DataFrame(
        rows,
        columns=['Base Page ID', 'Base Page Title', 'Base Page URL', 'Link Title', 'Link Page ID'],
    )
    df.to_csv(filename, index=False, encoding='utf-8')

    print(f"Data saved to {filename}")
    if skipped:
        print(f"Skipped {skipped} link(s) without page info")

# Main function for running async tasks in Jupyter
async def main(base_page_title="Mathematics"):
    """Export the outgoing links of a Wikipedia page, with their IDs, to CSV.

    Args:
        base_page_title: Title of the page whose links are exported.
            Defaults to ``"Mathematics"``, the value that used to be
            hard-coded.
    """
    # Synchronous (cached) lookup; it blocks the event loop briefly, but no
    # other task is running at this point.
    base_page_info = fetch_wikipedia_page_info(base_page_title)

    # The session is needed only for the link listing;
    # fetch_all_link_ids_async opens its own, so close this one first
    # instead of holding two sessions open at once.
    async with aiohttp.ClientSession() as session:
        links = await fetch_wikipedia_links_async(session, base_page_title)

    links_with_ids = await fetch_all_link_ids_async(links)

    # Save to CSV
    save_to_csv(base_page_info, links_with_ids)
    print()

# A bare top-level `await main()` is only accepted by IPython/Jupyter and is a
# SyntaxError in a regular Python script. nest_asyncio.apply() patches asyncio
# so that asyncio.run() may be called while a loop is already running, which
# lets the same entry point work in a notebook and from the command line.
if __name__ == "__main__":
    nest_asyncio.apply()
    asyncio.run(main())
