In [1]:
import requests

In [2]:
# Base URL that article titles are appended to when building page links.
main_url = "https://en.wikipedia.org/wiki"
# MediaWiki Action API endpoint queried for category members.
WIKIPEDIA_API_URL = "https://en.wikipedia.org/w/api.php"

In [3]:
def get_subcategories(category, depth=1):
    """Return the names of all direct subcategories of a Wikipedia category.

    Queries the MediaWiki ``categorymembers`` list restricted to
    subcategories and follows the API's continuation data until the listing
    is exhausted, so categories with more subcategories than fit in a single
    response are returned in full.

    Args:
        category: Category name without the ``Category:`` prefix.
        depth: Ignored; kept only so existing callers that pass it keep
            working. It used to cap the number of result pages requested,
            which silently truncated large categories.

    Returns:
        A list of subcategory titles with the ``Category:`` prefix removed,
        in the order the API returned them.

    Raises:
        requests.HTTPError: If the API responds with an HTTP error status.
    """
    params = {
        'action': 'query',
        'format': 'json',
        'list': 'categorymembers',
        'cmtitle': f'Category:{category}',
        'cmlimit': 'max',
        'cmtype': 'subcat'
    }
    prefix = 'Category:'

    subcategories = []
    while True:
        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(WIKIPEDIA_API_URL, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()

        for item in data['query']['categorymembers']:
            title = item['title']
            # Strip the namespace prefix only when it is actually present.
            subcategories.append(title[len(prefix):] if title.startswith(prefix) else title)

        if 'continue' not in data:
            break
        # Forward every continuation field the API handed back, not just
        # 'cmcontinue', so the next request resumes exactly where this ended.
        params.update(data['continue'])

    return subcategories

def get_page_urls(category, main_url):
    """Return article URLs for every page that is a direct member of a category.

    Queries the MediaWiki ``categorymembers`` list restricted to pages and
    follows the API's continuation data until the listing is exhausted, so
    categories with more pages than fit in a single response are returned in
    full rather than being cut off after the first batch.

    Args:
        category: Category name without the ``Category:`` prefix.
        main_url: Base URL the page title is appended to (joined with ``/``).

    Returns:
        A list of URLs, one per member page, in the order the API returned
        them. Spaces in titles are replaced with underscores.

    Raises:
        requests.HTTPError: If the API responds with an HTTP error status.
    """
    params = {
        'action': 'query',
        'format': 'json',
        'list': 'categorymembers',
        'cmtitle': f'Category:{category}',
        'cmlimit': 'max',
        'cmtype': 'page'
    }

    page_urls = []
    while True:
        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(WIKIPEDIA_API_URL, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()

        page_urls.extend(
            main_url + "/" + item['title'].replace(" ", "_")
            for item in data['query']['categorymembers']
        )

        if 'continue' not in data:
            break
        # Forward every continuation field so the next request resumes here.
        params.update(data['continue'])

    return page_urls

def build_category_tree(category, depth=1):
    """Build a nested dict describing a category, its subcategories and its pages.

    The root counts as level one: subcategories are looked up and expanded
    only while ``depth`` is greater than 1, and each recursive call receives
    ``depth - 1``. Page URLs are fetched for every node after its
    subcategories have been processed, using the module-level ``main_url``
    as the link base.

    Args:
        category: Category name without the ``Category:`` prefix.
        depth: Number of levels to build, counting this category.

    Returns:
        A dict with keys ``'name'`` (the category as given),
        ``'subcategories'`` (a list of dicts of the same shape) and
        ``'page_urls'`` (a list of URL strings).
    """
    children = []
    if depth > 1:
        # Subcategory titles come back with spaces; recurse with underscores.
        children = [
            build_category_tree(child_name.replace(" ", "_"), depth - 1)
            for child_name in get_subcategories(category, depth)
        ]

    return {
        'name': category,
        'subcategories': children,
        'page_urls': list(get_page_urls(category, main_url)),
    }

def print_category_tree(category_tree, depth=0):
    """Print a category tree as an indented outline.

    Each category name is printed on its own line, indented two spaces per
    level. Its subcategories follow, one level deeper, and then its page
    URLs as ``- `` bullets indented one step below the category name.

    Args:
        category_tree: Dict with ``'name'``, ``'subcategories'`` and
            ``'page_urls'`` keys; subcategories are dicts of the same shape.
        depth: Indentation level at which the root of ``category_tree``
            is printed.
    """
    def _outline(node, level):
        # Yield the lines for one node lazily, in the order they are printed.
        pad = '  ' * level
        yield f"{pad}{node['name']}"
        for child in node['subcategories']:
            yield from _outline(child, level + 1)
        for url in node['page_urls']:
            yield f"{pad}  - {url}"

    for line in _outline(category_tree, depth):
        print(line)

In [5]:
# Root category to explore, written with underscores instead of spaces.
category_name = 'Machine_learning'
# Alternative root category; uncomment to use it instead.
#category_name = 'Time_series'
depth = 3  # Levels to build, counting the root (3 = root, its subcategories, and theirs)

# Fetch the tree from the Wikipedia API, then print it as an indented outline.
category_tree = build_category_tree(category_name, depth)
print_category_tree(category_tree)

Machine_learning
  Applied_machine_learning
    AlphaGo
      - https://en.wikipedia.org/wiki/AlphaGo
      - https://en.wikipedia.org/wiki/AlphaDev
      - https://en.wikipedia.org/wiki/AlphaGo_(film)
      - https://en.wikipedia.org/wiki/AlphaGo_versus_Fan_Hui
      - https://en.wikipedia.org/wiki/AlphaGo_versus_Ke_Jie
      - https://en.wikipedia.org/wiki/AlphaGo_versus_Lee_Sedol
      - https://en.wikipedia.org/wiki/AlphaGo_Zero
      - https://en.wikipedia.org/wiki/AlphaZero
      - https://en.wikipedia.org/wiki/Future_of_Go_Summit
      - https://en.wikipedia.org/wiki/Aja_Huang
      - https://en.wikipedia.org/wiki/Master_(software)
      - https://en.wikipedia.org/wiki/MuZero
      - https://en.wikipedia.org/wiki/The_MANIAC
    Deep_learning_software_applications
      - https://en.wikipedia.org/wiki/15.ai
      - https://en.wikipedia.org/wiki/Adobe_Enhanced_Speech
      - https://en.wikipedia.org/wiki/AlphaFold
      - https://en.wikipedia.org/wiki/Artbreeder
      - https://en