In [2]:
import networkx as nx
import requests
import re

# Endpoint that get_linked_articles sends its GET requests to.
url = "https://en.wikipedia.org/w/api.php"

# Query parameters shared by every request; the page title ("titles") is
# supplied per request by get_linked_articles.
params = dict(
    action="query",
    format="json",
    prop="links",
    plnamespace="0",
    pllimit="max",
)

def get_linked_articles(title, depth, num_of_links):
    """Collect titles linked from a page, following links recursively.

    For the page ``title`` the first ``num_of_links`` entries of the
    response's ``links`` list are examined and titles consisting only of
    digits are skipped.  Every kept title is appended to the result and is
    immediately followed by the titles gathered from that page in turn
    (depth-first), as long as ``depth`` allows.  The result is a flat list:
    it does not record on which page a title was found, and it may contain
    the same title more than once.

    Args:
        title: Title of the page to query.
        depth: Number of link levels to follow.  Values <= 0 return ``[]``
            without sending a request.
        num_of_links: Maximum number of link entries examined per page.  The
            cap is applied before the digit-only filter, so fewer titles may
            be kept.

    Returns:
        list: Linked titles in visiting order.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        RuntimeError: If the JSON payload contains a top-level ``error`` entry.
    """
    if depth <= 0:
        return []

    # Build a per-call copy: writing "titles" into the shared module-level
    # dict would leak state between calls (and between notebook cells).
    query_params = dict(params, titles=title)
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url, params=query_params, timeout=30)
    response.raise_for_status()
    data = response.json()

    if "error" in data:
        raise RuntimeError(
            "Wikipedia API error for {!r}: {}".format(title, data["error"])
        )

    # A payload without "query"/"pages" is treated as a page with no links
    # instead of failing with an opaque KeyError/StopIteration.
    pages = data.get("query", {}).get("pages", {})
    page = next(iter(pages.values()), {})

    links = []
    for link in page.get("links", [])[:num_of_links]:
        linked_title = link["title"]
        if re.match(r'^\d+$', linked_title):
            continue
        links.append(linked_title)
        # The depth guard at the top ends the recursion (no request is sent
        # once depth reaches 0), so no extra check is needed here.
        links.extend(get_linked_articles(linked_title, depth - 1, num_of_links))

    return links

def add_links_to_graph(graph, main_title, links):
    """Attach every title in ``links`` to ``main_title`` inside ``graph``.

    Args:
        graph: Object exposing ``add_node(node)`` and ``add_edge(u, v)``.
        main_title: Node each edge starts from.
        links: Iterable of titles; each becomes a node and the end point of
            an edge from ``main_title``.
    """
    for target in links:
        # Register the target first, then connect it to the source.
        graph.add_node(target)
        graph.add_edge(main_title, target)

# Crawl settings.
G = nx.DiGraph()
depth = 2
num_of_links = 50
second_pass_links = 10  # per-page link cap used when expanding each collected title
africa = "Africa"

# First pass: gather titles reachable from the root article.
linked_titles = get_linked_articles(africa, depth, num_of_links)

G.add_node(africa)
# NOTE(review): linked_titles is a flat list, so titles found on deeper pages
# are attached directly to the root here as well -- confirm this is intended.
add_links_to_graph(G, africa, linked_titles)

# Second pass: expand every distinct title once, in first-seen order.
# linked_titles can repeat a title; crawling a repeat again would only re-add
# nodes and edges the graph already holds, at the cost of extra requests.
for title in dict.fromkeys(linked_titles):
    next_title = get_linked_articles(title, depth, second_pass_links)
    add_links_to_graph(G, title, next_title)

In [3]:
# Write the crawled link graph to disk as GraphML.
graphml_path = "africa03.graphml"
nx.write_graphml(G, graphml_path)