In [1]:
from tqdm import tqdm
import requests

# Better than pandas
import polars as pl

In [None]:
def wbsearchentities(name: str, language: str = "en", timeout: float = 30.0):
    """Search Wikidata for entities whose label matches ``name``.

    Issues an ``action=wbsearchentities`` request against the Wikidata
    ``api.php`` endpoint and returns the decoded JSON body.

    Args:
        name: Text to search for.
        language: Value sent as the ``language`` request parameter.
            Defaults to ``"en"``, which is what this function always sent
            before the parameter existed.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON response. Later cells in this notebook read the
        hits from its ``"search"`` list.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(
        "https://www.wikidata.org/w/api.php",
        params={
            "action": "wbsearchentities",
            "search": name,
            "language": language,
            "format": "json",
        },
        # requests never times out on its own; without this a stalled
        # connection would block the notebook cell forever.
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()

In [9]:
def load_entity(id: str, timeout: float = 30.0):
    """Fetch one Wikidata item through the Wikibase REST API.

    Args:
        id: Item identifier appended to the ``/entities/items/`` URL
            (the notebook passes ids such as ``"Q39"``). The name shadows
            the ``id`` builtin but is kept so keyword callers keep working.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(
        "https://www.wikidata.org/w/rest.php/wikibase/v0/entities/items/" + id,
        # requests never times out on its own; without this a stalled
        # connection would block the notebook cell forever.
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()

In [2]:
# Scan the spreadsheet export, keep only the country column under the
# shorter name "name", and materialise the result as a DataFrame.
landlocked_names = pl.col("landlocked country").alias("name")
countries = (
    pl.scan_csv("./landlocked 2023-06-26 - Лист1.csv")
    .select(landlocked_names)
    .collect()
)
countries

name
str
"""Switzerland"""
"""Austria"""
"""Czech Republic…"
"""Hungary"""
"""Serbia"""
"""Slovakia"""
"""Ethiopia"""
"""Kazakhstan"""
"""Luxembourg"""
"""Belarus"""


In [4]:
search_results_by_name = {}
for name in tqdm(countries["name"], total=len(countries)):
    search_results_by_name[name] = wbsearchentities(name)

# Cache
%store search_results_by_name

100%|█████████████████████████████████████████████████████████████████████████████████| 44/44 [00:17<00:00,  2.55it/s]

Stored 'search_results_by_name' (dict)





In [None]:
# Load from cache: restore the dict previously saved with
# `%store search_results_by_name`, so the Wikidata search requests
# do not have to be repeated.
%store -r search_results_by_name

In [None]:
def _first_hit_id(name):
    """Return the id of the first search hit stored for ``name``."""
    hits = search_results_by_name[name]["search"]
    return hits[0]["id"]


# Add a "code" column holding the id of each country's top search hit.
countries = countries.with_columns(pl.col("name").apply(_first_hit_id).alias("code"))
countries

In [10]:
entity_by_code = {}
for code in tqdm(countries["code"], total=len(countries)):
    entity_by_code[code] = load_entity(code)


# Cache
%store entity_by_code

100%|█████████████████████████████████████████████████████████████████████████████████| 44/44 [00:34<00:00,  1.26it/s]

Stored 'entity_by_code' (dict)





In [None]:
# Load from cache: restore the dict previously saved with
# `%store entity_by_code`, so the entities do not have to be
# downloaded again.
%store -r entity_by_code

In [16]:
def property_values(entity: dict, prop_name: str):
    """Collect the value contents of one property's statements.

    Args:
        entity: Entity dict as returned by ``load_entity``; must contain a
            ``"statements"`` mapping from property id to a list of
            statements.
        prop_name: Property id to read, e.g. ``"P47"``.

    Returns:
        A list with ``statement["value"]["content"]`` for every statement
        of ``prop_name`` that carries a content. The list is empty when
        the entity has no statements for that property.

    Raises:
        KeyError: If ``entity`` has no ``"statements"`` key at all.
    """
    # An entity without this property simply has no values; do not raise.
    statements = entity["statements"].get(prop_name, [])
    return [
        statement["value"]["content"]
        for statement in statements
        # Statements without a concrete value (the REST API's
        # "somevalue" / "novalue" types) have no "content" to extract.
        if "content" in statement.get("value", {})
    ]


# P47 is the Wikidata property "shares border with".
# Look up each country's P47 values, then explode the resulting list column
# so every (code, bordering_code) pair gets its own row.
neighbour_codes = pl.col("code").apply(
    lambda country_code: property_values(entity_by_code[country_code], "P47")
)
bordering = (
    countries.select("code")
    .with_columns(neighbour_codes.alias("bordering_code"))
    .explode("bordering_code")
)
bordering

code,bordering_code
str,str
"""Q39""","""Q40"""
"""Q39""","""Q347"""
"""Q39""","""Q183"""
"""Q39""","""Q142"""
"""Q39""","""Q38"""
"""Q39""","""Q28513"""
"""Q39""","""Q713750"""
"""Q39""","""Q131964"""
"""Q39""","""Q458"""
"""Q39""","""Q7318"""


In [20]:
# Pair every country with its neighbours, then resolve each neighbour code
# back to a name. The second join is a left join, so neighbours that are not
# in `countries` are kept with a null bordering_name.
borders_per_country = countries.join(bordering, on="code", how="inner")
borders_per_country.join(
    countries, how="left", left_on="bordering_code", right_on="code"
).rename({"name_right": "bordering_name"})

name,code,bordering_code,bordering_name
str,str,str,str
"""Switzerland""","""Q39""","""Q40""","""Austria"""
"""Switzerland""","""Q39""","""Q347""","""Liechtenstein"""
"""Switzerland""","""Q39""","""Q183""",
"""Switzerland""","""Q39""","""Q142""",
"""Switzerland""","""Q39""","""Q38""",
"""Switzerland""","""Q39""","""Q28513""",
"""Switzerland""","""Q39""","""Q713750""",
"""Switzerland""","""Q39""","""Q131964""",
"""Switzerland""","""Q39""","""Q458""",
"""Switzerland""","""Q39""","""Q7318""",
