In [93]:
import polars as pl  # Better than pandas
from SPARQLWrapper import SPARQLWrapper, JSON as JsonReturn
from tqdm import tqdm

In [34]:
# Let polars print up to 100 characters per string cell so the long
# Wikidata entity URIs and descriptions shown below are not truncated.
pl.Config.set_fmt_str_lengths(100)

polars.config.Config

In [2]:
# Read the "landlocked country" column of the spreadsheet export lazily,
# expose it under the shorter name "name", and materialise it as a plain
# Python list of country names.
input_countries = (
    pl.scan_csv("./data/landlocked 2023-06-26 - Лист1.csv")
    .select(pl.col("landlocked country").alias("name"))
    .collect()
    .get_column("name")  # the frame has exactly this one column
    .to_list()
)
input_countries

['Switzerland',
 'Austria',
 'Czech Republic',
 'Hungary',
 'Serbia',
 'Slovakia',
 'Ethiopia',
 'Kazakhstan',
 'Luxembourg',
 'Belarus',
 'Uganda',
 'Nepal',
 'Azerbaijan',
 'Armenia',
 'North Macedonia',
 'Uzbekistan',
 'Zimbabwe',
 'Malawi',
 'Zambia',
 'Moldova',
 'Botswana',
 'Burkina Faso',
 'Rwanda',
 'Mongolia',
 'Bolivia',
 'Paraguay',
 'Mali',
 'Kyrgyzstan',
 'Laos',
 'Afghanistan',
 'Niger',
 'Liechtenstein',
 'Tajikistan',
 'Eswatini',
 'Bhutan',
 'Burundi',
 'Lesotho',
 'Chad',
 'Central African Republic',
 'Vatican City',
 'South Sudan',
 'San Marino',
 'Andorra',
 'Turkmenistan']

In [140]:
class WikidataRequests:
    """Small client for running SPARQL queries against the Wikidata endpoint.

    Every query result is returned as a polars DataFrame with one string
    column per SPARQL variable.
    """

    def __init__(self) -> None:
        """Create the SPARQL client and ask the endpoint for JSON results."""
        self.sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
        self.sparql.setReturnFormat(JsonReturn)

    @staticmethod
    def _escape_literal(text: str) -> str:
        """Escape ``text`` for use inside a double-quoted SPARQL string literal."""
        # Backslash must be handled first so the escapes added afterwards
        # are not escaped a second time.
        return (
            text.replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
            .replace("\r", "\\r")
        )

    def query(self, query: str, schema=None):
        """Run ``query`` and return its bindings as a DataFrame of strings.

        Args:
            query: Complete SPARQL query text.
            schema: Optional column specification forwarded to ``pl.DataFrame``.
                A list/tuple of column names is turned into string-typed
                columns so that an empty result set still has a usable dtype.

        Returns:
            A ``pl.DataFrame`` with one row per result binding; each cell holds
            the ``"value"`` field of the corresponding SPARQL JSON binding.
        """
        self.sparql.setQuery(query)
        result: dict = self.sparql.queryAndConvert()  # type:ignore

        # Unwrap {"type": ..., "value": ...} bindings in plain Python rather
        # than with a per-element polars UDF (`Expr.apply`), which newer
        # polars releases no longer provide.
        rows = [
            {var: cell["value"] for var, cell in binding.items()}
            for binding in result["results"]["bindings"]
        ]

        # SPARQL JSON values are always strings; typing the columns explicitly
        # keeps empty results concatenable with non-empty ones.
        if isinstance(schema, (list, tuple)) and all(
            isinstance(column, str) for column in schema
        ):
            schema = {column: pl.Utf8 for column in schema}

        return pl.DataFrame(rows, schema=schema)

    def find_country(self, name: str):
        """Look up a current sovereign state by its exact English label.

        Args:
            name: English label to match, e.g. ``"Austria"``.

        Returns:
            A DataFrame with columns ``country``, ``countryLabel`` and
            ``countryDescription`` containing at most one row.
        """
        # Escape the label so quotes or backslashes in the input cannot
        # terminate the string literal and change the query.
        label = self._escape_literal(name)
        return self.query(
            f"""\
SELECT ?country ?countryLabel ?countryDescription WHERE {{
    ?country rdfs:label "{label}"@en;
        wdt:P31 wd:Q3624078. # 'instance of' 'sovereign state'
    FILTER(NOT EXISTS {{ ?country (p:P31/ps:P31) wd:Q3024240. }}) # is NOT a 'historical country'
    SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
}}
LIMIT 1""",
            schema=["country", "countryLabel", "countryDescription"],
        )

    def find_bordering(self, id: str):
        """List current sovereign states that share a border with an entity.

        Args:
            id: Wikidata item ID such as ``"Q403"``.

        Returns:
            A DataFrame with columns ``bordering`` and ``borderingLabel``.

        Raises:
            ValueError: If ``id`` is not of the form ``Q<digits>``.
        """
        # The ID is spliced into the query unquoted, so accept only a strict
        # item-ID shape instead of arbitrary text.
        digits = id[1:]
        if not (id[:1] == "Q" and digits.isascii() and digits.isdigit()):
            raise ValueError(f"not a Wikidata item ID: {id!r}")
        return self.query(
            f"""\
SELECT ?bordering ?borderingLabel WHERE {{
    ?bordering wdt:P31 wd:Q3624078. # 'instance of' 'sovereign state'
    ?bordering wdt:P47 wd:{id}. # 'shares border with'
    FILTER(NOT EXISTS {{ ?bordering (p:P31/ps:P31) wd:Q3024240. }}) # is NOT a 'historical country'
    SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
}}""",
            schema=["bordering", "borderingLabel"],
        )


# Module-level client instance; `find_countries` and the exploratory cells
# further down call its methods directly.
wikidata = WikidataRequests()

In [128]:
def find_countries(countries):
    """Resolve each country name to its Wikidata entity.

    Issues one `wikidata.find_country` request per name (with a progress bar)
    and left-joins the combined matches back onto the input names, so names
    without a match keep a row with null result columns.

    Args:
        countries: Sized sequence of English country names.

    Returns:
        A DataFrame with a `query` column holding the input names plus the
        result columns of the matched entities.
    """
    lookups = [
        wikidata.find_country(name)
        for name in tqdm(countries, total=len(countries))
    ]
    queried = pl.DataFrame({"query": countries})
    matches = pl.concat(lookups)
    return queried.join(
        matches, left_on="query", right_on="countryLabel", how="left"
    )

In [133]:
# Look up every input country on Wikidata (one SPARQL request per name) and
# display the joined result.
found_countries = find_countries(input_countries)
found_countries

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 44/44 [00:16<00:00,  2.72it/s]


query,country,countryDescription
str,str,str
"""Switzerland""","""http://www.wikidata.org/entity/Q39""","""country in Central Europe"""
"""Austria""","""http://www.wikidata.org/entity/Q40""","""country in Central Europe"""
"""Czech Republic""","""http://www.wikidata.org/entity/Q213""","""country in Central Europe"""
"""Hungary""","""http://www.wikidata.org/entity/Q28""","""country in Central Europe"""
"""Serbia""","""http://www.wikidata.org/entity/Q403""","""country in southeastern Europe"""
"""Slovakia""","""http://www.wikidata.org/entity/Q214""","""country in Central Europe"""
"""Ethiopia""","""http://www.wikidata.org/entity/Q115""","""country in the Horn of Africa"""
"""Kazakhstan""","""http://www.wikidata.org/entity/Q232""","""sovereign state in Eastern Europe and Central Asia"""
"""Luxembourg""","""http://www.wikidata.org/entity/Q32""","""country in Western Europe"""
"""Belarus""","""http://www.wikidata.org/entity/Q184""","""Country in Eastern Europe"""


In [136]:
# Report how many input names resolved to a Wikidata entity: rows whose
# `country` is non-null versus all rows.
print(
    "Found",
    found_countries.filter(pl.col("country").is_not_null()).height,
    "/",
    found_countries.height,
)

Found 44 / 44


In [138]:
def extract_id(uri: str) -> str:
    """Return the entity ID at the end of a Wikidata entity URI.

    Args:
        uri: Entity URI such as ``"http://www.wikidata.org/entity/Q39"``.
            A value without any ``"/"`` (e.g. an already bare ``"Q39"``) is
            returned unchanged.

    Returns:
        The text after the last ``"/"`` in ``uri``.
    """
    # rpartition never raises: when there is no "/" the whole string ends up
    # in the last element, whereas rsplit(...)[1] would fail with IndexError.
    return uri.rpartition("/")[2]


# Preview `found_countries` with an extra `id` column holding the entity ID
# extracted from each URI. The result is only displayed, not assigned.
found_countries.with_columns(pl.col("country").apply(extract_id).alias("id"))

query,country,countryDescription,id
str,str,str,str
"""Switzerland""","""http://www.wikidata.org/entity/Q39""","""country in Central Europe""","""Q39"""
"""Austria""","""http://www.wikidata.org/entity/Q40""","""country in Central Europe""","""Q40"""
"""Czech Republic""","""http://www.wikidata.org/entity/Q213""","""country in Central Europe""","""Q213"""
"""Hungary""","""http://www.wikidata.org/entity/Q28""","""country in Central Europe""","""Q28"""
"""Serbia""","""http://www.wikidata.org/entity/Q403""","""country in southeastern Europe""","""Q403"""
"""Slovakia""","""http://www.wikidata.org/entity/Q214""","""country in Central Europe""","""Q214"""
"""Ethiopia""","""http://www.wikidata.org/entity/Q115""","""country in the Horn of Africa""","""Q115"""
"""Kazakhstan""","""http://www.wikidata.org/entity/Q232""","""sovereign state in Eastern Europe and Central Asia""","""Q232"""
"""Luxembourg""","""http://www.wikidata.org/entity/Q32""","""country in Western Europe""","""Q32"""
"""Belarus""","""http://www.wikidata.org/entity/Q184""","""Country in Eastern Europe""","""Q184"""


In [142]:
# Spot-check the border query for a single entity: Q403 is the ID resolved
# for "Serbia" in the lookup output above.
b = wikidata.find_bordering("Q403")
b

bordering,borderingLabel
str,str
"""http://www.wikidata.org/entity/Q28""","""Hungary"""
"""http://www.wikidata.org/entity/Q218""","""Romania"""
"""http://www.wikidata.org/entity/Q219""","""Bulgaria"""
"""http://www.wikidata.org/entity/Q221""","""North Macedonia"""
"""http://www.wikidata.org/entity/Q222""","""Albania"""
"""http://www.wikidata.org/entity/Q224""","""Croatia"""
"""http://www.wikidata.org/entity/Q225""","""Bosnia and Herzegovina"""
"""http://www.wikidata.org/entity/Q236""","""Montenegro"""


In [144]:
# Tag every neighbour row with the ID of the country it was queried for.
# Preview only: `b` itself is not modified.
b.with_columns(pl.lit("Q403").alias("id"))

bordering,borderingLabel,id
str,str,str
"""http://www.wikidata.org/entity/Q28""","""Hungary""","""Q403"""
"""http://www.wikidata.org/entity/Q218""","""Romania""","""Q403"""
"""http://www.wikidata.org/entity/Q219""","""Bulgaria""","""Q403"""
"""http://www.wikidata.org/entity/Q221""","""North Macedonia""","""Q403"""
"""http://www.wikidata.org/entity/Q222""","""Albania""","""Q403"""
"""http://www.wikidata.org/entity/Q224""","""Croatia""","""Q403"""
"""http://www.wikidata.org/entity/Q225""","""Bosnia and Herzegovina""","""Q403"""
"""http://www.wikidata.org/entity/Q236""","""Montenegro""","""Q403"""
