In [1]:
import os
import requests
import pandas as pd
import numpy as np
import pycountry

from functions import (
    sparql_query,
    kb_props,
    kb_datasources,
    valid_classes
)

from wikibaseintegrator.wbi_config import config as wbi_config
from wikibaseintegrator import WikibaseIntegrator, wbi_login
from wikibaseintegrator.models import Qualifiers, References, Reference
from wikibaseintegrator import datatypes
from wikibaseintegrator.wbi_helpers import execute_sparql_query

In [2]:
# Configure WikibaseIntegrator from the environment. Each of these three settings
# is read from the environment variable with the same name; a missing variable
# raises KeyError so a misconfigured session stops here.
for setting_name in ('MEDIAWIKI_API_URL', 'SPARQL_ENDPOINT_URL', 'WIKIBASE_URL'):
    wbi_config[setting_name] = os.environ[setting_name]

# The user agent string embeds the Wikibase URL.
wbi_config['USER_AGENT'] = f'EDJIBot/1.0 ({os.environ["WIKIBASE_URL"]})'

# Log in with the bot credentials (also taken from the environment, never hardcoded)
# and build the client object that the cells below use for item reads and writes.
login_instance = wbi_login.Login(user=os.environ['BOT_NAME'], password=os.environ['BOT_PASS'])
wbi = WikibaseIntegrator(login=login_instance)

In [3]:
# Load lookup tables through helpers imported from the local `functions` module.
# kb_props() returns two values; `properties` is indexed by property label in later
# cells (e.g. properties['caveat']) to supply the prop_nr of a claim.
prop_item_definitions, properties = kb_props()
# `classes` is indexed by class name in a later cell (classes['country']) to supply
# the value of an item-valued claim.
classes = valid_classes()
# NOTE(review): `datasources` is not referenced by any cell visible here — confirm it is still needed.
datasources = kb_datasources()

In [9]:
# One record per subdivision that pycountry lists for country code 'US',
# keeping its code, name and type.
pycountry_states = [
    {
        'iso_code': subdivision.code,
        'pycountry_name': subdivision.name,
        'pycountry_type': subdivision.type,
    }
    for subdivision in pycountry.subdivisions.get(country_code='US')
]
df_pycountry_states = pd.DataFrame(pycountry_states)

In [15]:
# Public Wikidata query service used for this lookup.
WIKIDATA_SPARQL_ENDPOINT = 'https://query.wikidata.org/sparql'

# Select every item that has a P300 value (bound to ?iso_code) and a direct
# P131 statement pointing at wd:Q30, together with its English label.
query_wd_states = """
prefix wd: <http://www.wikidata.org/entity/>
prefix wdt: <http://www.wikidata.org/prop/direct/>

SELECT ?item ?itemLabel ?iso_code WHERE {
  ?item wdt:P300 ?iso_code .
  ?item wdt:P131 wd:Q30
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
"""

# Results are requested in the helper's 'dataframe' output mode; the next cell
# reads the iso_code column from them.
wd_states = sparql_query(
    endpoint=WIKIDATA_SPARQL_ENDPOINT,
    query=query_wd_states,
    output='dataframe',
)

In [18]:
# Rows from pycountry whose ISO code was not returned by the Wikidata query.
not_in_wikidata = ~df_pycountry_states["iso_code"].isin(wd_states["iso_code"])
df_pycountry_states.loc[not_in_wikidata]

Unnamed: 0,iso_code,pycountry_name,pycountry_type
8,US-PR,Puerto Rico,Outlying area
24,US-MP,Northern Mariana Islands,Outlying area
26,US-UM,United States Minor Outlying Islands,Outlying area
35,US-VI,"Virgin Islands, U.S.",Outlying area


In [12]:
# Create one new Wikibase item per row of `wd_countries_plus`.
#
# NOTE(review): `wd_countries_plus` is not built in any cell visible here; it must
# already exist in the kernel before this cell runs.
# NOTE: every pass calls wbi.item.new() and item.write(), so re-running this cell
# creates a fresh set of items rather than updating the existing ones.
for index, row in wd_countries_plus.iterrows():
    print("PROCESSING:", row.countryLabel)

    # Aliases: the pycountry common and official names, whenever they are real
    # strings (not NaN/None) that differ from the label and are not repeats.
    aliases = []
    for candidate in (row.pycountry_name, row.pycountry_official_name):
        if (
            isinstance(candidate, str)
            and candidate != row.countryLabel
            and candidate not in aliases
        ):
            aliases.append(candidate)

    item = wbi.item.new()

    # Set label, description and aliases
    item.labels.set('en', row.countryLabel)
    item.descriptions.set('en', row.countryDescription)
    if aliases:
        # Previously the alias list was built but never written to the item.
        item.aliases.set('en', aliases)

    # Classification claim. The property id comes from the same `properties`
    # lookup as every other claim below; the earlier `property_lookup` name is
    # not defined anywhere in the visible notebook and fails on a fresh kernel.
    # NOTE(review): assumes `properties` has an 'instance of' key — confirm.
    item.claims.add(
        datatypes.Item(
            prop_nr=properties['instance of'],
            value=classes['country']
        )
    )

    # ISO 3166-1 identifiers
    item.claims.add(
        datatypes.ExternalID(
            prop_nr=properties['ISO 3166-1 alpha-2 code'],
            value=row.alpha2_code
        )
    )

    item.claims.add(
        datatypes.ExternalID(
            prop_nr=properties['ISO 3166-1 alpha-3 code'],
            value=row.alpha3_code
        )
    )

    item.claims.add(
        datatypes.ExternalID(
            prop_nr=properties['ISO 3166-1 numeric code'],
            value=row.num_code
        )
    )

    # Link to Wikidata, qualified with a caveat explaining how the link was made.
    wd_link_qualifiers = Qualifiers()
    wd_link_caveat = datatypes.String(
        prop_nr=properties['caveat'],
        value='Wikidata link made on simple alignment with alpha 2 country code; not confirmed for specific use'
    )
    wd_link_qualifiers.add(wd_link_caveat)

    item.claims.add(
        datatypes.ExternalID(
            prop_nr=properties['related wikidata item'],
            # Keep only the last path segment of the URI held in row.country.
            value=row.country.split("/")[-1],
            qualifiers=wd_link_qualifiers
        )
    )

    item.write()

PROCESSING: Japan
PROCESSING: Republic of Ireland
PROCESSING: United States of America
PROCESSING: Italy
PROCESSING: Netherlands
PROCESSING: Uruguay
PROCESSING: Egypt
PROCESSING: Ethiopia
PROCESSING: Ghana
PROCESSING: Andorra
PROCESSING: Cyprus
PROCESSING: Kazakhstan
PROCESSING: Uzbekistan
PROCESSING: Australia
PROCESSING: Chad
PROCESSING: Samoa
PROCESSING: Fiji
PROCESSING: Paraguay
PROCESSING: Guyana
PROCESSING: Ecuador
PROCESSING: Jamaica
PROCESSING: Haiti
PROCESSING: Iran
PROCESSING: Yemen
PROCESSING: Kuwait
PROCESSING: Maldives
PROCESSING: Nepal
PROCESSING: Oman
PROCESSING: Sri Lanka
PROCESSING: Taiwan
PROCESSING: Turkmenistan
PROCESSING: Tanzania
PROCESSING: Central African Republic
PROCESSING: Zimbabwe
PROCESSING: Botswana
PROCESSING: Burkina Faso
PROCESSING: Republic of the Congo
PROCESSING: Djibouti
PROCESSING: Eritrea
PROCESSING: Guinea
PROCESSING: Cameroon
PROCESSING: Madagascar
PROCESSING: Malawi
PROCESSING: Western Sahara
PROCESSING: Northern Mariana Islands
PROCESSING: Mar

### Fixing Stuff

I figured out how to remove a claim from an existing item, which is not really described in the WBI docs. First get the item, then get its claims for the property in question (the result is a list of claims for that property). Pick the specific claim you want to remove from that list and call `remove()` on it. You can then re-add the claim with the correct values and `write()` the item.

In [41]:
# Fetch every item that has the P1 value wd:Q1897 (presumably the country class —
# confirm) together with its P16 value, bound to ?wd_url.
country_query = """
PREFIX wd: <https://edji-knows.wikibase.cloud/entity/>
PREFIX wdt: <https://edji-knows.wikibase.cloud/prop/direct/>

SELECT ?country ?countryLabel ?wd_url WHERE {
?country wdt:P1 wd:Q1897 .
?country wdt:P16 ?wd_url .
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
"""

countries = sparql_query(
    endpoint='https://edji-knows.wikibase.cloud/query/sparql',
    query=country_query,
    output='dict'
)

# Replace each P16 value that starts with 'http://' by the last segment of that value.
for c in countries:
    # Values that do not start with 'http://' are left untouched.
    if not c["wd_url"].startswith('http://'):
        continue

    print("Processing", c["country"])
    wd_id = c["wd_url"].split('/')[-1]
    local_qid = c["country"].split("/")[-1]

    # Load the item and flag its first P16 claim for removal.
    c_item = wbi.item.get(local_qid)
    p16_claims = c_item.claims.get(property='P16')
    c_item_wd_claim = p16_claims[0]
    c_item_wd_claim.remove()

    # Build the caveat qualifier for the replacement claim.
    wd_link_qualifiers = Qualifiers()
    wd_link_qualifiers.add(
        datatypes.String(
            prop_nr=properties['caveat'],
            value='Wikidata link made on simple alignment with alpha 2 country code; not confirmed for specific use'
        )
    )

    # Add the replacement claim, then save the item.
    replacement_claim = datatypes.ExternalID(
        prop_nr='P16',
        value=wd_id,
        qualifiers=wd_link_qualifiers
    )
    c_item.claims.add(replacement_claim)

    c_item.write()

Processing https://edji-knows.wikibase.cloud/entity/Q2100
Processing https://edji-knows.wikibase.cloud/entity/Q2101
Processing https://edji-knows.wikibase.cloud/entity/Q2102
Processing https://edji-knows.wikibase.cloud/entity/Q2103
Processing https://edji-knows.wikibase.cloud/entity/Q2104
Processing https://edji-knows.wikibase.cloud/entity/Q2105
Processing https://edji-knows.wikibase.cloud/entity/Q2106
Processing https://edji-knows.wikibase.cloud/entity/Q2107
Processing https://edji-knows.wikibase.cloud/entity/Q2108
Processing https://edji-knows.wikibase.cloud/entity/Q2109
Processing https://edji-knows.wikibase.cloud/entity/Q2110
Processing https://edji-knows.wikibase.cloud/entity/Q2111
Processing https://edji-knows.wikibase.cloud/entity/Q2112
Processing https://edji-knows.wikibase.cloud/entity/Q2113
Processing https://edji-knows.wikibase.cloud/entity/Q2114
Processing https://edji-knows.wikibase.cloud/entity/Q2115
Processing https://edji-knows.wikibase.cloud/entity/Q2116
Processing htt

In [44]:
# SPARQL endpoint of the EDJI knowledge base.
EDJI_SPARQL_ENDPOINT = 'https://edji-knows.wikibase.cloud/query/sparql'

# Same item selection as the previous query (P1 = wd:Q1897), but this time
# returning each item's P18 value, bound to ?alpha2.
country_query = """
PREFIX wd: <https://edji-knows.wikibase.cloud/entity/>
PREFIX wdt: <https://edji-knows.wikibase.cloud/prop/direct/>

SELECT ?country ?countryLabel ?alpha2 WHERE {
?country wdt:P1 wd:Q1897 .
?country wdt:P18 ?alpha2 .
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
"""

# Requested in 'dataframe' output mode because the next cell merges it with pandas.
countries = sparql_query(
    endpoint=EDJI_SPARQL_ENDPOINT,
    query=country_query,
    output='dataframe',
)


In [51]:
# Attach the pycountry names to each knowledge-base country by alpha-2 code.
# A left join keeps every row of `countries`; rows with no match get NaN names.
pycountry_name_columns = ["alpha2_code", "pycountry_name", "pycountry_official_name"]
country_names = countries.merge(
    wd_countries_plus[pycountry_name_columns],
    how="left",
    left_on="alpha2",
    right_on="alpha2_code",
)

In [55]:
# Add the pycountry common/official names as English aliases on existing country items.
#
# `country_names` comes from a left merge, so the pycountry columns can be NaN for
# rows with no match. Only real strings are accepted as aliases: comparing NaN to
# the label with != is True, which previously let a float NaN through as an alias.
# Repeated names are also added only once.
for index, row in country_names.iterrows():
    aliases = []
    for candidate in (row.pycountry_name, row.pycountry_official_name):
        if (
            isinstance(candidate, str)
            and candidate != row.countryLabel
            and candidate not in aliases
        ):
            aliases.append(candidate)

    # Nothing to add for this country; skip the API round trip.
    if not aliases:
        continue

    print("ADDING ALIASES:", row.country, aliases)
    # The item id is the last path segment of the entity URI.
    item = wbi.item.get(row.country.split("/")[-1])
    item.aliases.set('en', aliases)
    item.write()


ADDING ALIASES: https://edji-knows.wikibase.cloud/entity/Q2100 ['Commonwealth of Dominica']
ADDING ALIASES: https://edji-knows.wikibase.cloud/entity/Q2101 ['Republic of Costa Rica']
ADDING ALIASES: https://edji-knows.wikibase.cloud/entity/Q2102 ['State of Israel']
ADDING ALIASES: https://edji-knows.wikibase.cloud/entity/Q2103 ['Republic of Panama']


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=30a1da16-8d37-4863-b767-04fc5292d9a6' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>