This notebook works through the process of adding records for U.S. states to the knowledge base. It uses Wikidata as the source because Wikidata has a complete list of states that is reasonably stable at this point, apart from new properties occasionally being added to the state records.

In [1]:
import os
import requests
import pandas as pd
import numpy as np

from functions import (
    sparql_query,
    kb_props,
    kb_datasources,
    valid_classes
)

from wikibaseintegrator.wbi_config import config as wbi_config
from wikibaseintegrator import WikibaseIntegrator, wbi_login
from wikibaseintegrator.models import Qualifiers, References, Reference
from wikibaseintegrator import datatypes
from wikibaseintegrator.wbi_helpers import execute_sparql_query

In [3]:
# Configure WikibaseIntegrator from environment variables: the MediaWiki API,
# the SPARQL endpoint, the Wikibase base URL, and a user agent that embeds the
# Wikibase URL.
wbi_config.update({
    'MEDIAWIKI_API_URL': os.environ['MEDIAWIKI_API_URL'],
    'SPARQL_ENDPOINT_URL': os.environ['SPARQL_ENDPOINT_URL'],
    'WIKIBASE_URL': os.environ['WIKIBASE_URL'],
    'USER_AGENT': f'EDJIBot/1.0 ({os.environ["WIKIBASE_URL"]})',
})

# Log in with the bot credentials (also read from the environment) and build
# the client object that later cells use to create and write items.
login_instance = wbi_login.Login(user=os.environ['BOT_NAME'], password=os.environ['BOT_PASS'])
wbi = WikibaseIntegrator(login=login_instance)

In [23]:
# Load lookup tables via helpers from the project's local `functions` module.
# Later cells index `properties` and `classes` by human-readable name
# (e.g. properties['instance of'], classes['U.S. State']) and pass the results
# as `prop_nr` / `value` when building claims; presumably these are the
# knowledge base's own property and item identifiers -- verify in functions.py.
# NOTE(review): `prop_item_definitions` and `datasources` are not referenced
# anywhere else in the visible notebook.
prop_item_definitions, properties = kb_props()
classes = valid_classes()
datasources = kb_datasources()

In [44]:
# Source side: query the public Wikidata endpoint for every item that has a
# wdt:P300 value (bound to ?iso_code) and a wdt:P31 statement pointing at
# wd:Q35657, with English label/description supplied by the label service.
# NOTE(review): P300 / P31 / Q35657 are presumably "ISO 3166-2 code",
# "instance of" and "U.S. state" in Wikidata -- confirm before reuse.
query_wd_states = """
prefix wd: <http://www.wikidata.org/entity/>
prefix wdt: <http://www.wikidata.org/prop/direct/>

SELECT ?item ?itemLabel ?itemDescription ?iso_code WHERE {
  ?item wdt:P300 ?iso_code .
  ?item wdt:P31 wd:Q35657 .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
"""

# The result is used below with `.iso_code`, `.isin` and `.iterrows()`, i.e. it is
# expected to be a pandas DataFrame with one column per selected variable.
wd_states = sparql_query(
    endpoint='https://query.wikidata.org/sparql',
    query=query_wd_states,
    output='dataframe'
)

# Target side: the same four variables, queried from the knowledge base itself
# so the two result sets can be compared on ?iso_code.
# NOTE(review): the entity/property prefixes and the ids P21 / P1 / Q2150 are
# hard-coded for the edji-knows.wikibase.cloud instance, while the endpoint
# comes from SPARQL_ENDPOINT_URL and the item-creation cell resolves its ids by
# name through `properties` / `classes`. Pointing the environment at another
# instance would leave this query inconsistent with the rest of the notebook.
query_edji_kb_states = """
prefix wd: <https://edji-knows.wikibase.cloud/entity/>
prefix wdt: <https://edji-knows.wikibase.cloud/prop/direct/>

SELECT ?item ?itemLabel ?itemDescription ?iso_code WHERE {
  ?item wdt:P21 ?iso_code .
  ?item wdt:P1 wd:Q2150 .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
"""

edji_kb_states = sparql_query(
    endpoint=os.environ["SPARQL_ENDPOINT_URL"],
    query=query_edji_kb_states,
    output='dataframe'
)

In [46]:
# Create one knowledge-base item for every Wikidata state whose ISO code is not
# yet present in the knowledge base.
#
# NOTE: this cell WRITES to the Wikibase. `edji_kb_states` is a snapshot taken by
# the query cell, so re-run that cell before re-running this one; otherwise
# states written by an earlier (possibly partial) run are not filtered out and
# would be created a second time.

# ISO codes already recorded. `DataFrame.get` falls back to an empty list when
# the frame has no `iso_code` column, which can happen if the knowledge-base
# query returned no rows at all (e.g. a first run against an empty instance).
existing_iso_codes = edji_kb_states.get('iso_code', [])

# Keep only the missing states, and at most one row per ISO code so a duplicated
# SPARQL binding cannot produce two items for the same state.
states_to_add = (
    wd_states[~wd_states.iso_code.isin(existing_iso_codes)]
    .drop_duplicates(subset='iso_code')
)

for _, row in states_to_add.iterrows():
    print("PROCESSING:", row.itemLabel)

    item = wbi.item.new()

    # Set label and description. The description is optional in the query
    # result (the label service leaves it unbound when there is none), so it is
    # only set when a real value is present rather than passing NaN through.
    item.labels.set('en', row.itemLabel)
    description = row.get('itemDescription')
    if pd.notna(description):
        item.descriptions.set('en', description)

    # Classify the new item as a U.S. state.
    item.claims.add(
        datatypes.Item(
            prop_nr=properties['instance of'],
            value=classes['U.S. State']
        )
    )

    # Carry over the ISO 3166-2 code; this is also the key used above to detect
    # states that already exist.
    item.claims.add(
        datatypes.ExternalID(
            prop_nr=properties['ISO 3166-2 code'],
            value=row.iso_code
        )
    )

    # Link back to the Wikidata record, qualified with a caveat about how far
    # the Wikidata properties apply here.
    wd_link_qualifiers = Qualifiers()
    wd_link_caveat = datatypes.String(
        prop_nr=properties['caveat'],
        value='Not all properties from U.S. State records in Wikidata may be appropriate in this context'
    )
    wd_link_qualifiers.add(wd_link_caveat)

    item.claims.add(
        datatypes.ExternalID(
            prop_nr=properties['related wikidata item'],
            # `item` holds the full entity URI; the last path segment is the id.
            value=row['item'].split("/")[-1],
            qualifiers=wd_link_qualifiers
        )
    )

    item.write(summary="Adding new U.S. state/territory item derived from wikidata")

PROCESSING: California
PROCESSING: Alabama
PROCESSING: Maine
PROCESSING: New Hampshire
PROCESSING: Connecticut
PROCESSING: Hawaii
PROCESSING: Alaska
PROCESSING: Florida
PROCESSING: Arizona
PROCESSING: Oregon
PROCESSING: Utah
PROCESSING: Michigan
PROCESSING: North Dakota
PROCESSING: South Dakota
PROCESSING: Montana
PROCESSING: Wyoming
PROCESSING: Idaho
PROCESSING: Washington
PROCESSING: Nevada
PROCESSING: Colorado
PROCESSING: Virginia
PROCESSING: West Virginia
PROCESSING: New York
PROCESSING: Rhode Island
PROCESSING: Maryland
PROCESSING: Delaware
PROCESSING: Ohio
PROCESSING: Pennsylvania
PROCESSING: New Jersey
PROCESSING: Indiana
PROCESSING: Georgia
PROCESSING: Texas
PROCESSING: North Carolina
PROCESSING: South Carolina
PROCESSING: Tennessee
PROCESSING: Minnesota
PROCESSING: Wisconsin
PROCESSING: Iowa
PROCESSING: Nebraska
PROCESSING: Kansas
PROCESSING: Missouri
PROCESSING: Louisiana
PROCESSING: Kentucky
PROCESSING: Arkansas
PROCESSING: Oklahoma
PROCESSING: Vermont


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=30a1da16-8d37-4863-b767-04fc5292d9a6' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>