Check for reference statements with a reference DOI (P27) but without a "stated in" (P23) link to a reference item.

When editing, references can be added with just a reference DOI (P27) without creating a corresponding reference item first. The DOIs can then be used to retrieve article titles (from Wikidata) and create reference items.

The reference statements are then linked to the reference items with the same DOI value.

In [1]:
import pandas as pd
import json
from pprint import pprint

from wikibaseintegrator.wbi_config import config as wbi_config
from wikibaseintegrator import wbi_login, WikibaseIntegrator, datatypes, wbi_helpers

# Point wikibaseintegrator at the ppsdb.wikibase.cloud instance: its MediaWiki
# API, its SPARQL query endpoint, and the base URL of the wiki.
wbi_config.update(
    {
        'MEDIAWIKI_API_URL': 'https://ppsdb.wikibase.cloud/w/api.php',
        'SPARQL_ENDPOINT_URL': 'https://ppsdb.wikibase.cloud/query/sparql',
        'WIKIBASE_URL': 'https://ppsdb.wikibase.cloud',
    }
)

In [2]:
# Read the bot credentials (keys "user" and "password") from a local JSON file
# and use them to log in to the Wikibase.
with open("secrets/bot_password.json", "r") as fh:
    pw = json.loads(fh.read())
login = wbi_login.Login(password=pw['password'], user=pw['user'])

In [3]:
# Namespace prefixes passed along with each SPARQL query in this notebook:
#   pp   - entities
#   ppt  - direct property values (prop/direct/)
#   pps  - entity -> statement node (prop/)
#   ppss - statement node -> statement value (prop/statement/)
#   ppsq - statement node -> qualifier value (prop/qualifier/)
#   ppsr - reference node -> reference value (prop/reference/)
# NOTE(review): the query for unlinked references also uses the `prov:` prefix,
# which is not declared here; presumably the query service predefines it - confirm.
sparql_prefixes = """
PREFIX pp: <https://ppsdb.wikibase.cloud/entity/>
PREFIX ppt: <https://ppsdb.wikibase.cloud/prop/direct/>
PREFIX pps: <https://ppsdb.wikibase.cloud/prop/>
PREFIX ppss: <https://ppsdb.wikibase.cloud/prop/statement/>
PREFIX ppsq: <https://ppsdb.wikibase.cloud/prop/qualifier/>
PREFIX ppsr: <https://ppsdb.wikibase.cloud/prop/reference/>
"""

In [4]:
# Select every entity that has a direct P13 value, together with that value.
# NOTE(review): P13 is presumably the DOI property of reference items (the
# variable is named ?DOI) - confirm against the Wikibase property list.
query_refs = """
SELECT DISTINCT ?ref ?DOI WHERE {
    ?ref ppt:P13 ?DOI
}
"""

In [5]:
# Run the reference-item query; the result is the SPARQL JSON response
# (rows are under ['results']['bindings']).
all_refs = wbi_helpers.execute_sparql_query(
    prefix=sparql_prefixes,
    query=query_refs,
)

In [6]:
# Map each DOI to the id of its reference item (last path segment of the
# entity URI returned by the query).
# Keys are normalised to upper case: DOI names are case-insensitive, and the
# linking loop upper-cases the DOI of each reference before looking it up here,
# so a key stored in any other case would never be found.
doi2ref = {
    binding['DOI']['value'].upper(): binding['ref']['value'].rsplit("/", 1)[-1]
    for binding in all_refs['results']['bindings']
}

In [23]:
# Find hosts with a P19 statement whose reference has a reference DOI (P27)
# but no "stated in" (P23) value.
#   ?interaction - the P19 statement node of ?host (reached via the pps: prefix)
#   ?refnode     - a reference attached to that statement (prov:wasDerivedFrom)
# NOTE(review): ?DOI is bound by the BIND clause but is not part of the SELECT
# projection, so it does not appear in the result rows.
query_items_with_unlinked_refs = """
SELECT DISTINCT ?host ?interaction WHERE {
  ?host pps:P19 ?interaction.
  ?interaction prov:wasDerivedFrom ?refnode.
  ?refnode ppsr:P27 ?doi.
  FILTER NOT EXISTS { ?refnode ppsr:P23 ?statedin }
  BIND (UCASE(STR(?doi)) AS ?DOI)
}
"""

In [24]:
# Run the query for hosts that still have references without a "stated in" link.
unlinked_refs = wbi_helpers.execute_sparql_query(
    prefix=sparql_prefixes,
    query=query_items_with_unlinked_refs,
)

In [25]:
# Client used below to fetch and write items, authenticated with the bot login.
wbi = WikibaseIntegrator(login=login)

In [26]:
# Distinct ids of the host items returned by the query. A host appears once per
# matching statement in the result rows, hence the de-duplication through a set.
items_to_process = list(
    {
        row['host']['value'].split("/")[-1]
        for row in unlinked_refs['results']['bindings']
    }
)

In [28]:
# Show how many distinct host items will be processed.
len(items_to_process)

311

In [None]:
def _reference_snaks(reference, prop_nr):
    """Return the snaks of *reference* for property *prop_nr*, or [] if it has none."""
    # NOTE(review): depending on the wikibaseintegrator version, Snaks.get()
    # may raise KeyError (or return None) for a property that is absent from
    # the reference - confirm against the installed release. The guard is
    # harmless when get() already returns an empty list.
    try:
        return reference.snaks.get(prop_nr) or []
    except KeyError:
        return []


# For every host item, go through the references of its P19 statements and add
# a "stated in" (P23) snak pointing at the reference item with the same DOI.
for q in items_to_process:
    current_item = wbi.item.get(q)
    # Count links added for the whole item. Initialising it here (rather than
    # per statement) keeps the total across all P19 statements and guarantees
    # it is defined even when the item has no P19 statements.
    counter = 0
    for interaction in current_item.claims.get('P19'):
        for current_ref in interaction.references:
            if _reference_snaks(current_ref, "P23"):
                continue  # already linked to a reference item
            doi_snaks = _reference_snaks(current_ref, "P27")
            if len(doi_snaks) != 1:
                continue  # no DOI, or ambiguous (more than one DOI)
            # Lookup is done on the upper-cased DOI.
            doi = doi_snaks[0].datavalue['value'].upper()
            if doi in doi2ref:
                current_ref.add(datatypes.Item(doi2ref[doi], prop_nr="P23"))
                counter += 1
    # Only send an edit when at least one reference was actually changed.
    if counter:
        current_item.write(summary="link items to reference statements by DOI")
    print(f"{q} {counter}")