# Access details for the Wikibase which host the SAF

In [25]:
import rdflib
from wikidataintegrator import wdi_core, wdi_login
from getpass import getpass
import pandas as pd
import sys
import traceback

wikibase = "http://localhost:8080"
api = "http://localhost:8080/w/api.php"
sparql = "http://localhost:8282/proxy/wdqs/bigdata/namespace/wdq/sparql"
#entityUri = wikibase.replace("https:", "http:")+"entity/"
entityUri = "http://mediawiki.svc/entity/"  # this is used to strip it from results! 

WBUSER = "SafAdmin"
#WBPASS = getpass()
WBPASS = "ExampleAdminPassword"
login = wdi_login.WDLogin(WBUSER, WBPASS, mediawiki_api_url=api)

# Python libraries and function definitions

In [26]:
localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api, sparql)

def createProperty(login=login, wdprop=None, lulabel="", enlabel="", frlabel="", delabel="", description="", property_datatype=""):
    if wdprop== None:
        s = []
    else:
        s = [wdi_core.WDUrl(wdprop, prop_nr="P1")]
    localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
    item = localEntityEngine(data=s)
    item.set_label(lulabel, lang="lb")
    item.set_label(enlabel, lang="en")
    item.set_label(delabel, lang="de")
    item.set_label(frlabel, lang="fr")
    item.set_description(description, lang="en")
    
    print(item.write(login, entity_type="property", property_datatype=property_datatype))

# Import CIDOC-CRM definition
TODO: confirm that versions are same between DMSAF and the CIDOC-CRM definitions here

In [27]:
cidoc = rdflib.Graph()
cidoc.load("https://raw.githubusercontent.com/erlangen-crm/ecrm/master/ecrm_current.owl", format="xml")

In [37]:
propertyID = dict()
query = "PREFIX wdt: <http://mediawiki.svc/prop/direct/> SELECT ?item ?label WHERE {{?item rdfs:label ?label }}"
#wdi_core.WDItemEngine.execute_sparql_query(query, as_dataframe = True, endpoint=sparql)
for index, row in wdi_core.WDItemEngine.execute_sparql_query(query, as_dataframe = True, endpoint=sparql).iterrows():
    propertyID[row["label"]] =row["item"].replace(entityUri, "")
    
qid = dict()
query = "PREFIX wdt: <http://mediawiki.svc/prop/direct/> SELECT ?item ?label WHERE {{?item rdfs:label ?label }}"
#wdi_core.WDItemEngine.execute_sparql_query(query, as_dataframe = True, endpoint=sparql)
for index, row in wdi_core.WDItemEngine.execute_sparql_query(query, as_dataframe = True, endpoint=sparql).iterrows():
    qid[row["label"]] =row["item"].replace(entityUri, "")

Q1


# Create CIDOC-CRM items on WB SAF

Note: At some point the namespace for wdt changes or is set from localhost:8080 to mediawiki.svc

In [52]:
# Note: These are sparql queries made against the cidoc definition, not the local blazegraph
query = "SELECT ?cidoc ?label WHERE {?cidoc rdf:type <http://www.w3.org/2002/07/owl#Class> ; rdfs:label ?label .}"
for row in cidoc.query(query):
    #print(str(row[0]), str(row[1]), " ".join(str(row[1]).split(" ")[1:]))
    statements=[]
    statements.append(wdi_core.WDItemID(value=qid["Class"], prop_nr=propertyID["instance of"]))
    statements.append(wdi_core.WDUrl(value=str(row[0]), prop_nr=propertyID['exact match']))
    item = localEntityEngine(new_item=True, data=statements)
    label = " ".join(str(row[1]).split(" "))
    item.set_label(label, lang="en")
    #item.set_aliases([str(row[1]), str(row[1]).replace(" ", "_")], lang="en")
    created_item = item.write(login)
    print("created local item {} for Class with label {}".format(created_item, label))


created local item Q425 for class with label E83 Type Creation
created local item Q426 for class with label E17 Type Assignment
created local item Q427 for class with label E93 Spacetime Snapshot
created local item Q428 for class with label E4 Period
created local item Q429 for class with label E3 Condition State
created local item Q430 for class with label E26 Physical Feature
created local item Q431 for class with label E77 Persistent Item
created local item Q432 for class with label E57 Material
created local item Q433 for class with label E36 Visual Item
created local item Q434 for class with label E35 Title
created local item Q435 for class with label E79 Part Addition
created local item Q436 for class with label E41 Appellation
created local item Q437 for class with label E66 Formation
created local item Q438 for class with label E92 Spacetime Volume
created local item Q439 for class with label E65 Creation
created local item Q440 for class with label E33 Linguistic Object
create

In [None]:

# #propertyClasses
query = "SELECT ?cidoc ?label WHERE {?cidoc rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> ; rdfs:label ?label .}"
for row in cidoc.query(query):
    # print(str(row[0]), str(row[1]), " ".join(str(row[1]).split(" ")[1:]))
    statements=[]
    statements.append(wdi_core.WDItemID(value=qid["Property"], prop_nr=qid["instance of"]))
    statements.append(wdi_core.WDUrl(value=str(row[0]), prop_nr=propertyID["exact match"]))
    item = localEntityEngine(new_item=True, data=statements)
    label = " ".join(str(row[1]).split(" "))
    item.set_label(label, lang="en")
    #item.set_aliases([str(row[1]), str(row[1]).replace(" ", "_")], lang="en")
    try:
        created_item = item.write(login)
        print("created local item {} for Property with label {}".format(created_item, label))
    except:
        print("ERROR: "+str(row[1]))
        continue


In [None]:

# # P3 - exact match        
query = "PREFIX wdt: <http://mediawiki.svc/prop/direct/> SELECT * WHERE {{?item wdt:P3 ?uri .}}"
for index, row in wdi_core.WDItemEngine.execute_sparql_query(query, as_dataframe = True, endpoint=sparql).iterrows():
    qid[row["uri"]] = row["item"].replace(entityUri, "")

# Add CIDOC-CRM relations
## Domain

In [49]:
# CURRENTLY fails because there are no local QIDs for these! (e.g. P177_assigned_property_type)
# Note: this is a query run on the cidoc ontology, NOT on the wiki!
query = """SELECT DISTINCT ?cidoc ?domain  WHERE {
                 ?cidoc rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> ; 
                        rdfs:domain ?domain .}"""
for row in cidoc.query(query):
    try:
        print(str(row['cidoc']))
        print(str(row['cidoc']).replace('http://erlangen-crm.org/current/', ''))
        localIdentifier = str(row['cidoc']).replace('http://erlangen-crm.org/current/', '')
        print(qid[localIdentifier].replace(entityUri, ""))
        localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api, sparql)
        statements=[]
        statements.append(wdi_core.WDItemID(value=qid[str(row["domain"])], prop_nr=propertyID["domain"]))
        item = localEntityEngine(wd_item_id=qid[str(row["cidoc"])], data=statements)
        print(item.write(login))
    except Exception as e:
        print("failure looking up {} locally".format(localIdentifier))
        #print(traceback.format_exc())
        continue


http://erlangen-crm.org/current/P176_starts_before_the_start_of
P176_starts_before_the_start_of
failure looking up P176_starts_before_the_start_of locally
http://erlangen-crm.org/current/P177_assigned_property_type
P177_assigned_property_type
failure looking up P177_assigned_property_type locally
http://erlangen-crm.org/current/P1i_identifies
P1i_identifies
failure looking up P1i_identifies locally
http://erlangen-crm.org/current/P182_ends_before_or_at_the_start_of
P182_ends_before_or_at_the_start_of
failure looking up P182_ends_before_or_at_the_start_of locally
http://erlangen-crm.org/current/P145_separated
P145_separated
failure looking up P145_separated locally
http://erlangen-crm.org/current/P125i_was_type_of_object_used_in
P125i_was_type_of_object_used_in
failure looking up P125i_was_type_of_object_used_in locally
http://erlangen-crm.org/current/P45_consists_of
P45_consists_of
failure looking up P45_consists_of locally
http://erlangen-crm.org/current/P62i_is_depicted_by
P62i_is_de

## Range

In [46]:
# Note: this is a query run on the cidoc ontology, NOT on the wiki!
query = """SELECT DISTINCT ?cidoc ?range  WHERE {
                 ?cidoc rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> ; 
                        rdfs:range ?range .}"""
for row in cidoc.query(query):
    try:
        print(str(row['cidoc']))
        localIdentifier = str(row['cidoc']).replace('http://erlangen-crm.org/current/', '')
        print("Item is {} and the local item id is {}".format(localIdentifier, qid[localIdentifier]))
        localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api, sparql)
        statements=[]
        statements.append(wdi_core.WDItemID(value=qid[str(row["range"])].replace(entityUri, ""), prop_nr=propertyID["range"]))
        item = localEntityEngine(wd_item_id=qid[str(row["cidoc"])].replace(entityUri, ""), data=statements)
        print(item.write(login))
    except:
        # print(qid)
        print(str(row['cidoc']).replace('http://erlangen-crm.org/current/', ''))
        print("failure looking up {} locally".format(str(row["cidoc"])))
        continue

http://erlangen-crm.org/current/P76i_provides_access_to
P76i_provides_access_to
failure looking up http://erlangen-crm.org/current/P76i_provides_access_to locally
http://erlangen-crm.org/current/P176_starts_before_the_start_of
P176_starts_before_the_start_of
failure looking up http://erlangen-crm.org/current/P176_starts_before_the_start_of locally
http://erlangen-crm.org/current/P177_assigned_property_type
P177_assigned_property_type
failure looking up http://erlangen-crm.org/current/P177_assigned_property_type locally
http://erlangen-crm.org/current/P1i_identifies
P1i_identifies
failure looking up http://erlangen-crm.org/current/P1i_identifies locally
http://erlangen-crm.org/current/P182_ends_before_or_at_the_start_of
P182_ends_before_or_at_the_start_of
failure looking up http://erlangen-crm.org/current/P182_ends_before_or_at_the_start_of locally
http://erlangen-crm.org/current/P125i_was_type_of_object_used_in
P125i_was_type_of_object_used_in
failure looking up http://erlangen-crm.org

In [34]:
## SubPropertyOf

In [32]:
query = """SELECT DISTINCT ?cidoc ?subPropertyOf  WHERE {
                 ?cidoc rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> ; 
                        rdfs:subPropertyOf ?subPropertyOf ;
                        rdfs:range ?range .}"""
for row in cidoc.query(query):
    try:
        print(qid[str(row["cidoc"])])
        localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
        statements=[]
        statements.append(wdi_core.WDItemID(value=qid[str(row["subPropertyOf"])], prop_nr=propertyID["subproperty of"]))
        item = localEntityEngine(wd_item_id=qid[str(row["cidoc"])], data=statements)
        print(item.write(login))
    except:
        continue

In [35]:
## InverseOf

In [33]:
query = """SELECT DISTINCT ?cidoc ?inverseOf  WHERE {
                 ?cidoc rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> ; 
                        owl:inverseOf ?inverseOf ;
                        rdfs:range ?range .}"""
for row in cidoc.query(query):
    try:
        print(qid[str(row["cidoc"])])
        localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
        statements=[]
        statements.append(wdi_core.WDItemID(value=qid[str(row["inverseOf"])], prop_nr=propertyID["inverse of"]))
        item = localEntityEngine(wd_item_id=qid[str(row["cidoc"])], data=statements)
        print(item.write(login))
    except:
        continue