# Access details for the Wikibase which host the SAF

In [17]:
import rdflib
from wikidataintegrator import wdi_core, wdi_login
from getpass import getpass
import pandas as pd
import sys
import traceback

wikibase = "http://localhost:8080"
api = "http://localhost:8080/w/api.php"
sparql = "http://localhost:8282/proxy/wdqs/bigdata/namespace/wdq/sparql"
#entityUri = wikibase.replace("https:", "http:")+"entity/"
entityUri = "http://mediawiki.svc/entity/"  # this is used to strip it from query results! 

WBUSER = "SafAdmin"
#WBPASS = getpass()
WBPASS = "ExampleAdminPassword"
login = wdi_login.WDLogin(WBUSER, WBPASS, mediawiki_api_url=api)

# Python libraries and function definitions

Create a function named `createPropery` that will create (or update) properties
in our WB installation.

In [18]:
localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api, sparql)

def createProperty(
    login=login, 
    wdprop=None, 
    lulabel="", 
    enlabel="", 
    frlabel="", 
    delabel="", 
    description="", 
    property_datatype=""
):
    if wdprop == None:
        s = []
    else:
        s = [wdi_core.WDUrl(wdprop, prop_nr="P1")]
    #localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
    item = localEntityEngine(data=s)
    item.set_label(lulabel, lang="lb")
    item.set_label(enlabel, lang="en")
    item.set_label(delabel, lang="de")
    item.set_label(frlabel, lang="fr")
    item.set_description(description, lang="en")
    
    print(item.write(login, entity_type="property", property_datatype=property_datatype))

# Import CIDOC-CRM definition
TODO: confirm that versions are same between DMSAF and the CIDOC-CRM definitions here

## Create mapping

Assemble a hash of all existing items and properties on the mediawiki instance to 
be used for mapping later on.

In [20]:
propertyID = dict()
query = """PREFIX wdt: <http://mediawiki.svc/prop/direct/> 
SELECT ?prop ?propLabel WHERE { ?prop wikibase:directClaim ?wdt. SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }  }
"""
for index, row in wdi_core.WDItemEngine.execute_sparql_query(query, as_dataframe = True, endpoint=sparql).iterrows():
  propertyID[row["propLabel"]] = row["prop"].replace(entityUri, "")

print('Retrieved properties are {}'.format(propertyID))

# Assemble a list of all entities on the mediawiki instance (wd: /entity) (We should actually only have "Qx:Class" and "Qy:Property" at this point)
qid = dict()
# query = "PREFIX wd: <http://mediawiki.svc/entity/> SELECT ?item ?label WHERE {{ ?item rdfs:label ?label }}"
query = """
SELECT ?item ?label WHERE { 
  VALUES ?label { "Class"@en "Property"@en }
  ?item rdfs:label ?label . 
}
"""
for index, row in wdi_core.WDItemEngine.execute_sparql_query(query, as_dataframe = True, endpoint=sparql).iterrows():
    qid[row["label"]] = row["item"].replace(entityUri, "")

print('Retrieved items are {}'.format(qid))

Retrieved properties are {'instance of': 'P1', 'subclass of': 'P2', 'exact match': 'P3', 'domain': 'P4', 'range': 'P5', 'subproperty of': 'P6', 'inverse of': 'P7', 'name': 'P8', 'name format': 'P9', 'numeration': 'P10', 'title': 'P11', 'source of information - text': 'P12', 'type of source': 'P13', 'superclass of': 'P14', 'source of information - url': 'P16', 'alternative name': 'P17', 'date of birth': 'P18', 'place of birth': 'P19', 'date of death': 'P20', 'place of death': 'P21', 'gender': 'P22', 'profession': 'P23', 'date of beginning': 'P24', 'end date': 'P25', 'activity': 'P26', 'internal note': 'P27', 'public note': 'P28', 'AFL identifier': 'P29', 'creator': 'P30', 'institutional affiliation': 'P31', 'date of creation': 'P32', 'date of modification': 'P33', 'editor': 'P34', 'status': 'P35', 'ISNI': 'P36', 'VIAF': 'P37', 'GND': 'P38', 'WikiData': 'P39', 'Getty': 'P40', 'ARK': 'P75'}
Retrieved items are {'Class': 'Q1', 'Property': 'Q2'}


In [21]:
cidoc = rdflib.Graph()
cidoc.load("https://raw.githubusercontent.com/erlangen-crm/ecrm/master/ecrm_current.owl", format="xml")

# Create CIDOC-CRM items on WB SAF

Using the OWL definition of CIDOC-CRM loaded above, 
we retrieve all entities of type `Owl:Class` form this definition.

For each entity of type class, we create a new item inside the mediawiki 
instance that will have a statement of: `$new_item` is `instance_of` `Class`.
Where both `instance_of` and `Class` are looked up inside the maps we 
created earlier: `qid` and `propertyID`.

The result of this will be items (Qxyz) inside the wikibase for all CIDOC 
Class entities, e.g.

| Title | Label  | instance_of |
| ----- | ------ | ----------- |
| Q502  | E17 Type Assignment | Class (Q1) |

It also creates a statement using property `exact_match` (again mapped to its wikibase representation)
that takes the entities IRI.

Note that this will require duplicates if entities already exist!

In [25]:
# Note: These are sparql queries made against the cidoc definition, not the local blazegraph
# Class (Exyz) 
query = """
SELECT ?entity ?label WHERE {
    ?entity rdf:type <http://www.w3.org/2002/07/owl#Class> ; 
            rdfs:label ?label .
}
"""
for row in cidoc.query(query):
    try:
        print('creating local child entity "{}" using label "{}"'.format(str(row[0]), str(row[1])))
        # print(str(row[0]), str(row[0]), " ".join(str(row[1]).split(" ")[1:]))
        statements = []
        statements.append(wdi_core.WDItemID(value = qid["Class"], prop_nr = propertyID["instance of"]))
        statements.append(wdi_core.WDUrl(value = str(row[0]), prop_nr = propertyID['exact match']))
        item = localEntityEngine(new_item = True, data = statements)
        label = " ".join(str(row[1]).split(" "))    # DR: no idea why this is done
        item.set_label(label, lang = "en")
        # #item.set_aliases([str(row[1]), str(row[1]).replace(" ", "_")], lang="en")
        created_item = item.write(login)
        print("created local entity {} using label {}".format(created_item, label))
    except Exception as e:
        print('Failed creating entity for "{}" because of {}'.format(str(row[1]), str(e)))
    


creating local Class-child entity http://erlangen-crm.org/current/E17_Type_Assignment using label E17 Type Assignment
created local entity Q502 using label E17 Type Assignment
creating local Class-child entity http://erlangen-crm.org/current/E14_Condition_Assessment using label E14 Condition Assessment
created local entity Q503 using label E14 Condition Assessment
creating local Class-child entity http://erlangen-crm.org/current/E26_Physical_Feature using label E26 Physical Feature
created local entity Q504 using label E26 Physical Feature
creating local Class-child entity http://erlangen-crm.org/current/E65_Creation using label E65 Creation
created local entity Q505 using label E65 Creation
creating local Class-child entity http://erlangen-crm.org/current/E79_Part_Addition using label E79 Part Addition
created local entity Q506 using label E79 Part Addition
creating local Class-child entity http://erlangen-crm.org/current/E69_Death using label E69 Death
created local entity Q507 using

# Creating entities for Properties

We repeat the same exercise as above, but instead of fetch `Owl:Class` items,
we fetch `Owl:ObjectProperty` items this time.

The result of this will be items (Qxyz) inside the wikibase for all CIDOC 
objectProperties, e.g.

| Title | Label  | instance_of |
| ----- | ------ | ----------- |
| Q576  | P46 is composed of | Project (Q2) |

__Why do we create these as entites (items) and not as properties?__

In [31]:

# objectProperty (Pxyz)
query = """
SELECT ?property ?label WHERE {
    ?property rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> ; 
              rdfs:label ?label .
}
"""
for row in cidoc.query(query):
    try:
        print('creating local child entity "{}" using label "{}"'.format(str(row[0]), str(row[1]), str(row[2])))
        # print(str(row[0]), str(row[0]), " ".join(str(row[1]).split(" ")[1:]))
        statements = []
        statements.append(wdi_core.WDItemID(value = qid["Property"], prop_nr = propertyID["instance of"]))
        statements.append(wdi_core.WDUrl(value = str(row[0]), prop_nr = propertyID['exact match']))
        item = localEntityEngine(new_item = True, data = statements)
        label = " ".join(str(row[1]).split(" "))    # DR: no idea why this is done
        item.set_label(label, lang = "en")
        # #item.set_aliases([str(row[1]), str(row[1]).replace(" ", "_")], lang="en")
        # created_item = item.write(login)
        print('created local entity "{}" using label "{}"'.format(created_item, label))
    except Exception as e:
        print('Failed creating entity for "{}" because of {}'.format(str(row[1]), str(e)))


(rdflib.term.URIRef('http://erlangen-crm.org/current/P123i_resulted_from'), rdflib.term.Literal('P123 resulted from', lang='en'), rdflib.term.URIRef('http://erlangen-crm.org/current/E77_Persistent_Item'))
creating local child entity "http://erlangen-crm.org/current/P123i_resulted_from" using label "P123 resulted from" and domain "http://erlangen-crm.org/current/E77_Persistent_Item"
created local entity Q834 using label P123 resulted from
(rdflib.term.URIRef('http://erlangen-crm.org/current/P42i_was_assigned_by'), rdflib.term.Literal('P42 was assigned by', lang='en'), rdflib.term.URIRef('http://erlangen-crm.org/current/E55_Type'))
creating local child entity "http://erlangen-crm.org/current/P42i_was_assigned_by" using label "P42 was assigned by" and domain "http://erlangen-crm.org/current/E55_Type"
created local entity Q834 using label P42 was assigned by
(rdflib.term.URIRef('http://erlangen-crm.org/current/P46_is_composed_of'), rdflib.term.Literal('P46 is composed of', lang='en'), rdfl

# Extending QID with new entities

We're now extending our qid map with the newly created ones. 
In order to avoid duplicated in `qid` we now look for all items
that have a statement using the `exact_match` property.

__Note__: This will only work correctly if `P3` is the `exact_match` property!

In [49]:
# # P3 - exact match        
# query = "PREFIX wdt: <http://mediawiki.svc/prop/direct/> SELECT * WHERE {{ ?item wdt:P3 ?uri .}}"  # Original query
query = """
PREFIX wdt: <http://mediawiki.svc/prop/direct/> 
SELECT * WHERE { 
    ?item   wdt:P3      ?uri;
            rdfs:label  ?label .
}
"""

erlangenItems = {}
for index, row in wdi_core.WDItemEngine.execute_sparql_query(query, as_dataframe = True, endpoint = sparql).iterrows():
    erlangenItems[row["uri"]] = row["item"].replace(entityUri, "")  # Original
    qid[row["label"]] = row["item"].replace(entityUri, "")  # DR

# print("Qid is now: {}".format(qid))
# print("erlangenItems is now {}".format(erlangenItems))
print('erlangenItems now contains {} items'.format(len(erlangenItems)))

erlangenItems now contains 332 items


# Add CIDOC-CRM relations

Rewrite:

Not all of them exist because not all of them have labels? --> use of `continue`


Once we created all the Classes and objectProperties, we can start creating relations, that is Domains and Ranges.

What we're doing here is looking up entities again, and adding new statements to them


## Domain

We're basically repeating the query from before for ObjectProperties, but instead of querying the label,
we query the domain this time.

However, we can only do this in a second run because entities might not yet exist. 
(They will only be created during the first run!)

In [55]:
# DR: This query might be wrong if there are items which have only a domain or only a range
query = """
SELECT DISTINCT ?cidoc ?domain ?range WHERE {
    ?cidoc rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> ; 
           rdfs:domain ?domain ;
           rdfs:range  ?range .
}
"""
for row in cidoc.query(query):
    try:
        # print(str(row['cidoc']), str(row['domain']), str(row['range']))
        localIdentifier = str(row['cidoc'])
        localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api, sparql)
        
        statements = []
        statements.append(wdi_core.WDItemID(value = erlangenItems[str(row["domain"])], prop_nr = propertyID["domain"]))
        statements.append(wdi_core.WDItemID(value = erlangenItems[str(row["range"])], prop_nr = propertyID["range"]))

        # We're using wd_item_id here to look up existing items!
        item = localEntityEngine(wd_item_id = erlangenItems[localIdentifier], data = statements)
        updated_item = item.write(login)
        print('Updated item "{}" successfully'.format(updated_item))
    except KeyError as missing:
        print('We do not have an entity for "{}" locally'.format(str(row['cidoc'])))
        continue
    except Exception as e:
        print('failure updating "{}:{}" due to: "{}"'.format(erlangenItems[str(row['cidoc'])], str(row['cidoc']), str(e)))


Updated item "Q573" successfully

Updated item "Q575" successfully

Updated item "Q576" successfully

Updated item "Q577" successfully

Updated item "Q579" successfully

Updated item "Q580" successfully

Updated item "Q581" successfully

We do not have an entity for "http://erlangen-crm.org/current/P166_was_a_presence_of" locally
Updated item "Q582" successfully

Updated item "Q583" successfully

We do not have an entity for "http://erlangen-crm.org/current/P166i_had_presence" locally
Updated item "Q584" successfully

Updated item "Q586" successfully

Updated item "Q587" successfully

Updated item "Q589" successfully

Updated item "Q590" successfully

We do not have an entity for "http://erlangen-crm.org/current/P180_has_currency" locally
We do not have an entity for "http://erlangen-crm.org/current/P188i_is_production_tool_for" locally
Updated item "Q592" successfully

Updated item "Q593" successfully

Updated item "Q594" successfully

Updated item "Q595" successfully

Updated item "Q

## SubPropertyOf

In [56]:
query = """
SELECT DISTINCT ?cidoc ?subPropertyOf WHERE {
    ?cidoc rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> ; 
            rdfs:subPropertyOf ?subPropertyOf ;
            rdfs:range ?range .
}
"""
for row in cidoc.query(query):
    try:
        localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
        statements = []
        statements.append(wdi_core.WDItemID(value = erlangenItems[str(row["subPropertyOf"])], prop_nr = propertyID["subproperty of"]))
        item = localEntityEngine(wd_item_id = erlangenItems[str(row["cidoc"])], data=statements)
        updated_item = item.write(login)
        print('Successfully updated item "{}" with subproperty statement'.format(updated_item))
    except KeyError as missing:
        print('We do not have an entity for "{}" locally'.format(str(row['cidoc'])))
        continue
    except Exception as e:
        print('failure updating "{}:{}" due to: "{}"'.format(erlangenItems[str(row['cidoc'])], str(row['cidoc']), str(e)))

Q573
Updated item "Q573" successfully
Q575
Updated item "Q575" successfully
Q577
Updated item "Q577" successfully
Q579
Updated item "Q579" successfully
We do not have an entity for "http://erlangen-crm.org/current/P166_was_a_presence_of" locally
Q582
Updated item "Q582" successfully
We do not have an entity for "http://erlangen-crm.org/current/P166i_had_presence" locally
Q584
Updated item "Q584" successfully
Q585
Updated item "Q585" successfully
Q586
Updated item "Q586" successfully
Q588
Updated item "Q588" successfully
Q589
Updated item "Q589" successfully
Q590
Updated item "Q590" successfully
Q591
Updated item "Q591" successfully
Q597
Updated item "Q597" successfully
Q600
Updated item "Q600" successfully
Q601
Updated item "Q601" successfully
Q602
Updated item "Q602" successfully
Q602
Updated item "Q602" successfully
Q603
Updated item "Q603" successfully
Q606
Updated item "Q606" successfully
Q606
Updated item "Q606" successfully
Q607
Updated item "Q607" successfully
Q608
Updated item 

## InverseOf

In [57]:
query = """
SELECT DISTINCT ?cidoc ?inverseOf WHERE {
    ?cidoc rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> ; 
            owl:inverseOf ?inverseOf ;
            rdfs:range ?range .
}
"""
for row in cidoc.query(query):
    try:
        localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
        statements = []
        statements.append(wdi_core.WDItemID(value=erlangenItems[str(row["inverseOf"])], prop_nr = propertyID["inverse of"]))
        item = localEntityEngine(wd_item_id=erlangenItems[str(row["cidoc"])], data = statements)
        updated_item = item.write(login)
        print('Successfully updated item "{}" with inverseOf statement'.format(updated_item))
    except KeyError as missing:
        print('We do not have an entity for "{}" locally'.format(str(row['cidoc'])))
        continue
    except Exception as e:
        print('failure updating "{}:{}" due to: "{}"'.format(erlangenItems[str(row['cidoc'])], str(row['cidoc']), str(e)))

Successfully updated item "Q573" with inverseOf statement
Successfully updated item "Q575" with inverseOf statement
Successfully updated item "Q576" with inverseOf statement
Successfully updated item "Q577" with inverseOf statement
Successfully updated item "Q579" with inverseOf statement
Successfully updated item "Q580" with inverseOf statement
Successfully updated item "Q581" with inverseOf statement
We do not have an entity for "http://erlangen-crm.org/current/P166_was_a_presence_of" locally
Successfully updated item "Q582" with inverseOf statement
Successfully updated item "Q583" with inverseOf statement
We do not have an entity for "http://erlangen-crm.org/current/P166i_had_presence" locally
Successfully updated item "Q584" with inverseOf statement
Successfully updated item "Q585" with inverseOf statement
Successfully updated item "Q586" with inverseOf statement
Successfully updated item "Q587" with inverseOf statement
Successfully updated item "Q588" with inverseOf statement
Succ