# Import necessary python libraries

In [22]:
from wikidataintegrator import wdi_core, wdi_login
from getpass import getpass
import pandas as pd
import rdflib
import sys
import traceback

# Get CIDOC-CRM

In [1]:

cidoc = rdflib.Graph()
cidoc.load("https://raw.githubusercontent.com/erlangen-crm/ecrm/master/ecrm_current.owl", format="xml")

<Graph identifier=N3f64f953b22d477fa273cc2be31ad5df (<class 'rdflib.graph.Graph'>)>

# Login to Wikibase

In [3]:
wikibase = "https://saf1a.wiki.opencura.com/"
api = "https://saf1a.wiki.opencura.com/w/api.php"
sparql = "https://saf1a.wiki.opencura.com/query/sparql"
entityUri = wikibase.replace("https:", "http:")+"entity/"

WBUSER = "Andrawaag"
WBPASS = getpass()
login = wdi_login.WDLogin(WBUSER, WBPASS, mediawiki_api_url=api)

 ················


# define python functions

In [4]:
def createProperty(login=login, wdprop=None, lulabel="", enlabel="", frlabel="", delabel="", description="", property_datatype=""):
    if wdprop== None:
        s = []
    else:
        s = [wdi_core.WDUrl(wdprop, prop_nr="P1")]
    localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
    item = localEntityEngine(data=s)
    #item.set_label(lulabel, lang="lu")
    item.set_label(enlabel, lang="en")
    item.set_label(delabel, lang="de")
    item.set_label(frlabel, lang="fr")
    item.set_description(description, lang="en")
    
    print(item.write(login, entity_type="property", property_datatype=property_datatype))

datatype_map = {'http://wikiba.se/ontology#CommonsMedia': 'commonsMedia',
                'http://wikiba.se/ontology#ExternalId': 'external-id',
                'http://wikiba.se/ontology#GeoShape': 'geo-shape',
                'http://wikiba.se/ontology#GlobeCoordinate': 'globe-coordinate',
                'http://wikiba.se/ontology#Math': 'math',
                'http://wikiba.se/ontology#Monolingualtext': 'monolingualtext',
                'http://wikiba.se/ontology#Quantity': 'quantity',
                'http://wikiba.se/ontology#String': 'string',
                'http://wikiba.se/ontology#TabularData': 'tabular-data',
                'http://wikiba.se/ontology#Time': 'time',
                '<http://wikiba.se/ontology#Edtf>': 'edtf',
                'http://wikiba.se/ontology#Url': 'url',
                'http://wikiba.se/ontology#WikibaseItem': 'wikibase-item',
                'http://wikiba.se/ontology#WikibaseProperty': 'wikibase-property'}

# predefined properties from rdfs, skos, etc

In [5]:
# skos:exact match
createProperty(login, lulabel="Ënnerklass vu(n)", 
                      enlabel="exact match",
                      frlabel="correspondance exacte",
                      delabel="exakte Übereinstimmung",
                      description="mapping",
                      property_datatype="url")
# instance of
createProperty(login, lulabel="", 
                      enlabel="instance of",
                      frlabel="instance de",
                      delabel="ist ein(e)",
                      property_datatype="wikibase-item")
# subclass of
## TODO work on multilangauge descriptions 
## 
createProperty(login, lulabel="Ënnerklass vu(n)", 
                      enlabel="subclass of",
                      frlabel="sous-classe de",
                      delabel="Unterklasse von",
                      property_datatype="wikibase-item")
#domain 
createProperty(login, lulabel="", 
                      enlabel="domain",
                      frlabel="domaine",
                      delabel="domain",
                      property_datatype="wikibase-item")
#range
createProperty(login, lulabel="", 
                      enlabel="range",
                      frlabel="intervalle",
                      delabel="reichweite",
                      property_datatype="wikibase-item")
#subPropertyOf
createProperty(login, lulabel="", 
                      enlabel="subPropertyOf",
                      frlabel="sous-propriété de",
                      delabel="untereigenschaft von",
                      property_datatype="wikibase-item")
#inverseOf
createProperty(login, lulabel="", 
                      enlabel="inverse of",
                      frlabel="inverse de",
                      delabel="invers von",
                      property_datatype="wikibase-item")

Please set P2302 and Q21502410 in your wikibase or set `core_props` manually.
Continuing with no core_props


P1


# Predefined classes

In [12]:
# class item
localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
item = localEntityEngine(new_item=True)
item.set_label("Class", lang="en")
item.set_aliases(["Owl:Class"], lang="en")
item.write(login)

# property item
localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
item = localEntityEngine(new_item=True)
item.set_label("Property", lang="en")
item.set_aliases(["owl:ObjectProperty"], lang="en")
item.write(login)

'Q1'

# Create Wikibase items for CIDOC-CRM classes (Erlanger)

In [17]:
query = "SELECT ?cidoc ?label WHERE {?cidoc rdf:type <http://www.w3.org/2002/07/owl#Class> ; rdfs:label ?label .}"
for row in cidoc.query(query):
    #print(str(row[0]), str(row[1]), " ".join(str(row[1]).split(" ")[1:]))
    statements=[]
    statements.append(wdi_core.WDItemID(value="Q1", prop_nr="P2"))
    statements.append(wdi_core.WDUrl(value=str(row[0]), prop_nr="P1"))
    item = localEntityEngine(new_item=True, data=statements)
    item.set_label(" ".join(str(row[1]).split(" ")), lang="en")
    #item.set_aliases([str(row[1]), str(row[1]).replace(" ", "_")], lang="en")
    print(item.write(login))
    

Q3
Q4
Q5
Q6
Q7
Q8
Q9
Q10
Q11
Q12
Q13
Q14
Q15
Q16
Q17
Q18
Q19
Q20
Q21
Q22
Q23
Q24
Q25
Q26
Q27
Q28
Q29
Q30
Q31
Q32
Q33
Q34
Q35
Q36
Q37
Q38
Q39
Q40
Q41
Q42
Q43
Q44
Q45
Q46
Q47
Q48
Q49
Q50
Q51
Q52
Q53
Q54
Q55
Q56
Q57
Q58
Q59
Q60
Q61
Q62
Q63
Q64
Q65
Q66
Q67
Q68
Q69
Q70
Q71
Q72
Q73


# Create Wikidata items for CIDOC CRM ObjectProperties (Erlanger)
# The properties are created as ITEMS on Wikibase

In [18]:
#propertyClasses
query = "SELECT ?cidoc ?label WHERE {?cidoc rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> ; rdfs:label ?label .}"
for row in cidoc.query(query):
    print(str(row[0]), str(row[1]), " ".join(str(row[1]).split(" ")[1:]))
    statements=[]
    statements.append(wdi_core.WDItemID(value="Q2", prop_nr="P2"))
    statements.append(wdi_core.WDUrl(value=str(row[0]), prop_nr="P1"))
    item = localEntityEngine(new_item=True, data=statements)
    item.set_label(" ".join(str(row[1]).split(" ")), lang="en")
    #item.set_aliases([str(row[1]), str(row[1]).replace(" ", "_")], lang="en")
    try:
        print(item.write(login))
    except:
        print("ERROR: "+str(row[1]))
        continue

http://erlangen-crm.org/current/P16i_was_used_for P16 was used for was used for
Q74
http://erlangen-crm.org/current/P130_shows_features_of P130 shows features of shows features of
Q75
http://erlangen-crm.org/current/P92i_was_brought_into_existence_by P92 was brought into existence by was brought into existence by
Q76
http://erlangen-crm.org/current/P35i_was_identified_by P35 was identified by was identified by
Q77
http://erlangen-crm.org/current/P106_is_composed_of P106 is composed of is composed of
Q78
http://erlangen-crm.org/current/P106i_forms_part_of P106 forms part of forms part of
Q79
http://erlangen-crm.org/current/P132_spatiotemporally_overlaps_with P132 overlaps with overlaps with
Q80
http://erlangen-crm.org/current/P96i_gave_birth P96 gave birth gave birth
Q81
http://erlangen-crm.org/current/P129i_is_subject_of P129 is subject of is subject of
Q82
http://erlangen-crm.org/current/P1i_identifies P1 identifies identifies
Q83
http://erlangen-crm.org/current/P174i_ends_after_the_s

In [32]:
import pprint
qid = dict()
prefix = entityUri.replace("entity","prop/direct")
query = "PREFIX wdt: <"+prefix+"> SELECT * WHERE {?item wdt:P1 ?uri .}"
for index, row in wdi_core.WDItemEngine.execute_sparql_query(query, as_dataframe = True, endpoint=sparql).iterrows():
    qid[row["uri"]] =row["item"].replace(entityUri, "")
pprint.pprint(qid)
print(query)

{'http://erlangen-crm.org/current/E10_Transfer_of_Custody': 'Q8',
 'http://erlangen-crm.org/current/E11_Modification': 'Q12',
 'http://erlangen-crm.org/current/E12_Production': 'Q43',
 'http://erlangen-crm.org/current/E13_Attribute_Assignment': 'Q47',
 'http://erlangen-crm.org/current/E14_Condition_Assessment': 'Q11',
 'http://erlangen-crm.org/current/E15_Identifier_Assignment': 'Q4',
 'http://erlangen-crm.org/current/E16_Measurement': 'Q73',
 'http://erlangen-crm.org/current/E17_Type_Assignment': 'Q70',
 'http://erlangen-crm.org/current/E18_Physical_Thing': 'Q38',
 'http://erlangen-crm.org/current/E19_Physical_Object': 'Q50',
 'http://erlangen-crm.org/current/E1_CRM_Entity': 'Q27',
 'http://erlangen-crm.org/current/E20_Biological_Object': 'Q7',
 'http://erlangen-crm.org/current/E21_Person': 'Q16',
 'http://erlangen-crm.org/current/E22_Human-Made_Object': 'Q72',
 'http://erlangen-crm.org/current/E24_Physical_Human-Made_Thing': 'Q37',
 'http://erlangen-crm.org/current/E25_Human-Made_Fea

## Add domain from CIDOC crm with local proparty

In [None]:
query = """SELECT DISTINCT ?cidoc ?domain  WHERE {
                 ?cidoc rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> ; 
                        rdfs:domain ?domain .}"""
for row in cidoc.query(query):
    try:
        print(qid[str(row["cidoc"])].replace(entityUri, ""))
        localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
        statements=[]
        statements.append(wdi_core.WDItemID(value=qid[str(row["domain"])].replace(entityUri, ""), prop_nr="P4"))
        item = localEntityEngine(wd_item_id=qid[str(row["cidoc"])].replace(entityUri, ""), data=statements)
        print(item.write(login))
    except Exception as e:
        print(traceback.format_exc())
        continue

Q74
Q74
Q75
Q75
Q76
Q76
Q77
Q77
Q78
Q78
Q79
Q79
Q80
Q80
Q81
Q81
Q82
Q82
Q83
Q83
Q84
Q84
Q85
Q85
Q86
Q86
Q87
Q87
Q88
Q88
Q89
Q89
Q90
Q90
Q91
Q91
Q92
Q92
Q93
Q93
Q94
Q94
Q95
Q95
Q96
Q96
Q97
Q97
Q98
Q98
Traceback (most recent call last):
  File "<ipython-input-33-e0e944f3f573>", line 6, in <module>
    print(qid[str(row["cidoc"])].replace(entityUri, ""))
KeyError: 'http://erlangen-crm.org/current/P166_was_a_presence_of'

Q99
Q99
Q100
Q100
Q101
Q101
Q102
Q102
Q103
Q103
Q104
Q104
Q105
Q105
Q106
Q106
Q107
Q107
Q108
Q108
Q109
Q109
Q110
Q110
Q111
Q111
Q112
Q112
Q113


In [None]:
query = """SELECT DISTINCT ?cidoc ?range  WHERE {
                 ?cidoc rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> ; 
                        rdfs:range ?range .}"""
for row in cidoc.query(query):
    try:
        print(qid[str(row["cidoc"])])
        localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
        statements=[]
        statements.append(wdi_core.WDItemID(value=qid[str(row["range"])].replace("http://safcidoc2.wiki.opencura.com/entity/", ""), prop_nr="P5"))
        item = localEntityEngine(wd_item_id=qid[str(row["cidoc"])].replace("http://safcidoc2.wiki.opencura.com/entity/", ""), data=statements)
        print(item.write(login))
    except:
        str(row["cidoc"])
        continue

In [None]:
query = """SELECT DISTINCT ?cidoc ?subPropertyOf  WHERE {
                 ?cidoc rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> ; 
                        rdfs:subPropertyOf ?subPropertyOf ;
                        rdfs:range ?range .}"""
for row in cidoc.query(query):
    try:
        print(qid[str(row["cidoc"])])
        localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
        statements=[]
        statements.append(wdi_core.WDItemID(value=qid[str(row["subPropertyOf"])], prop_nr="P6"))
        item = localEntityEngine(wd_item_id=qid[str(row["cidoc"])], data=statements)
        print(item.write(login))
    except:
        continue

In [None]:
query = """SELECT DISTINCT ?cidoc ?inverseOf  WHERE {
                 ?cidoc rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> ; 
                        owl:inverseOf ?inverseOf ;
                        rdfs:range ?range .}"""
for row in cidoc.query(query):
    try:
        print(qid[str(row["cidoc"])])
        localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
        statements=[]
        statements.append(wdi_core.WDItemID(value=qid[str(row["inverseOf"])], prop_nr="P7"))
        item = localEntityEngine(wd_item_id=qid[str(row["cidoc"])], data=statements)
        print(item.write(login))
    except:
        continue

# Import SAF properties as described in DM_SAF

## Import the Excel sheet that contains the LUX Saf view of CIDOC-CRM

In [None]:
model_def = pd.read_excel("DM_SAF_vers.1.1.xlsx", header=2)

## Data reformating

In [None]:
model_def["Data type"] = model_def["Data type"].str.replace("\ncoded / date time\n", "time")
model_def["Data type"] = model_def["Data type"].str.replace("none", "wikibase-item")
model_def["Data type"] = model_def["Data type"].str.replace("coded", "wikibase-item")

# extract property definitions
Here only the fields needed containing the mapping rules are being extracted. This is a step allowing close inspection, later it can be removed and the subsequent steps can be down directly on the source excel file

In [None]:
cleanedList = [x for x in set(model_def["English"].to_list()) if str(x) != 'nan']
cleanedList = list(cleanedList)
flat_model = model_def.loc[model_def["English"].isin(cleanedList)]
property_model = flat_model[["Deutsch", "English", "français", "Required : Y/N", "Repeated : Y/N", "Data type",  "Description - see document \"Data model - minimum set of fields: definitions\""]]
property_model.drop_duplicates()

wikibase_prop_model = property_model[["Deutsch", "English", "français", "Data type", "Description - see document \"Data model - minimum set of fields: definitions\""]].drop_duplicates()
wikibase_prop_model.sort_values(by=['English'])

## Create properties

In [None]:
def createProperty(login=login, wdprop=None, enlabel="", frlabel="", delabel="", description="", property_datatype=""):
    if wdprop== None:
        s = []
    else:
        s = [wdi_core.WDUrl(wdprop, prop_nr="P1")]
    localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
    item = localEntityEngine(data=s)
    item.set_label(enlabel, lang="en")
    item.set_label(delabel, lang="de")
    item.set_label(frlabel, lang="fr")
    item.set_description(description, lang="en")
    
    print(item.write(login, entity_type="property", property_datatype=property_datatype))

datatype_map = {'http://wikiba.se/ontology#CommonsMedia': 'commonsMedia',
                'http://wikiba.se/ontology#ExternalId': 'external-id',
                'http://wikiba.se/ontology#GeoShape': 'geo-shape',
                'http://wikiba.se/ontology#GlobeCoordinate': 'globe-coordinate',
                'http://wikiba.se/ontology#Math': 'math',
                'http://wikiba.se/ontology#Monolingualtext': 'monolingualtext',
                'http://wikiba.se/ontology#Quantity': 'quantity',
                'http://wikiba.se/ontology#String': 'string',
                'http://wikiba.se/ontology#TabularData': 'tabular-data',
                'http://wikiba.se/ontology#Time': 'time',
                'http://wikiba.se/ontology#Url': 'url',
                'http://wikiba.se/ontology#WikibaseItem': 'wikibase-item',
                'http://wikiba.se/ontology#WikibaseProperty': 'wikibase-property'}

### ADD EDTF!!!!!!!

createProperty(login, enlabel="property in wikidata", description="The same property in Wikidata", property_datatype="url")
createProperty(login, enlabel="concept in wikidata", description="The same concept in Wikidata", property_datatype="url")
for index, row in wikibase_prop_model.iterrows():
    if row["Data type"] in datatype_map.values():
        createProperty(login, enlabel=row["English"], frlabel=row["français"], delabel=row["Deutsch"], description="Lux SAF Property", property_datatype=row["Data type"]) 