# Import necessary python libraries

In [16]:
from wikidataintegrator import wdi_core, wdi_login
from getpass import getpass
import pandas as pd
import rdflib
import sys
import traceback

In [2]:
cidoc = rdflib.Graph()
cidoc.load("https://raw.githubusercontent.com/erlangen-crm/ecrm/master/ecrm_current.owl", format="xml")

# Login to Wikibase

In [34]:
wikibase = "https://cidocoption1a.wiki.opencura.com/"
api = "https://cidocoption1a.wiki.opencura.com/w/api.php"
sparql = "https://cidocoption1a.wiki.opencura.com/query/sparql"
entityUri = wikibase.replace("https:", "http:")+"entity/"

WBUSER = "Andrawaag"
WBPASS = getpass()
login = wdi_login.WDLogin(WBUSER, WBPASS, mediawiki_api_url=api)

········


# define python functions

In [None]:
def createProperty(login=login, wdprop=None, lulabel="", enlabel="", frlabel="", delabel="", description="", property_datatype=""):
    if wdprop== None:
        s = []
    else:
        s = [wdi_core.WDUrl(wdprop, prop_nr="P1")]
    localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
    item = localEntityEngine(data=s)
    #item.set_label(lulabel, lang="lu")
    item.set_label(enlabel, lang="en")
    item.set_label(delabel, lang="de")
    item.set_label(frlabel, lang="fr")
    item.set_description(description, lang="en")
    
    print(item.write(login, entity_type="property", property_datatype=property_datatype))

datatype_map = {'http://wikiba.se/ontology#CommonsMedia': 'commonsMedia',
                'http://wikiba.se/ontology#ExternalId': 'external-id',
                'http://wikiba.se/ontology#GeoShape': 'geo-shape',
                'http://wikiba.se/ontology#GlobeCoordinate': 'globe-coordinate',
                'http://wikiba.se/ontology#Math': 'math',
                'http://wikiba.se/ontology#Monolingualtext': 'monolingualtext',
                'http://wikiba.se/ontology#Quantity': 'quantity',
                'http://wikiba.se/ontology#String': 'string',
                'http://wikiba.se/ontology#TabularData': 'tabular-data',
                'http://wikiba.se/ontology#Time': 'time',
                '<http://wikiba.se/ontology#Edtf>': 'edtf',
                'http://wikiba.se/ontology#Url': 'url',
                'http://wikiba.se/ontology#WikibaseItem': 'wikibase-item',
                'http://wikiba.se/ontology#WikibaseProperty': 'wikibase-property'}

# predefined properties from rdfs, skos, etc

In [None]:
# skos:exact match
createProperty(login, lulabel="Ënnerklass vu(n)", 
                      enlabel="exact match",
                      frlabel="correspondance exacte",
                      delabel="exakte Übereinstimmung",
                      description="mapping",
                      property_datatype="url")
# instance of
createProperty(login, lulabel="", 
                      enlabel="instance of",
                      frlabel="instance de",
                      delabel="ist ein(e)",
                      property_datatype="wikibase-item")
# subclass of
## TODO work on multilangauge descriptions 
## 
createProperty(login, lulabel="Ënnerklass vu(n)", 
                      enlabel="subclass of",
                      frlabel="sous-classe de",
                      delabel="Unterklasse von",
                      property_datatype="wikibase-item")
#domain 
createProperty(login, lulabel="", 
                      enlabel="domain",
                      frlabel="domaine",
                      delabel="domain",
                      property_datatype="wikibase-item")
#range
createProperty(login, lulabel="", 
                      enlabel="range",
                      frlabel="intervalle",
                      delabel="reichweite",
                      property_datatype="wikibase-item")
#subPropertyOf
createProperty(login, lulabel="", 
                      enlabel="subPropertyOf",
                      frlabel="sous-propriété de",
                      delabel="untereigenschaft von",
                      property_datatype="wikibase-item")
#inverseOf
createProperty(login, lulabel="", 
                      enlabel="inverse of",
                      frlabel="inverse de",
                      delabel="invers von",
                      property_datatype="wikibase-item")

# Predefined classes

In [None]:
# class item
localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
item = localEntityEngine(new_item=True)
item.set_label("Class", lang="en")
item.set_aliases(["Owl:Class"], lang="en")
item.write(login)

# property item
item = localEntityEngine(new_item=True)
item.set_label("Property", lang="en")
item.set_aliases(["owl:ObjectProperty"], lang="en")
item.write(login)

In [None]:
qid = dict()

# Create Wikibase items for CIDOC-CRM classes (Erlanger)

In [None]:
qid = dict()
query = "SELECT ?cidoc ?label WHERE {?cidoc rdf:type <http://www.w3.org/2002/07/owl#Class> ; rdfs:label ?label .}"
for row in cidoc.query(query):
    #print(str(row[0]), str(row[1]), " ".join(str(row[1]).split(" ")[1:]))
    try:
        statements=[]
        statements.append(wdi_core.WDItemID(value="Q1", prop_nr="P2"))
        statements.append(wdi_core.WDUrl(value=str(row[0]), prop_nr="P1"))
        item = localEntityEngine(new_item=True, data=statements)
        item.set_label(" ".join(str(row[1]).split(" ")), lang="en")
        #item.set_aliases([str(row[1]), str(row[1]).replace(" ", "_")], lang="en")
        print(item.write(login))
    except:
        print("tjoep")
    

# Create Wikidata items for CIDOC CRM ObjectProperties (Erlanger)
# The properties are created as ITEMS on Wikibase

In [None]:
import pprint

prefix = entityUri.replace("entity","prop/direct")
query = "PREFIX wdt: <"+prefix+"> SELECT * WHERE {?item wdt:P1 ?uri .}"
for index, row in wdi_core.WDItemEngine.execute_sparql_query(query, as_dataframe = True, endpoint=sparql).iterrows():
    qid[row["uri"]] =row["item"].replace(entityUri, "")
pprint.pprint(qid)
print(query)

In [None]:
localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
#propertyClasses
query = "SELECT ?cidoc ?label WHERE {?cidoc rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> ; rdfs:label ?label .}"
for row in cidoc.query(query):
    if str(row[0]) not in qid.keys():
        print(row[0])
        print(str(row[0]), str(row[1]), " ".join(str(row[1]).split(" ")[1:]))
        statements=[]
        statements.append(wdi_core.WDItemID(value="Q2", prop_nr="P2"))
        statements.append(wdi_core.WDUrl(value=str(row[0]), prop_nr="P1"))
        item = localEntityEngine(new_item=True, data=statements)
        item.set_label(" ".join(str(row[1]).split(" ")), lang="en")
        #item.set_aliases([str(row[1]), str(row[1]).replace(" ", "_")], lang="en")
        try:
            print(item.write(login))
        except:
            print("ERROR: "+str(row[1]))
            continue
    else:
        print("correct")

## Add domain from CIDOC crm with local proparty

In [None]:
query = """SELECT DISTINCT ?cidoc ?domain  WHERE {
                 ?cidoc rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> ; 
                        rdfs:domain ?domain .}"""
for row in cidoc.query(query):
    try:
        print(qid[str(row["cidoc"])].replace(entityUri, ""))
        localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
        statements=[]
        statements.append(wdi_core.WDItemID(value=qid[str(row["domain"])].replace(entityUri, ""), prop_nr="P4"))
        item = localEntityEngine(wd_item_id=qid[str(row["cidoc"])].replace(entityUri, ""), data=statements)
        print(item.write(login))
    except Exception as e:
        print(traceback.format_exc())
        continue

## Add range from CIDOC crm with local proparty

In [None]:
query = """SELECT DISTINCT ?cidoc ?range  WHERE {
                 ?cidoc rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> ; 
                        rdfs:range ?range .}"""
for row in cidoc.query(query):
    try:
        print(qid[str(row["cidoc"])])
        localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
        statements=[]
        statements.append(wdi_core.WDItemID(value=qid[str(row["range"])].replace("http://safcidoc2.wiki.opencura.com/entity/", ""), prop_nr="P5"))
        item = localEntityEngine(wd_item_id=qid[str(row["cidoc"])].replace("http://safcidoc2.wiki.opencura.com/entity/", ""), data=statements)
        print(item.write(login))
    except:
        str(row["cidoc"])
        continue

In [None]:
query = """SELECT DISTINCT ?cidoc ?subPropertyOf  WHERE {
                 ?cidoc rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> ; 
                        rdfs:subPropertyOf ?subPropertyOf ;
                        rdfs:range ?range .}"""
for row in cidoc.query(query):
    try:
        print(qid[str(row["cidoc"])])
        localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
        statements=[]
        statements.append(wdi_core.WDItemID(value=qid[str(row["subPropertyOf"])], prop_nr="P6"))
        item = localEntityEngine(wd_item_id=qid[str(row["cidoc"])], data=statements)
        print(item.write(login))
    except:
        continue

In [None]:
query = """SELECT DISTINCT ?cidoc ?inverseOf  WHERE {
                 ?cidoc rdf:type <http://www.w3.org/2002/07/owl#ObjectProperty> ; 
                        owl:inverseOf ?inverseOf ;
                        rdfs:range ?range .}"""
for row in cidoc.query(query):
    try:
        print(qid[str(row["cidoc"])])
        localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
        statements=[]
        statements.append(wdi_core.WDItemID(value=qid[str(row["inverseOf"])], prop_nr="P7"))
        item = localEntityEngine(wd_item_id=qid[str(row["cidoc"])], data=statements)
        print(item.write(login))
    except:
        continue

# Import SAF properties as described in DM_SAF

## Import the Excel sheet that contains the LUX Saf view of CIDOC-CRM

In [3]:
model_def = pd.read_excel("DM_SAF_vers.1.1.3.xlsx", header=1)
model_def

Unnamed: 0,Field - AF,Deutsch,English,français,Domain,Property,Range,Required : Y/N,Repeated : Y/N,Data type,...,Public display,Example 1,Example 2,Unnamed: 20,Unnamed: 21,Unnamed: 22,Note,Minimum set of fields,Notes,Unnamed: 26
0,Name,namen,name,nom,E21 Person,is a subclass of,E39 Actor,,,monolingualtext,...,,,,,,,/,/,,
1,Type,Namensformat,name format,format du nom,E90 Symbolic Object,P2 has type,E55 Type,Y,N,wikibase-item,...,,1,0,,,,CL3 -final,Y,,
2,Numeration,Nummerierung,numeration,numeration,E21 Person,P1 is identified by,E82 Actor appellation,Y,N,wikibase-item,...,,,,,,,/,/,,
3,Title,Titel,title,titre,E21 Person,P1 is identified by,E82 Actor appellation,Y,N,monolingualtext,...,,,,,,,/,/,,
4,Birth date,Geburtsdatum,date of birth,date de naissance,E21 Person,P98 was born,E67 Birth,Y,N,edtf,...,,,,,,,/,/,,
5,Birth place,Geburtsort,place of birth,place de naissance,E21 Person,P98i was born,E67 Birth,N,N,edtf,...,,,,,,,/,/,,
6,Death date,Sterbedatum,date of death,date de décès,E21 Person,P100i died in,E69 Death,N,N,edtf,...,,,,,,,/,/,,
7,Death place,Ort des Todes,place of death,place de naissance,E21 Person,P100 died in,E69 Death,N,N,edtf,...,,,,,,,/,/,,
8,Gender,Geschlecht,gender,sexe,E21 Person,P2 has type,E55 Type,Y,N,wikibase-item,...,,1,2,,,,CL4,Y,,
9,Profession - type,Art des Berufs,type of profession,type de profession,E21 Person,P14i performed,E7 Activity,N,Y,monolingualtext,...,,,,,,,/,/,,


# extract property definitions
Here only the fields needed containing the mapping rules are being extracted. This is a step allowing close inspection, later it can be removed and the subsequent steps can be down directly on the source excel file

In [None]:
cleanedList = [x for x in set(model_def["English"].to_list()) if str(x) != 'nan']
cleanedList = list(cleanedList)
flat_model = model_def.loc[model_def["English"].isin(cleanedList)]
property_model = flat_model[["Deutsch", "English", "français", "Required : Y/N", "Repeated : Y/N", "Data type",  "Description - see document \"Data model - minimum set of fields: definitions\""]]
property_model.drop_duplicates()

wikibase_prop_model = property_model[["Deutsch", "English", "français", "Data type", "Description - see document \"Data model - minimum set of fields: definitions\""]].drop_duplicates()
wikibase_prop_model.sort_values(by=['English'])

## Create properties

In [None]:
property_model["English"].unique()
for index, row in wikibase_prop_model.iterrows():
    if row["Data type"] in datatype_map.values():
        try: 
            print(row["English"])
            createProperty(login, enlabel=row["English"], frlabel=row["français"], delabel=row["Deutsch"], description="Lux SAF Property", property_datatype=row["Data type"])
        except:
            print("tjope")

# Create Numerals

In [4]:
from collections import OrderedDict

def write_roman(num):

    roman = OrderedDict()
    roman[1000] = "M"
    roman[900] = "CM"
    roman[500] = "D"
    roman[400] = "CD"
    roman[100] = "C"
    roman[90] = "XC"
    roman[50] = "L"
    roman[40] = "XL"
    roman[10] = "X"
    roman[9] = "IX"
    roman[5] = "V"
    roman[4] = "IV"
    roman[1] = "I"

    def roman_num(num):
        for r in roman.keys():
            x, y = divmod(num, r)
            yield roman[r] * x
            num -= (r * x)
            if num <= 0:
                break

    return "".join([a for a in roman_num(num)])

In [5]:
write_roman(123)

'CXXIII'

In [None]:
for i in range(2021):
    statements = []
    statements.append(wdi_core.WDItemID(value="Q338", prop_nr="P2"))
    numeration_item = wdi_core.WDItemEngine(new_item=True, data=statements, mediawiki_api_url=api, sparql_endpoint_url=sparql)
    numeration_item.set_label(write_roman(i), lang="en")
    numeration_item.set_label(write_roman(i), lang="fr")
    numeration_item.set_label(write_roman(i), lang="de")
    print(i, write_roman(i), numeration_item.write(login))

0  Q339
1 I Q340
2 II Q341
3 III Q342
4 IV Q343
5 V Q344
6 VI Q345
7 VII Q346
8 VIII Q347
9 IX Q348
10 X Q349
11 XI Q350
12 XII Q351
13 XIII Q352
14 XIV Q353
15 XV Q354
16 XVI Q355
17 XVII Q356
18 XVIII Q357
19 XIX Q358
20 XX Q359
21 XXI Q360
22 XXII Q361
23 XXIII Q362
24 XXIV Q363
25 XXV Q364
26 XXVI Q365
27 XXVII Q366
28 XXVIII Q367
29 XXIX Q368
30 XXX Q369
31 XXXI Q370
32 XXXII Q371
33 XXXIII Q372
34 XXXIV Q373
35 XXXV Q374
36 XXXVI Q375
37 XXXVII Q376
38 XXXVIII Q377
39 XXXIX Q378
40 XL Q379
41 XLI Q380
42 XLII Q381
43 XLIII Q382
44 XLIV Q383
45 XLV Q384
46 XLVI Q385
47 XLVII Q386
48 XLVIII Q387
49 XLIX Q388
50 L Q389
51 LI Q390
52 LII Q391
53 LIII Q392
54 LIV Q393
55 LV Q394
56 LVI Q395
57 LVII Q396
58 LVIII Q397
59 LIX Q398
60 LX Q399
61 LXI Q400
62 LXII Q401
63 LXIII Q402
64 LXIV Q403
65 LXV Q404
66 LXVI Q405
67 LXVII Q406
68 LXVIII Q407
69 LXIX Q408
70 LXX Q409
71 LXXI Q410
72 LXXII Q411
73 LXXIII Q412
74 LXXIV Q413
75 LXXV Q414
76 LXXVI Q415
77 LXXVII Q416
78 LXXVIII Q417
79 L

536 DXXXVI Q875
537 DXXXVII Q876
538 DXXXVIII Q877
539 DXXXIX Q878
540 DXL Q879
541 DXLI Q880
542 DXLII Q881
543 DXLIII Q882
544 DXLIV Q883
545 DXLV Q884
546 DXLVI Q885
547 DXLVII Q886
548 DXLVIII Q887
549 DXLIX Q888
550 DL Q889
551 DLI Q890
552 DLII Q891
553 DLIII Q892
554 DLIV Q893
555 DLV Q894
556 DLVI Q895
557 DLVII Q896
558 DLVIII Q897
559 DLIX Q898
560 DLX Q899
561 DLXI Q900
562 DLXII Q901
563 DLXIII Q902
564 DLXIV Q903
565 DLXV Q904
566 DLXVI Q905
567 DLXVII Q906
568 DLXVIII Q907
569 DLXIX Q908
570 DLXX Q909
571 DLXXI Q910
572 DLXXII Q911
573 DLXXIII Q912
574 DLXXIV Q913
575 DLXXV Q914
576 DLXXVI Q915
577 DLXXVII Q916
578 DLXXVIII Q917
579 DLXXIX Q918
580 DLXXX Q919
581 DLXXXI Q920
582 DLXXXII Q921
583 DLXXXIII Q922
584 DLXXXIV Q923
585 DLXXXV Q924
586 DLXXXVI Q925
587 DLXXXVII Q926
588 DLXXXVIII Q927
589 DLXXXIX Q928
590 DXC Q929
591 DXCI Q930
592 DXCII Q931
593 DXCIII Q932
594 DXCIV Q933
595 DXCV Q934
596 DXCVI Q935
597 DXCVII Q936
598 DXCVIII Q937
599 DXCIX Q938
600 DC Q939
6

In [21]:
dir(login)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'callback_url',
 'consumer_key',
 'consumer_secret',
 'continue_oauth',
 'edit_token',
 'generate_edit_credentials',
 'generate_rollback_credentials',
 'get_edit_cookie',
 'get_edit_token',
 'get_rollback_token',
 'get_session',
 'instantiation_time',
 'mediawiki_api_url',
 'mediawiki_index_url',
 'response_qs',
 'rollback_token',
 's',
 'token_renew_period',
 'user',
 'user_agent']

In [27]:
login.get_edit_token()

'a719e5c3d294d6d8f2666d531da3532f60a6c77f+\\'

In [28]:
login.get_rollback_token()

'7ece8e53c287be4f7d3730a1a985391160a6c9d8+\\'