In [2]:
#!/usr/bin/env python
# coding: utf-8

# # Notebook to create a new LUX-SAF Wikibase instance
# This script builds on an excel file that is provided by the data modellers. That excel file **TODO** Provide a link to repository containing the latest version(s).
# That file contains the CIDOC-CRM representation of the SAF with mappings to the perceived Wikibase(Qualifier).

from wikidataintegrator import wdi_core, wdi_login, wdi_config
from getpass import getpass
import pandas as pd
import os

wikibase = "http://localhost:8080"
api = "http://localhost:8080/w/api.php"
sparql = "http://localhost:8282/proxy/wdqs/bigdata/namespace/wdq/sparql"
#entityUri = wikibase.replace("https:", "http:")+"entity/"
entityUri = "http://mediawiki.svc/entity/"  # this is used to strip it from results! 

WBUSER = "SafAdmin"
#WBPASS = getpass()
WBPASS = "ExampleAdminPassword"
login = wdi_login.WDLogin(WBUSER, WBPASS, mediawiki_api_url=api)



In [3]:

localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api, sparql)

## Function to create basic properties
def createProperty(login=login, wdprop=None, lulabel="", enlabel="", frlabel="", delabel="", description="", property_datatype=""):
    if wdprop == None:
        s = []
    else:
        s = [wdi_core.WDUrl(wdprop, prop_nr="P1")]
    localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api,sparql)
    item = localEntityEngine(data=s)
    if lulabel != "":
        item.set_label(lulabel, lang="lb")
    item.set_label(enlabel, lang="en")
    item.set_label(delabel, lang="de")
    item.set_label(frlabel, lang="fr")
    item.set_description(description, lang="en")
    print(item.write(login, entity_type="property", property_datatype=property_datatype))


# # OWL properties to capture CIDOC-CRM

## DR: First we create the main properties (These will become P1, P2, etc.)
## This will fail if properties with the same label aalready exist!

# instance of
createProperty(login, lulabel="ass eng",
                      enlabel="instance of",
                      frlabel="instance de",
                      delabel="ist ein(e)",
                      property_datatype="wikibase-item")

# subclass of
createProperty(login, lulabel="Ënnerklass vu(n)",
                      enlabel="subclass of",
                      frlabel="sous-classe de",
                      delabel="Unterklasse von",
                      property_datatype="wikibase-item")
# skos:exact match
createProperty(login, lulabel="genauen Match",
                      enlabel="exact match",
                      frlabel="correspondance exacte",
                      delabel="exakte Übereinstimmung",
                      description="mapping",
                      property_datatype="url")
#domain
createProperty(login, lulabel="domain",
                      enlabel="domain",
                      frlabel="domaine",
                      delabel="domain",
                      property_datatype="wikibase-item")
#range
createProperty(login, lulabel="reechwäit",
                      enlabel="range",
                      frlabel="intervalle",
                      delabel="reichweite",
                      property_datatype="wikibase-item")
#subPropertyOf
createProperty(login, lulabel="Ënnerbesëtz vun",
                      enlabel="subproperty of",
                      frlabel="sous-propriété de",
                      delabel="untereigenschaft von",
                      property_datatype="wikibase-item")
#inverseOf
createProperty(login, lulabel="invers vun",
                      enlabel="inverse of",
                      frlabel="inverse de",
                      delabel="invers von",
                      property_datatype="wikibase-item")



Error while writing to Wikidata


Please set P2302 and Q21502410 in your wikibase or set `core_props` manually.
Continuing with no core_props


WDApiError: {'error': {'code': 'modification-failed', 'info': 'Property [[Property:P1|P1]] already has label "ass eng" associated with language code lb.', 'messages': [{'name': 'wikibase-validator-label-conflict', 'parameters': ['ass eng', 'lb', '[[Property:P1|P1]]'], 'html': {'*': 'Property <a href="/wiki/Property:P1" title="Property:P1">P1</a> already has label "ass eng" associated with language code lb.'}}], '*': 'See http://localhost:8080/w/api.php for API usage. Subscribe to the mediawiki-api-announce mailing list at &lt;https://lists.wikimedia.org/mailman/listinfo/mediawiki-api-announce&gt; for notice of API deprecations and breaking changes.'}}

Once they have been created, we fetch them from the WDQS, so that we may learn their prop (e.g. "P1") and corresponding label "e.g. 'instance of'"

Q: Why do we set propertyID? it's never used afterwards!
A: Well, in general, this looks more like a check than anything else

In [4]:

## This will be a list like:
##
# prop	propLabel
# http://mediawiki.svc/entity/P1	instance of
# http://mediawiki.svc/entity/P2	subclass of
# See http://localhost:8080/wiki/Special:ListProperties

propertyID = dict()
query = """SELECT ?prop ?propLabel WHERE {
  ?prop wikibase:directClaim ?wdt .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}"""
for index, row in wdi_core.WDItemEngine.execute_sparql_query(query, as_dataframe = True, endpoint=sparql).iterrows():
    print(row["prop"].replace(entityUri, ""), row["propLabel"])
    propertyID[row["propLabel"]] = row["prop"].replace(entityUri, "")


P1 instance of
P2 subclass of
P3 exact match
P4 domain
P5 range
P6 subproperty of
P7 inverse of
P8 name
P9 name format
P10 numeration
P11 title
P12 source of information - text
P13 type of source
P14 superclass of
P16 source of information - url
P17 alternative name
P18 date of birth
P19 place of birth
P20 date of death
P21 place of death
P22 gender
P23 profession
P24 date of beginning
P25 end date
P26 activity
P27 internal note
P28 public note
P29 AFL identifier
P30 creator
P31 institutional affiliation
P32 date of creation
P33 date of modification
P34 editor
P35 status
P36 ISNI
P37 VIAF
P38 GND
P39 WikiData
P40 Getty
P75 ARK


In [7]:
## DR: We additionally create two items named "Class" (Q1) and "Property" (Q2)
# Note: This will, however create duplicates. --> Setting new_item to False doesn't change that fact

# class item
localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api, sparql)
item = localEntityEngine(new_item=True)
item.set_label("Class", lang="en")
item.set_aliases(["Owl:Class"], lang="en")
item.write(login)

# property item
localEntityEngine = wdi_core.WDItemEngine.wikibase_item_engine_factory(api, sparql)
item = localEntityEngine(new_item=True)
item.set_label("Property", lang="en")
item.set_aliases(["owl:ObjectProperty"], lang="en")
item.write(login)


'Q501'

In [14]:
# # import data from XLSX (the DataModel for the SAF)
model_def = pd.read_excel("DM_SAF_vers.1.0.2_andra.xlsx", header=1)

# # Read the property definitions from the DM_SAF and create them.
## Some properties, such as "subclass of" defined in the DM have already been created previously

for index, row in model_def.iterrows():
    if row["Data type"].strip() in wdi_config.property_value_types.keys():
        print('Creating a property of type "{}" with label "{}"'.format(row["Data type"], row["English"].strip()))
        try:
            createProperty(
                login,
                enlabel=row["English"],
                frlabel=row["français"],
                delabel=row["Deutsch"],
                description="Lux SAF Property",
                property_datatype=row["Data type"].strip()
            )
        except Exception as e:
            print('Failed creating property for "{}". Error: "{}"'.format(row["English"], e))
    else:
        print('Error: Unsupported data type "{}"'.format(row["Data type"]))


Creating a property of type "string" with label "name"
Error while writing to Wikidata


TypeError: 'WDApiError' object is not subscriptable

In [None]:

# # Create the controlled lists as described in DM_SAF
# ## CL4 Gender

CL4 = pd.read_excel("DM_SAF_vers.1.0.2_andra.xlsx", sheet_name="CL4 GENDER")
for index, row in CL4.iterrows():
    print(row["Label (English)"])
    item = localEntityEngine(new_item=True)
    item.set_label(row["Label (English)"], lang="en")
    item.set_label(row["Label (German)"], lang="de")
    item.set_label(row["Label (French)"], lang="fr")
    print(item.write(login))


# ## CL5 STATUS

CL5 = pd.read_excel("DM_SAF_vers.1.0.2_andra.xlsx", sheet_name="CL5 STATUS")
for index, row in CL5.iterrows():
    print(row["Label (English)"])
    item = localEntityEngine(new_item=True)
    item.set_label(row["Label (English)"], lang="en")
    item.set_label(row["Label (German)"], lang="de")
    item.set_label(row["Label (French)"], lang="fr")
    print(item.write(login))

# ## CL3 Name Format

CL3 = pd.read_excel("DM_SAF_vers.1.0.2_andra.xlsx", sheet_name="CL3 Name Format")
for index, row in CL3.iterrows():
    item = localEntityEngine(new_item=True)
    item.set_label(row["Cataloging specs"])
    print(item.write(login))


# # manually added external identifiers not yet covered in DMG

#ARK
createProperty(login, 
    lulabel="ARK",
    enlabel="ARK",
    frlabel="ARK",
    delabel="ARK",
    property_datatype="url"
)


## DR: Now we create an item (Q?) with the label E21 (this should probably not happen here, but in the cidoc-crm importer)
# person_item = localEntityEngine(new_item=True)
# person_item.set_label("E21 Person", lang="en")
# print(person_item.write(login))

# Results

At the end of this process, we end up with:
- a list of Properties (P1 - P75), 
- two items, Class (Owl:Class, Q1) and Property (Owl:ObjectProperty, Q2),
- as well as items for Gender (male Q5, female Q6), Status and Name formats