In [None]:

# write to wikibase

# a. install the wikibase docker image (https://github.com/wmde/wikibase-docker) on a digitalocean droplet.
# b. or wipe a previous install: docker-compose down --volumes
# c. bring online: docker-compose up -d
# d. run the account creation function (generate_account, below).
# e. update LocalSettings: docker cp LocalSettings.php wikibase-docker_wikibase_1:/var/www/html/.
# f. copy icon: docker cp icon-small.jpg wikibase-docker_wikibase_1:/var/www/html/images/.
# g. restart wikibase: docker-compose -f docker-compose.yml restart wikibase


In [None]:

# import libraries

from wikibaseintegrator import wbi_core, wbi_login
from IPython.display import clear_output
import pathlib, requests, datetime, rdflib
import pydash, time


In [None]:

# generate writing account

def generate_account(password, user, address):
    """Create a user account on a wikibase through its MediaWiki api.

    Adapted from https://www.mediawiki.org/wiki/API:Account_creation

    password -- sent as both the 'password' and the 'retype' field
    user -- name of the account to create
    address -- host of the wikibase; the api is addressed on port 8181

    Returns nothing: the json reply to the creation request is printed
    together with the current timestamp.
    """
    base_url = f"http://{address}:8181"
    api = base_url + "/w/api.php"
    session = requests.Session()

    # a createaccount token has to be fetched before the account can be requested
    token_query = {'action':"query",'meta':"tokens",'type':"createaccount",'format':"json"}
    token_reply = session.get(url=api, params=token_query).json()
    token = token_reply['query']['tokens']['createaccounttoken']

    # same session is reused so the token stays valid for the creation request
    creation_form = {
        'action': "createaccount",
        'createtoken': token,
        'username': user,
        'password': password,
        'retype': password,
        'createreturnurl': base_url,
        'format': "json",
    }
    creation_reply = session.post(api, data=creation_form)
    print(creation_reply.json(), datetime.datetime.now())

# the account password is kept outside the notebook, in a file in the home directory
password_file = pathlib.Path.home() / 'wikibase_password.md'
with open(password_file) as password_handle:
    # drop every newline so a trailing line break does not end up in the password
    motdepasse = ''.join(password_handle.read().split('\n'))

# account name and wikibase host used by every following cell
wikibase_url = '167.99.135.149'
username = 'paulduchesne'

generate_account(motdepasse, username, wikibase_url)


In [None]:

# define graph and namespace

# namespaces: wikibase structural predicates ('#claim', '#qualifier', '#reference') and FIAF resources
name_fiaf = rdflib.Namespace('https://www.fiafnet.org/')
name_wb = rdflib.Namespace('http://wikibas.se/ontology')

# empty graph; the merged turtle file is parsed into it further down
graph = rdflib.Graph()


In [None]:

# write properties

def write_property(label, datatype, api_url):
    """Create one property on the wikibase and return its identifier.

    label -- iterable of label values; each value must expose a `.language`
             attribute, which is used as the language of that label
    datatype -- passed on unchanged as `property_datatype` when writing
    api_url -- MediaWiki api endpoint the entity engine is bound to

    Authentication relies on the module-level `login_instance`.
    Returns the `item_id` of the entity after it has been written.
    """
    engine_class = wbi_core.ItemEngine.wikibase_item_engine_factory(mediawiki_api_url=api_url)
    entity = engine_class(data=[])

    # NOTE(review): the original author's remark here reads "no, go looking for a label!!" --
    # presumably a reminder that a property arriving without labels should get one looked up.
    for text in label:
        entity.set_label(text, lang=text.language)

    entity.write(login_instance, entity_type="property", property_datatype=datatype)
    return entity.item_id

# authenticate against the wikibase api with the writing account
login_instance = wbi_login.Login(
    user=username,
    pwd=motdepasse,
    mediawiki_api_url=f'http://{wikibase_url}:8181/w/api.php')

# the merged turtle file lives in the sibling '3-merge' directory of the working directory
data = pathlib.Path.cwd().resolve().parents[0] / '3-merge' / 'merge.ttl'
graph.parse(str(data), format="ttl")

# every distinct predicate of the graph, minus the structural ones which never become properties
structural_predicates = (
    name_wb['#reference'],
    name_wb['#claim'],
    name_wb['#qualifier'],
    rdflib.RDFS.label,
)
properties = [
    predicate
    for predicate in pydash.uniq([predicate for subject, predicate, value in graph])
    if predicate not in structural_predicates
]

# preferred creation order of the properties
prop_order = [rdflib.term.URIRef(uri) for uri in (
    'https://www.fiafnet.org/ontology/property/instance_of',
    'https://www.fiafnet.org/ontology/property/work_type',
    'https://www.fiafnet.org/ontology/property/title',
    'https://www.fiafnet.org/ontology/property/forename',
    'https://www.fiafnet.org/ontology/property/surname',
    'https://www.fiafnet.org/ontology/property/gender',
    'https://www.fiafnet.org/ontology/property/production_country',
    'https://www.fiafnet.org/ontology/property/event',
    'https://www.fiafnet.org/ontology/property/agent',
    'https://www.fiafnet.org/ontology/property/item_of',
    'https://www.fiafnet.org/ontology/property/held_at',
    'https://www.fiafnet.org/ontology/property/manifestation',
    'https://www.fiafnet.org/ontology/property/manifestation_of',
    'https://www.fiafnet.org/ontology/property/item',
    'https://www.fiafnet.org/ontology/property/work',
    'https://www.fiafnet.org/ontology/property/access',
    'https://www.fiafnet.org/ontology/property/carrier',
    'https://www.fiafnet.org/ontology/property/specific_carrier',
    'https://www.fiafnet.org/ontology/property/element',
    'https://www.fiafnet.org/ontology/property/base',
    'https://www.fiafnet.org/ontology/property/colour',
    'https://www.fiafnet.org/ontology/property/sound',
    'https://www.fiafnet.org/ontology/property/sound_format',
    'https://www.fiafnet.org/ontology/property/maintitles_language',
    'https://www.fiafnet.org/ontology/property/intertitles_language',
    'https://www.fiafnet.org/ontology/property/extent_metres',
    'https://www.fiafnet.org/ontology/property/extent_feet',
    'https://www.fiafnet.org/ontology/property/duration',
    'https://www.fiafnet.org/ontology/property/external_id',
)]

# Final creation order: the preferred properties first, then every remaining predicate.
# Preferred properties that never occur as a predicate in the graph are left out: their
# datatype cannot be inferred, and previously they silently inherited the datatype of
# whichever property was processed before them (or raised NameError when first in line).
absent = [p for p in prop_order if p not in properties]
if absent:
    print('not created, absent from graph:', [str(p) for p in absent])
prop_order = [p for p in prop_order if p in properties] + [x for x in properties if x not in prop_order]

api_url = f'http://{wikibase_url}:8181/w/api.php'
property_dict = dict()

for p in prop_order:
    labels = [o for s,v,o in graph.triples((p, rdflib.RDFS.label, None))]

    # the datatype is inferred from the first value found for the property
    sample = next((o for s,v,o in graph.triples((None, p, None))), None)
    if isinstance(sample, rdflib.URIRef):
        datatype = 'wikibase-item'
    elif isinstance(sample, rdflib.Literal):
        datatype = 'string'
    else:
        raise Exception('Unknown property datatype.')

    # maps the original predicate to the identifier returned by write_property
    property_dict[p] = write_property(labels, datatype, api_url)

# replace every created predicate in the graph by its wikibase identifier
rewrite = rdflib.Graph()
for s,v,o in graph:
    if v in property_dict:
        v = rdflib.URIRef(property_dict[v])
    rewrite.add((s,v,o))
graph = rewrite

print(datetime.datetime.now(), ';', len(graph), 'triples.')
  

In [None]:

# write items

# nodes hanging off '#claim', '#qualifier' and '#reference' are statement containers, not items
claims = [o for s,p,o in graph.triples((None, name_wb['#claim'], None))]
qualifiers = [o for s,p,o in graph.triples((None, name_wb['#qualifier'], None))]
references = [o for s,p,o in graph.triples((None, name_wb['#reference'], None))]
undesirable = claims+qualifiers+references
# set copy for membership tests, so each triple is not compared against the whole list
undesirable_nodes = set(undesirable)

# copy of the graph without label triples, so label-only subjects are not treated as items
cleaned = rdflib.Graph()
for s,p,o in graph:
    cleaned.add((s,p,o))
cleaned.remove((None, rdflib.RDFS.label, None))

# items are the remaining subjects plus every URI object, each listed once
items = pydash.uniq([s for s,p,o in cleaned if s not in undesirable_nodes])
items += pydash.uniq([o for s,p,o in cleaned if isinstance(o, rdflib.URIRef) and o not in undesirable_nodes])
items = pydash.uniq(items)

item_api_url = f'http://{wikibase_url}:8181/w/api.php'
# original node -> URIRef of the identifier assigned by the wikibase
item_ids = dict()

start = datetime.datetime.now()
try:
    for num, node in enumerate(items, start=1):

        duration = (datetime.datetime.now()-start)/num # this is how long per loop
        eta = (duration * (len(items)-num))+datetime.datetime.now()
        print('running:', num, 'of', len(items), ';', round((num/len(items))*100, 2), '% ; eta:', eta)
        clear_output(wait=True)

        # create an empty item carrying only the labels of the node
        wd_item = wbi_core.ItemEngine(data=[], mediawiki_api_url=item_api_url)
        for s,v,text in graph.triples((node, rdflib.RDFS.label, None)):
            wd_item.set_label(text, lang=text.language)
        wd_item.write(login_instance)

        item_ids[node] = rdflib.URIRef(wd_item.item_id)

        time.sleep(1)
finally:
    # One pass over the graph instead of a full rebuild per item. Done in `finally`
    # so that items written before a failure are still reflected in the graph.
    rewrite = rdflib.Graph()
    for s,v,o in graph:
        rewrite.add((item_ids.get(s, s), v, item_ids.get(o, o)))
    graph = rewrite

print(datetime.datetime.now(), ';', len(graph), 'triples.')
  

In [None]:

# write claims

def _property_sort_key(prop):
    """Return the property identifier with every 'P' removed, zero-padded to four characters."""
    return str(prop.replace('P','')).zfill(4)


def _to_wbi_datatype(value, prop, **flags):
    """Wrap an rdflib term as a wikibaseintegrator datatype.

    URIRef values become ItemID and Literal values become String; any other
    term raises. Keyword arguments are passed on to the datatype constructor.
    """
    if isinstance(value, rdflib.URIRef):
        return wbi_core.ItemID(value=str(value), prop_nr=str(prop), **flags)
    if isinstance(value, rdflib.Literal):
        return wbi_core.String(value=str(value), prop_nr=str(prop), **flags)
    raise Exception('no proper datatype!')


claim_predicate = name_wb['#claim']
qualifier_predicate = name_wb['#qualifier']
reference_predicate = name_wb['#reference']

# every node that carries at least one claim
elements = pydash.uniq([a for a,b,c in graph.triples((None, claim_predicate, None))])

start = datetime.datetime.now()
for num, elem in enumerate(elements, start=1):

    duration = (datetime.datetime.now()-start)/num # this is how long per loop
    eta = (duration * (len(elements)-num))+datetime.datetime.now()
    print('running:', num, 'of', len(elements), ';', round((num/len(elements))*100, 2), '% ; eta:', eta)
    clear_output(wait=True)

    # Group the claim nodes of this element by the sort key of the property they carry.
    # Each node is listed once per key, so its statements are not emitted repeatedly
    # when the node holds more than one value triple.
    nodes_by_key = dict()
    for a,b,claim_node in graph.triples((elem, claim_predicate, None)):
        for d,prop,f in graph.triples((claim_node, None, None)):
            if prop != reference_predicate and prop != qualifier_predicate:
                bucket = nodes_by_key.setdefault(_property_sort_key(prop), [])
                if claim_node not in bucket:
                    bucket.append(claim_node)

    # one write per property, in ascending property order
    for key in sorted(nodes_by_key):

        claimer = list()
        for claim_node in nodes_by_key[key]:
            node_triples = list(graph.triples((claim_node, None, None)))

            # qualifiers: every triple of every node attached through '#qualifier'
            qual2 = [
                _to_wbi_datatype(value, prop, is_qualifier=True)
                for d,pred,qualifier_node in node_triples if pred == qualifier_predicate
                for g,prop,value in graph.triples((qualifier_node, None, None))]

            # references: one single-entry reference block per triple; literal values are
            # now written as String instead of being forced into ItemID
            refs2 = [
                [_to_wbi_datatype(value, prop, is_reference=True)]
                for d,pred,reference_node in node_triples if pred == reference_predicate
                for g,prop,value in graph.triples((reference_node, None, None))]

            # the statement values themselves, restricted to the property being written
            for d,prop,value in node_triples:
                if prop == reference_predicate or prop == qualifier_predicate:
                    continue
                if _property_sort_key(prop) != key:
                    continue
                claimer.append(_to_wbi_datatype(
                    value, prop, qualifiers=qual2, references=pydash.uniq(refs2)))

        wd_item = wbi_core.ItemEngine(item_id=str(elem), data=claimer,
                                      mediawiki_api_url=f'http://{wikibase_url}:8181/w/api.php')
        wd_item.write(login_instance)

    time.sleep(1)

print(datetime.datetime.now(), ';', len(graph), 'triples.')
