In [1]:

# start with works, go to titles, then look at work merging


In [2]:

# import libraries

import rdflib, pandas, pathlib, json
import numpy, uuid, xmltodict, pydash


In [3]:

# define graph and namespace

graph = rdflib.Graph()

name_eye = rdflib.Namespace('https://www.eyefilm.nl/') 
name_wb = rdflib.Namespace('http://wikibas.se/ontology')
name_fiaf = rdflib.Namespace("https://www.fiafnet.org/")


In [4]:

# useful functions

def make_claim(s, p, o):        
    claim_id = name_eye[f"resource/claim/{uuid.uuid4()}"]    
    graph.add((s, name_wb['#claim'], claim_id))
    graph.add((claim_id, p, o))
    return claim_id

def make_qual(s, p, o):
    qual_id = name_eye[f"resource/qualifier/{uuid.uuid4()}"]       
    graph.add((s, name_wb['#qualifier'], qual_id))
    graph.add((qual_id, p, o))
    return qual_id

def reference(claim_id, institute):
    ref_id = name_eye[f"resource/reference/{uuid.uuid4()}"]
    graph.add((claim_id, name_wb['#reference'], ref_id))
    graph.add((ref_id, name_fiaf['ontology/property/contributed_by'], institute)) 

def single_list(data):  
    if isinstance(data, list):
        return data
    else:
        return [data]     


In [5]:

# define institution

graph.add((name_eye['ontology/item/eye'], rdflib.RDFS.label, rdflib.Literal('Eye Film Institute', lang='en'))) 
graph.add((name_eye['ontology/item/eye'], rdflib.RDFS.label, rdflib.Literal('Eye Filmmuseum', lang='nl'))) 
make_claim(name_eye['ontology/item/eye'], name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/holding_institution'])
make_claim(name_eye['ontology/item/eye'], name_fiaf['ontology/property/located_in'], name_fiaf['ontology/item/netherlands'])

print(len(graph)) 


6


In [6]:

# format data

data_work = pandas.read_excel(pathlib.Path.cwd() / 'Eye-Murnau.xlsx', sheet_name='work').to_dict(orient='records')
data_person = pandas.read_excel(pathlib.Path.cwd() / 'Eye-Murnau.xlsx', sheet_name='persons').to_dict(orient='records')
data_analogue = pandas.read_excel(pathlib.Path.cwd() / 'Eye-Murnau.xlsx', sheet_name='items_ana', dtype=str)[1:].to_dict(orient='records')
data_digital = pandas.read_excel(pathlib.Path.cwd() / 'Eye-Murnau.xlsx', sheet_name='items_digi', dtype=str)[1:].to_dict(orient='records')

print(len(graph))


6


In [7]:

# write work 

for x in data_work:
    
    work_id = x['ID']
    work = name_eye[f"resource/work/{work_id}"]

    make_claim(work, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/work'])
    claim1 = make_claim(work, name_fiaf['ontology/property/external_id'], rdflib.Literal(work_id))
    make_qual(claim1, name_fiaf['ontology/property/institution'], name_eye['ontology/item/eye'])
    reference(claim1, name_eye['ontology/item/eye'])     

print(len(graph))


70


In [8]:

# write original title

for x in data_work:
   
    work_id = x['ID']
    work = name_eye[f"resource/work/{work_id}"]

    claim1 = make_claim(work, name_fiaf['ontology/property/title'], rdflib.Literal(x['Originele titel']))
    make_qual(claim1, name_fiaf['ontology/property/title_type'], name_fiaf['ontology/item/original_title'])
    reference(claim1, name_eye['ontology/item/eye'])    

print(len(graph))


118


In [9]:

# country of origin

for x in data_work:
   
    work_id = x['ID']
    work = name_eye[f"resource/work/{work_id}"]

    for k, v in {'Duitsland':name_fiaf['ontology/item/germany'], 'Verenigde Staten':name_fiaf['ontology/item/usa']}.items():
        if x['Productieland'] == k:
            claim1 = make_claim(work, name_fiaf['ontology/property/production_country'], v)  
            reference(claim1, name_eye['ontology/item/eye'])     
            
print(len(graph))            


150


In [10]:

# write agent

def write_credit(work_data, dict_key, agent_type): 
    
    work_id = x['ID']
    work = name_eye[f"resource/work/{work_id}"]

    actor_list = x[dict_key]
    
    if isinstance(actor_list, str):
        actor_list = actor_list.split('\n') # this is the one
        for a in actor_list:
            auth = [y for y in data_person if y['Naam'] == a.split('(')[0].strip()]

            if len(auth) == 1:

                auth_data = auth[0]
                agent = name_eye[f"resource/agent/{auth_data['ID']}"]
                 
                claim1 = make_claim(work, name_fiaf['ontology/property/agent'], agent)
                make_qual(claim1, name_fiaf['ontology/property/agent_type'], agent_type)                
                reference(claim1, name_eye['ontology/item/eye']) 
                
                make_claim(agent, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/agent']) 

                claim2 = make_claim(agent, name_fiaf['ontology/property/external_id'], rdflib.Literal(auth_data['ID']))
                make_qual(claim2, name_fiaf['ontology/property/institution'], name_eye['ontology/item/eye'])
                reference(claim2, name_eye['ontology/item/eye'])                
            
                if isinstance(auth_data['Tussenvoegsels'], str):
                    sur = (str(auth_data['Tussenvoegsels'])+' '+auth_data['Familienaam']).strip()
                else:
                    sur = (auth_data['Familienaam']).strip()
                    
                claim3 = make_claim(agent, name_fiaf['ontology/property/forename'], rdflib.Literal(auth_data['Voornamen']))
                reference(claim3, name_eye['ontology/item/eye'])
                
                claim4 = make_claim(agent, name_fiaf['ontology/property/surname'], rdflib.Literal(sur))
                reference(claim4, name_eye['ontology/item/eye'])                

                if auth_data['Geslacht'] == 'Man':
                    claim5 = make_claim(agent, name_fiaf['ontology/property/gender'], name_fiaf['ontology/item/male'])
                    reference(claim5, name_eye['ontology/item/eye'])   
                elif auth_data['Geslacht'] == 'Vrouw':
                    claim5 = make_claim(agent, name_fiaf['ontology/property/gender'], name_fiaf['ontology/item/female'])
                    reference(claim5, name_eye['ontology/item/eye'])                       
                else:
                    raise Exception('Unknown gender listed.')   
                    
                if auth_data['Geboortedatum'] is not numpy.nan:
                    birth_data = str(auth_data['Geboortedatum']).split(' ')[0]                    
                    claim6 = make_claim(agent, name_fiaf['ontology/property/event'], rdflib.Literal(birth_data))
                    make_qual(claim6, name_fiaf['ontology/property/event_type'], name_fiaf['ontology/item/birth'])
                    reference(claim6, name_eye['ontology/item/eye'])                      

                if auth_data['Sterfdatum'] is not numpy.nan:
                    death_data = str(auth_data['Sterfdatum']).split(' ')[0]                   
                    claim7 = make_claim(agent, name_fiaf['ontology/property/event'], rdflib.Literal(death_data))
                    make_qual(claim7, name_fiaf['ontology/property/event_type'], name_fiaf['ontology/item/death'])
                    reference(claim7, name_eye['ontology/item/eye'])                      

                claim9 = make_claim(agent, name_fiaf['ontology/property/work'], work)    
                reference(claim9, name_eye['ontology/item/eye'])                  
                
for x in data_work:
    write_credit(x, 'Acteur', name_fiaf['ontology/item/cast'])
    write_credit(x, 'Regie', name_fiaf['ontology/item/director'])
    write_credit(x, 'Producent', name_fiaf['ontology/item/producer'])   
    write_credit(x, 'Camera', name_fiaf['ontology/item/cinematographer'])   
    write_credit(x, 'Montage', name_fiaf['ontology/item/editor'])      
    write_credit(x, 'Scenarioschrijver', name_fiaf['ontology/item/screenwriter'])  
    write_credit(x, 'Componist', name_fiaf['ontology/item/composer'])      
        
print(len(graph))


3480


In [11]:

# write events - events of interest, dutch release and censorship

for x in data_work:
    
    work_id = x['ID']
    work = name_eye[f"resource/work/{work_id}"]
        
    nl_release_date = x['Releasedatum Nederland']
    if len(str(nl_release_date)) == 19:
        if nl_release_date != numpy.nan:
            rel_date = str(x['Releasedatum Nederland'])[:10]
            claim_id = make_claim(work, name_fiaf['ontology/property/event'], rdflib.Literal(rel_date))
            make_qual(claim_id, name_fiaf['ontology/property/event_type'], name_fiaf['ontology/item/release_date'])
            make_qual(claim_id, name_fiaf['ontology/property/country'], name_fiaf['ontology/item/netherlands'])            
            reference(claim_id, name_eye['ontology/item/eye']) 
            
    nl_censordate_date = x['Keuringsdatum']
    if len(str(nl_censordate_date)) == 19:
        if nl_censordate_date != numpy.nan:
            cen_date = str(x['Keuringsdatum'])[:10]
            claim_id = make_claim(work, name_fiaf['ontology/property/event'], rdflib.Literal(cen_date))
            make_qual(claim_id, name_fiaf['ontology/property/event_type'], name_fiaf['ontology/item/censorship_decision'])
            make_qual(claim_id, name_fiaf['ontology/property/country'], name_fiaf['ontology/item/netherlands'])            
            reference(claim_id, name_eye['ontology/item/eye']) 

print(len(graph))


3576


In [12]:

# write analogue manifestations and items

for x in data_analogue:
    work_id = x['ID Filmwerk']
    work = name_eye[f"resource/work/{work_id}"]  
    
    manifestation = name_eye[f"resource/manifestation/{uuid.uuid4()}"]
    make_claim(manifestation, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/manifestation'])
    make_claim(manifestation, name_fiaf['ontology/property/manifestation_of'], work)

    item_id = x['ID']        
    item = name_eye[f"resource/item/{item_id}"]     

    make_claim(item, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/item'])        
    make_claim(item, name_fiaf['ontology/property/item_of'], manifestation)  

    claim1 = make_claim(item, name_fiaf['ontology/property/held_at'], name_eye['ontology/item/eye'])
    reference(claim1, name_eye['ontology/item/eye'])          
        
    claim2 = make_claim(item, name_fiaf['ontology/property/external_id'], rdflib.Literal(item_id))
    make_qual(claim2, name_fiaf['ontology/property/institution'], name_eye['ontology/item/eye'])
    reference(claim2, name_eye['ontology/item/eye'])
    
    if x['Drager'] == 'Acetaat onbepaald':
        claim_id = make_claim(item, name_fiaf['ontology/property/carrier'], name_fiaf['ontology/item/film'])
        reference(claim_id, name_eye['ontology/item/eye'])   
        claim_id = make_claim(item, name_fiaf['ontology/property/base'], name_fiaf['ontology/item/acetate'])
        reference(claim_id, name_eye['ontology/item/eye'])   
        
        if x['Soort'] == 'Positief':
            claim_id = make_claim(item, name_fiaf['ontology/property/element'], name_fiaf['ontology/item/print'])
            reference(claim_id, name_eye['ontology/item/eye'])  
        elif x['Soort'] == 'Negatief':   
            claim_id = make_claim(item, name_fiaf['ontology/property/element'], name_fiaf['ontology/item/negative'])
            reference(claim_id, name_eye['ontology/item/eye'])          
        elif x['Soort'] == 'Omkeer kopie':   
            claim_id = make_claim(item, name_fiaf['ontology/property/element'], name_fiaf['ontology/item/duplicate_reversal'])
            reference(claim_id, name_eye['ontology/item/eye']) 
        
    elif x['Drager'] == 'Nitraat':
        claim_id = make_claim(item, name_fiaf['ontology/property/carrier'], name_fiaf['ontology/item/film'])
        reference(claim_id, name_eye['ontology/item/eye']) 
        claim_id = make_claim(item, name_fiaf['ontology/property/base'], name_fiaf['ontology/item/nitrate'])
        reference(claim_id, name_eye['ontology/item/eye'])  
        
        if x['Soort'] == 'Positief':
            claim_id = make_claim(item, name_fiaf['ontology/property/element'], name_fiaf['ontology/item/print'])
            reference(claim_id, name_eye['ontology/item/eye'])  
        elif x['Soort'] == 'Negatief':   
            claim_id = make_claim(item, name_fiaf['ontology/property/element'], name_fiaf['ontology/item/negative'])
            reference(claim_id, name_eye['ontology/item/eye'])          
        elif x['Soort'] == 'Omkeer kopie':   
            claim_id = make_claim(item, name_fiaf['ontology/property/element'], name_fiaf['ontology/item/duplicate_reversal'])
            reference(claim_id, name_eye['ontology/item/eye'])         

    elif x['Drager'] == 'Tape':
        claim_id = make_claim(item, name_fiaf['ontology/property/carrier'], name_fiaf['ontology/item/video_tape'])
        reference(claim_id, name_eye['ontology/item/eye'])  
        
    else:
        raise Exception('unknown drager here')
        
    if x['Kleuraspect'] == 'Zwart-wit':  
        claim_id = make_claim(item, name_fiaf['ontology/property/colour'], name_fiaf['ontology/item/black_and_white'])
        reference(claim_id, name_eye['ontology/item/eye'])         

    if x['Functie'] == 'Projectiekopie': # project copy
        claim_id = make_claim(item, name_fiaf['ontology/property/access'], name_fiaf['ontology/item/viewing'])
        reference(claim_id, name_eye['ontology/item/eye'])      

    elif x['Functie'] == 'Conserveringselement':     # pres element 
        claim_id = make_claim(item, name_fiaf['ontology/property/access'], name_fiaf['ontology/item/master'])
        reference(claim_id, name_eye['ontology/item/eye'])        

    if x['Geluid'] == 'Optisch':
        claim_id = make_claim(item, name_fiaf['ontology/property/sound_format'], name_fiaf['ontology/item/optical'])
        reference(claim_id, name_eye['ontology/item/eye'])   
        
    if x['Geluid'] == 'Stil':
        claim_id = make_claim(item, name_fiaf['ontology/property/sound'], name_fiaf['ontology/item/silent'])
        reference(claim_id, name_eye['ontology/item/eye'])     
    elif x['Geluid'] in ['Optisch', 'Digitaal']:
        claim_id = make_claim(item, name_fiaf['ontology/property/sound'], name_fiaf['ontology/item/sound'])
        reference(claim_id, name_eye['ontology/item/eye'])  
        
    if x['Formaat'] == '16mm':   
        claim_id = make_claim(item, name_fiaf['ontology/property/specific_carrier'], name_fiaf['ontology/item/16mm'])
        reference(claim_id, name_eye['ontology/item/eye'])          
    elif x['Formaat'] == '35mm':           
        claim_id = make_claim(item, name_fiaf['ontology/property/specific_carrier'], name_fiaf['ontology/item/35mm'])
        reference(claim_id, name_eye['ontology/item/eye'])   
    elif x['Formaat'] == 'Digibeta':           
        claim_id = make_claim(item, name_fiaf['ontology/property/specific_carrier'], name_fiaf['ontology/item/digibeta'])
        reference(claim_id, name_eye['ontology/item/eye'])   
    else:
        pass
        
    intertitles = x['Taal tussentitels']
    if intertitles is not numpy.nan:
        intertitles = intertitles.split('\n')
        for i in intertitles:
            if i == 'Duits':
                claim_id = make_claim(item, name_fiaf['ontology/property/intertitles_language'], name_fiaf['ontology/item/german'])
                reference(claim_id, name_eye['ontology/item/eye'])                   
            if i == 'Nederlands':
                claim_id = make_claim(item, name_fiaf['ontology/property/intertitles_language'], name_fiaf['ontology/item/dutch'])
                reference(claim_id, name_eye['ontology/item/eye'])  
            if i == 'Engels':
                claim_id = make_claim(item, name_fiaf['ontology/property/intertitles_language'], name_fiaf['ontology/item/english'])
                reference(claim_id, name_eye['ontology/item/eye'])                   
            if i == 'Frans':
                claim_id = make_claim(item, name_fiaf['ontology/property/intertitles_language'], name_fiaf['ontology/item/french'])
                reference(claim_id, name_eye['ontology/item/eye'])                 
            
    maintitles = x['Taal titels']
    if maintitles is not numpy.nan:
        maintitles = maintitles.split('\n')
        for m in maintitles: 
            if m == 'Duits':
                claim_id = make_claim(item, name_fiaf['ontology/property/maintitles_language'], name_fiaf['ontology/item/german'])
                reference(claim_id, name_eye['ontology/item/eye'])                   
            if m == 'Nederlands':
                claim_id = make_claim(item, name_fiaf['ontology/property/maintitles_language'], name_fiaf['ontology/item/dutch'])
                reference(claim_id, name_eye['ontology/item/eye'])                   

    extent = [x['Metrage (geschat)'], x['Metrage (berekend)']]
    extent = pydash.uniq([x for x in extent if x is not numpy.nan and x != '0'])
    for ext in extent:
        claim_id = make_claim(item, name_fiaf['ontology/property/extent_metres'], rdflib.Literal(ext))
        reference(claim_id, name_eye['ontology/item/eye']) 

    duration = [x['Speelduur (u:m:s)'], x['Speelduur (berekend)']]
    duration = pydash.uniq([x for x in duration if x is not numpy.nan and x != '00:00:00'])
    for dure in duration:
        claim_id = make_claim(item, name_fiaf['ontology/property/duration'], rdflib.Literal(dure))
        reference(claim_id, name_eye['ontology/item/eye']) 
    
    make_claim(work, name_fiaf['ontology/property/manifestation'], manifestation)
    make_claim(manifestation, name_fiaf['ontology/property/item'], item)                    

print(len(graph))    


5208


In [13]:

# write digital manifestations and items

for x in data_analogue:
    work_id = x['ID Filmwerk']
    work = name_eye[f"resource/work/{work_id}"]  
    
    manifestation = name_eye[f"resource/manifestation/{uuid.uuid4()}"]
    make_claim(manifestation, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/manifestation'])
    make_claim(manifestation, name_fiaf['ontology/property/manifestation_of'], work)

    item_id = x['ID']        
    item = name_eye[f"resource/item/{item_id}"]     

    make_claim(item, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/item'])        
    make_claim(item, name_fiaf['ontology/property/item_of'], manifestation)  

    claim1 = make_claim(item, name_fiaf['ontology/property/held_at'], name_eye['ontology/item/eye'])
    reference(claim1, name_eye['ontology/item/eye'])          
        
    claim2 = make_claim(item, name_fiaf['ontology/property/external_id'], rdflib.Literal(item_id))
    make_qual(claim2, name_fiaf['ontology/property/institution'], name_eye['ontology/item/eye'])
    reference(claim2, name_eye['ontology/item/eye'])
            
    claim_id = make_claim(item, name_fiaf['ontology/property/carrier'], name_fiaf['ontology/item/digital'])
    reference(claim_id, name_eye['ontology/item/eye'])        

    if x['Functie'] == 'Projectiekopie': # project copy
        claim_id = make_claim(item, name_fiaf['ontology/property/access'], name_fiaf['ontology/item/viewing'])
        reference(claim_id, name_eye['ontology/item/eye'])      

    if x['Formaat'] == '.mxf':   
        claim_id = make_claim(item, name_fiaf['ontology/property/specific_carrier'], name_fiaf['ontology/item/mxf'])
        reference(claim_id, name_eye['ontology/item/eye'])          
        
    intertitles = x['Taal tussentitels']
    if intertitles is not numpy.nan:
        intertitles = intertitles.split('\n')
        for i in intertitles:
            if i == 'Duits':
                claim_id = make_claim(item, name_fiaf['ontology/property/intertitles_language'], name_fiaf['ontology/item/german'])
                reference(claim_id, name_eye['ontology/item/eye'])                   
            if i == 'Nederlands':
                claim_id = make_claim(item, name_fiaf['ontology/property/intertitles_language'], name_fiaf['ontology/item/dutch'])
                reference(claim_id, name_eye['ontology/item/eye'])  
            if i == 'Engels':
                claim_id = make_claim(item, name_fiaf['ontology/property/intertitles_language'], name_fiaf['ontology/item/english'])
                reference(claim_id, name_eye['ontology/item/eye'])                   
            if i == 'Frans':
                claim_id = make_claim(item, name_fiaf['ontology/property/intertitles_language'], name_fiaf['ontology/item/french'])
                reference(claim_id, name_eye['ontology/item/eye'])                 
            
    maintitles = x['Taal titels']
    if maintitles is not numpy.nan:
        maintitles = maintitles.split('\n')
        for m in maintitles:            
            if m == 'Duits':
                claim_id = make_claim(item, name_fiaf['ontology/property/maintitles_language'], name_fiaf['ontology/item/german'])
                reference(claim_id, name_eye['ontology/item/eye'])                   
            if m == 'Nederlands':
                claim_id = make_claim(item, name_fiaf['ontology/property/maintitles_language'], name_fiaf['ontology/item/dutch'])
                reference(claim_id, name_eye['ontology/item/eye'])                   

    duration = [x['Speelduur (u:m:s)'], x['Speelduur (berekend)']]
    duration = pydash.uniq([x for x in duration if x is not numpy.nan and x != '00:00:00'])
    for dure in duration:
        claim_id = make_claim(item, name_fiaf['ontology/property/duration'], rdflib.Literal(dure))
        reference(claim_id, name_eye['ontology/item/eye']) 
    
    make_claim(work, name_fiaf['ontology/property/manifestation'], manifestation)
    make_claim(manifestation, name_fiaf['ontology/property/item'], item)                    

print(len(graph))  


6192


In [14]:

graph.serialize(destination=str(pathlib.Path.cwd() / 'eye.ttl'), format="turtle")
print(len(graph))


6192
