In [1]:

# import libraries

import rdflib, pandas, pathlib
import numpy, uuid
import pathlib, xmltodict, json
import pydash, uuid, rdflib


In [2]:

# define graph and namespace

# single in-memory RDF graph that every later cell adds triples to
graph = rdflib.Graph()

# base URIs used to mint nodes; terms are appended to each base verbatim
name_fiaf = rdflib.Namespace("https://www.fiafnet.org/")
name_nfa = rdflib.Namespace('https://www.nfa.cz/')
# NOTE(review): the Wikibase ontology is normally published under the host
# "wikiba.se" — confirm that "wikibas.se" is intended before changing it,
# since every claim/qualifier/reference predicate is derived from this base.
name_wb = rdflib.Namespace('http://wikibas.se/ontology')


In [3]:

# define institution

# node for the archive itself and the predicate linking an item to its claims
institution = name_nfa['ontology/item/nfa']
claim_predicate = name_wb['#claim']

claim1 = name_nfa[f"resource/claim/{uuid.uuid4()}"]

# one label per language
for label_text, label_lang in (('National Film Archive', 'en'), ('Národní filmový archiv', 'cs')):
    graph.add((institution, rdflib.RDFS.label, rdflib.Literal(label_text, lang=label_lang)))

# claim: the archive is an instance of "holding institution"
graph.add((institution, claim_predicate, claim1))
graph.add((claim1, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/holding_institution']))

# claim: the archive is located in the Czech Republic
claim2 = name_nfa[f"resource/claim/{uuid.uuid4()}"]
graph.add((institution, claim_predicate, claim2))
graph.add((claim2, name_fiaf['ontology/property/located_in'], name_fiaf['ontology/item/czech_republic']))

print(len(graph))


6


In [4]:

# define specific linking properties

# (property slug, English label) for each archive-specific identifier property
linking_properties = (
    ('nfa_work_id', 'National Film Archive work ID'),
    ('nfa_agent_id', 'National Film Archive agent ID'),
    ('nfa_item_id', 'National Film Archive item ID'),
)

for property_slug, english_label in linking_properties:
    graph.add((
        name_nfa[f'ontology/property/{property_slug}'],
        rdflib.RDFS.label,
        rdflib.Literal(english_label, lang='en'),
    ))

print(len(graph))


9


In [5]:

# reference

def reference(claim_id):
    """Attach a freshly minted reference node to a claim.

    Two triples are added to the module-level ``graph``: one linking
    ``claim_id`` to the new reference node via ``#reference``, and one
    stating that the reference was contributed by the NFA item.

    Args:
        claim_id: the claim node the reference is attached to.

    Returns:
        The newly created reference node (previously nothing was returned;
        existing callers that ignore the result are unaffected).
    """
    ref_id = name_nfa[f"resource/reference/{uuid.uuid4()}"]
    graph.add((claim_id, name_wb['#reference'], ref_id))
    graph.add((ref_id, name_fiaf['ontology/property/contributed_by'], name_nfa['ontology/item/nfa']))
    return ref_id

print(len(graph))


9


In [6]:

# make claim

def make_claim(s, p, o):
    """Record ``s p o`` indirectly through a new claim node.

    A claim node with a random UUID is minted, linked from ``s`` via the
    ``#claim`` predicate, and given the triple ``(claim, p, o)``. Both
    triples go into the module-level ``graph``.

    Returns:
        The claim node, so qualifiers and references can be hung off it.
    """
    statement = name_nfa["resource/claim/" + str(uuid.uuid4())]
    for triple in ((s, name_wb['#claim'], statement), (statement, p, o)):
        graph.add(triple)
    return statement

print(len(graph))
    

9


In [7]:

# load and normalise data

import pathlib, xmltodict, json
import pydash, uuid, rdflib
from xml.parsers.expat import ExpatError

pathway = pathlib.Path.cwd() / 'murnau_ais_export.xml'

# The export is cut at every closing </FILM> tag and each piece is parsed as
# its own document (the closing tag is re-appended because split() removes it).
with open(pathway) as source_file:
    fragments = source_file.read().split('</FILM>')

data = list()
skipped_fragments = 0  # pieces that were not well-formed XML on their own
for fragment in fragments:
    try:
        data.append(xmltodict.parse(fragment + '</FILM>'))
    except ExpatError:
        # Only XML parse failures are tolerated (e.g. whatever trails the last
        # </FILM>); any other error now propagates instead of being swallowed.
        # NOTE(review): a fragment that still carries an unclosed wrapper
        # element would also land here — check skipped_fragments is as expected.
        skipped_fragments += 1

# keep a JSON copy of the parsed records next to the notebook
with open(pathlib.Path.cwd() / 'nfa.json', 'w') as nfa_json:
    json.dump(data, nfa_json)

print(len(graph))


9


In [8]:

# write work specifics

for film in data:
    film_id = pydash.get(film, 'FILM.FILMID')
    work_node = name_nfa[f"resource/work/{film_id}"]

    # every work is typed as a work and carries its archive identifier
    work_statements = (
        (name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/work']),
        (name_nfa['ontology/property/nfa_work_id'], rdflib.Literal(film_id)),
    )
    for predicate, value in work_statements:
        make_claim(work_node, predicate, value)

print(len(graph))


57


In [9]:
# write original title

for film in data:
    film_id = pydash.get(film, 'FILM.FILMID')
    work_node = name_nfa[f"resource/work/{film_id}"]

    raw_title = pydash.get(film, 'FILM.NAZEV-ORIGIN')
    if raw_title is None:
        # no original title in the record: skip it instead of storing the
        # literal string "None" as a title
        continue

    # Titles of the form "Rest, Lead" are re-ordered to "Lead Rest".
    # Only the LAST comma is used as the split point so that no part of a
    # title containing several commas is dropped.
    # NOTE(review): assumes the inverted part is the trailing segment — confirm
    # against the export's cataloguing convention.
    segments = str(raw_title).rsplit(',', 1)
    if len(segments) > 1:
        title = f"{segments[1].strip()} {segments[0].strip()}".strip()
    else:
        title = segments[0].strip()

    title_claim = make_claim(work_node, name_fiaf['ontology/property/title'], rdflib.Literal(title))

    # qualify the title as the original title
    title_qualifier = name_nfa[f"resource/qualifier/{uuid.uuid4()}"]
    graph.add((title_claim, name_wb['#qualifier'], title_qualifier))
    graph.add((title_qualifier, name_fiaf['ontology/property/title_type'], name_fiaf['ontology/item/original_title']))

    reference(title_claim)

print(len(graph))

129


In [10]:


def write_credit(work_data, dict_key, agent_type):
    """Write the agents credited under one key of a film record into the graph.

    For each entry found at ``FILM.<dict_key>`` a new agent node is minted,
    claimed as an agent of the work (qualified with ``agent_type`` and
    referenced), typed as an agent, given forename/surname claims where the
    record provides them, and linked back to the work.

    Args:
        work_data: one parsed film record (the record is read from this
            argument, not from any global loop variable).
        dict_key: key under ``FILM`` holding the credit entries.
        agent_type: node used as the value of the ``agent_type`` qualifier.

    Returns:
        None. Nothing is written when the record has no such key.
    """
    work_id = pydash.get(work_data, 'FILM.FILMID')
    work = name_nfa[f"resource/work/{work_id}"]

    credit_entries = pydash.get(work_data, f'FILM.{dict_key}')
    if credit_entries is None:
        return

    # a single entry arrives as a bare value, several as a list
    agent_data = credit_entries if isinstance(credit_entries, list) else [credit_entries]

    # cast entries keep the person's names one level down, under HEREC
    # NOTE(review): if HEREC itself can be a list, these lookups yield None — confirm schema
    name_prefix = 'HEREC.' if dict_key == 'OBSAZENI' else ''
    name_fields = (
        ('forename', 'JMENO'),
        ('surname', 'PRIJMENI'),
    )

    for o in agent_data:
        agent1 = name_nfa[f"resource/agent/{uuid.uuid4()}"]
        claim1 = make_claim(work, name_fiaf['ontology/property/agent'], agent1)
        reference(claim1)

        qual1 = name_nfa[f"resource/qualifier/{uuid.uuid4()}"]
        graph.add((claim1, name_wb['#qualifier'], qual1))
        graph.add((qual1, name_fiaf['ontology/property/agent_type'], agent_type))

        make_claim(agent1, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/agent'])

        for property_slug, source_key in name_fields:
            name_value = pydash.get(o, name_prefix + source_key)
            if name_value is None:
                # absent name part: skip it rather than store the string "None"
                continue
            name_claim = make_claim(agent1, name_fiaf[f'ontology/property/{property_slug}'], rdflib.Literal(name_value))
            reference(name_claim)

        make_claim(agent1, name_fiaf['ontology/property/work'], work)
        
# (record key, agent-type item) for every credit role, in the order written
credit_roles = (
    ('OBSAZENI', 'ontology/item/cast'),
    ('REZIE', 'ontology/item/director'),
    ('KAMERA', 'ontology/item/cinematographer'),
    ('SCENAR', 'ontology/item/screenwriter'),
    ('HUDBA', 'ontology/item/composer'),
)

# the loop variable stays named `x`: other code in this notebook may read it as a global
for x in data:
    for record_key, role_item in credit_roles:
        write_credit(x, record_key, name_fiaf[role_item])

print(len(graph))
        

3981


In [11]:

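# Open questions:
# TODO: item-level information (like that available for EYE) still has to be found; request it via email.
# Items can still be built into the graph in the meantime.
# TODO: check whether the export holds any other interesting additional data.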


In [12]:

# write the finished graph as Turtle to nfa.ttl in the working directory
turtle_path = pathlib.Path.cwd() / 'nfa.ttl'
graph.serialize(destination=str(turtle_path), format="turtle")
print(len(graph))


3981
