In [1]:

# questions for NFA (or Google Translate)
# 1. I understand NAZEV-ORIGIN (original title), but what are NAZEV-SKUT, NAZEV-PRAC and NAZEV-KATALOG?
# 2. Are there no authority IDs for individuals?

# notes: write labels for agents


In [2]:

# load and normalise data

import pathlib, xmltodict, json
import pydash, uuid, rdflib

# Location of the NFA export, resolved against the current working directory.
pathway = pathlib.Path.cwd() / 'murnau_ais_export.xml'

# The export is parsed record by record: the text is cut after every closing
# </FILM> tag and each piece is handed to xmltodict on its own.
data = list()
with open(pathway) as source_file:
    fragments = source_file.read().split('</FILM>')

for fragment in fragments:
    # A blank piece (e.g. whatever follows the final </FILM>) is not a record.
    if not fragment.strip():
        continue
    try:
        data.append(xmltodict.parse(fragment + '</FILM>'))
    except Exception:
        # Best effort: pieces that are not well-formed on their own are skipped.
        # Only ordinary errors are caught, so an interrupt still stops the cell.
        # NOTE(review): if the export wraps the records in a root element, the
        # first piece also carries that unclosed start tag and would be skipped
        # here - confirm that no record is lost this way.
        continue

# Keep a JSON copy of everything that parsed.
with open(pathlib.Path.cwd() / 'nfa.json', 'w') as nfa_json:
    json.dump(data, nfa_json)

# last laugh filter
murnau = ['2200560']
data = [x for x in data if pydash.get(x, 'FILM.FILMID') in murnau]

for x in data:
    print(pydash.get(x, 'FILM.NAZEV-ORIGIN'))
    

Letzte Mann,Der


In [3]:

# define graph and namespace

# URI prefixes used when minting nodes in the cells below. Terms of name_wb are
# addressed as fragments, e.g. name_wb['#claim'].
name_fiaf = rdflib.Namespace("https://www.fiafnet.org/")
name_wb = rdflib.Namespace("http://wikibas.se/ontology")
name_nfa = rdflib.Namespace("https://www.nfa.cz/")

# Single graph that every later cell adds its triples to.
graph = rdflib.Graph()


In [4]:

# institute specific entities

# Labels for the two NFA identifier properties (no language tag).
for prop_name, prop_label in (
    ('nfa_work_id', 'National Film Archive work ID'),
    ('nfa_agent_id', 'National Film Archive agent ID'),
):
    graph.add((name_nfa[f'ontology/property/{prop_name}'], rdflib.RDFS.label, rdflib.Literal(prop_label)))

# Local items: (item name, ((label, language), ...), FIAF item it is an instance of).
local_items = (
    ('nfa', (('National Film Archive', 'en'), ('Národní filmový archiv', 'cs')), 'holding_institution'),
    ('nazev_origin', (('nazev origin', 'cs'),), 'title_type'),
    ('obsazeni', (('obsazení', 'cs'),), 'agent_type'),
)

for item_name, item_labels, item_class in local_items:
    item = name_nfa[f'ontology/item/{item_name}']
    for label_text, label_lang in item_labels:
        graph.add((item, rdflib.RDFS.label, rdflib.Literal(label_text, lang=label_lang)))

    # One freshly minted claim node per item carries its instance_of statement.
    item_claim = name_nfa[f"resource/claim/{uuid.uuid4()}"]
    graph.add((item, name_wb['#claim'], item_claim))
    graph.add((item_claim, name_fiaf['ontology/property/instance_of'], name_fiaf[f'ontology/item/{item_class}']))

print(len(graph))


12


In [5]:

# reference

def reference(claim_id):
    """Attach a new reference node to ``claim_id`` naming the NFA item as contributor.

    Adds two triples to the module-level ``graph``: the claim's ``#reference``
    link to a freshly minted reference URI, and that reference's
    ``contributed_by`` link to ``ontology/item/nfa``. Returns nothing.
    """
    source_node = name_nfa[f"resource/reference/{uuid.uuid4()}"]
    contributed_by = name_fiaf['ontology/property/contributed_by']

    graph.add((claim_id, name_wb['#reference'], source_node))
    graph.add((source_node, contributed_by, name_nfa['ontology/item/nfa']))

print(len(graph))


12


In [6]:

# write work specifics

for record in data:
    film_id = pydash.get(record, 'FILM.FILMID')
    work = name_nfa[f"resource/work/{film_id}"]

    # Two statements per work: what it is, and its NFA identifier as a literal.
    statements = (
        (name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/work']),
        (name_nfa['ontology/property/nfa_work_id'], rdflib.Literal(film_id)),
    )
    for statement_property, statement_value in statements:
        # Every statement hangs off its own freshly minted claim node.
        statement = name_nfa[f"resource/claim/{uuid.uuid4()}"]
        graph.add((work, name_wb['#claim'], statement))
        graph.add((statement, statement_property, statement_value))

print(len(graph))
    

16


In [7]:

# write original title

def _natural_title(catalogue_title):
    """Move a trailing, comma-separated part to the front of the title.

    'Letzte Mann,Der' becomes 'Der Letzte Mann'. Only the LAST comma is treated
    as the separator, so commas inside the main title are kept rather than
    causing later parts to be dropped. A title without a comma (or with nothing
    after the comma) is returned stripped of surrounding whitespace.
    """
    head, comma, tail = catalogue_title.rpartition(',')
    if not comma:
        return catalogue_title.strip()
    head, tail = head.strip(), tail.strip()
    return f"{tail} {head}" if tail else head


for x in data:
    work_id = pydash.get(x, 'FILM.FILMID')
    work = name_nfa[f"resource/work/{work_id}"]

    raw_title = pydash.get(x, 'FILM.NAZEV-ORIGIN')
    if not raw_title:
        # No original title on this record: write nothing instead of the
        # literal text 'None'.
        continue
    title = _natural_title(str(raw_title))

    claim1 = name_nfa[f"resource/claim/{uuid.uuid4()}"]
    qual1 = name_nfa[f"resource/qualifier/{uuid.uuid4()}"]

    # The title statement itself.
    graph.add((work, name_wb['#claim'], claim1))
    graph.add((claim1, name_fiaf['ontology/property/title'], rdflib.Literal(title)))

    # Qualifier recording which kind of title this is.
    graph.add((claim1, name_wb['#qualifier'], qual1))
    graph.add((qual1, name_fiaf['ontology/property/title_type'], name_nfa['ontology/item/nazev_origin']))

    # Same two reference triples as before, via the shared helper.
    reference(claim1)

print(len(graph))


22


In [8]:

# write cast agents

for x in data:
    work_id = pydash.get(x, 'FILM.FILMID')
    work = name_nfa[f"resource/work/{work_id}"]

    # Accept a missing entry, a single mapping, or a list of mappings; the
    # credit cell further down handles the single-mapping case the same way.
    cast = pydash.get(x, 'FILM.OBSAZENI') or []
    if isinstance(cast, dict):
        cast = [cast]

    for o in cast:

        agent1 = name_nfa[f"resource/agent/{uuid.uuid4()}"]

        # Work -> cast member statement, qualified with the NFA agent type.
        claim1 = name_nfa[f"resource/claim/{uuid.uuid4()}"]
        qual1 = name_nfa[f"resource/qualifier/{uuid.uuid4()}"]
        graph.add((work, name_wb['#claim'], claim1))
        graph.add((claim1, name_fiaf['ontology/property/agent_cast'], agent1))
        graph.add((claim1, name_wb['#qualifier'], qual1))
        graph.add((qual1, name_fiaf['ontology/property/agent_type'], name_nfa['ontology/item/obsazeni']))
        reference(claim1)

        # The agent node itself.
        claim2 = name_nfa[f"resource/claim/{uuid.uuid4()}"]
        graph.add((agent1, name_wb['#claim'], claim2))
        graph.add((claim2, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/agent']))

        # Name parts: a statement (with reference) is written only for parts
        # that are actually present, so a missing value is never stored.
        forename = pydash.get(o, 'HEREC.JMENO')
        surname = pydash.get(o, 'HEREC.PRIJMENI')
        for name_property, name_part in (('forename', forename), ('surname', surname)):
            if not name_part:
                continue
            name_claim = name_nfa[f"resource/claim/{uuid.uuid4()}"]
            graph.add((agent1, name_wb['#claim'], name_claim))
            graph.add((name_claim, name_fiaf[f'ontology/property/{name_property}'], rdflib.Literal(name_part)))
            reference(name_claim)

        # Label from whichever name parts exist; joining avoids the TypeError
        # that string concatenation raises when one part is missing.
        label_name = ' '.join(part for part in (forename, surname) if part)
        if label_name:
            graph.add((agent1, rdflib.RDFS.label, rdflib.Literal(label_name, lang='cs')))

        # Optional role played, attached as a second qualifier on the cast statement.
        role_label = pydash.get(o, 'ROLE')
        if role_label:
            role1 = name_nfa[f"resource/role/{uuid.uuid4()}"]
            qual2 = name_nfa[f"resource/qualifier/{uuid.uuid4()}"]
            claim5 = name_nfa[f"resource/claim/{uuid.uuid4()}"]

            graph.add((claim1, name_wb['#qualifier'], qual2))
            graph.add((qual2, name_fiaf['ontology/property/role'], role1))

            graph.add((role1, name_wb['#claim'], claim5))
            graph.add((claim5, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/role']))
            graph.add((role1, rdflib.RDFS.label, rdflib.Literal(role_label, lang='cs')))

print(len(graph))


220


In [9]:

# write credit agents

def reference(claim_id):
    """Attach a new reference node to ``claim_id`` naming the NFA item as contributor.

    Adds two triples to the module-level ``graph``: the claim's ``#reference``
    link to a freshly minted reference URI, and that reference's
    ``contributed_by`` link to ``ontology/item/nfa``. Returns nothing.

    NOTE: this re-declares the helper already defined in the earlier
    "# reference" cell with the same behaviour.
    """
    source_node = name_nfa[f"resource/reference/{uuid.uuid4()}"]
    contributed_by = name_fiaf['ontology/property/contributed_by']

    graph.add((claim_id, name_wb['#reference'], source_node))
    graph.add((source_node, contributed_by, name_nfa['ontology/item/nfa']))
    
def write_credit(c, y):
    """Write one credited agent for the current work into ``graph``.

    Parameters
    ----------
    c : mapping
        One credit entry; its 'JMENO' and 'PRIJMENI' values are read with
        ``pydash.get`` and used as forename and surname.
    y : str
        Credit tag name (e.g. 'REZIE'); its lower-cased form selects the NFA
        agent-type item used as qualifier.

    Notes
    -----
    The work node is NOT a parameter: it is read from the module-level name
    ``work``, which the calling loop must set before each call.
    A name part that is missing or empty is skipped (no statement, no
    reference), and the label is built from the parts that exist.
    """
    agent1 = name_nfa[f"resource/agent/{uuid.uuid4()}"]

    # Work -> credited agent statement, qualified with the credit's agent type.
    claim1 = name_nfa[f"resource/claim/{uuid.uuid4()}"]
    qual1 = name_nfa[f"resource/qualifier/{uuid.uuid4()}"]
    graph.add((work, name_wb['#claim'], claim1))
    graph.add((claim1, name_fiaf['ontology/property/agent_credit'], agent1))
    graph.add((claim1, name_wb['#qualifier'], qual1))
    graph.add((qual1, name_fiaf['ontology/property/agent_type'], name_nfa[f'ontology/item/{y.lower()}']))
    reference(claim1)

    # The agent node itself.
    claim2 = name_nfa[f"resource/claim/{uuid.uuid4()}"]
    graph.add((agent1, name_wb['#claim'], claim2))
    graph.add((claim2, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/agent']))

    # Name parts, each with its own referenced statement when present.
    forename = pydash.get(c, 'JMENO')
    surname = pydash.get(c, 'PRIJMENI')
    for name_property, name_part in (('forename', forename), ('surname', surname)):
        if not name_part:
            continue
        name_claim = name_nfa[f"resource/claim/{uuid.uuid4()}"]
        graph.add((agent1, name_wb['#claim'], name_claim))
        graph.add((name_claim, name_fiaf[f'ontology/property/{name_property}'], rdflib.Literal(name_part)))
        reference(name_claim)

    # Joining the available parts avoids the TypeError that concatenation
    # raises when either part is missing.
    label_name = ' '.join(part for part in (forename, surname) if part)
    if label_name:
        graph.add((agent1, rdflib.RDFS.label, rdflib.Literal(label_name, lang='cs')))
        

# Credit tags read from each record; the lower-cased tag names the agent-type item.
credit_types = ['REZIE', 'SCENAR', 'KAMERA', 'HUDBA']

# Declare every agent-type item exactly once. Doing this inside the per-film
# loop would mint one more instance_of claim per item for every film processed.
for y in credit_types:
    type_item = name_nfa[f'ontology/item/{y.lower()}']
    claim9 = name_nfa[f"resource/claim/{uuid.uuid4()}"]
    graph.add((type_item, rdflib.RDFS.label, rdflib.Literal(y.lower(), lang='cs')))
    graph.add((type_item, name_wb['#claim'], claim9))
    graph.add((claim9, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/agent_type']))

for x in data:
    work_id = pydash.get(x, 'FILM.FILMID')
    # write_credit reads this module-level name, so it must be set per film.
    work = name_nfa[f"resource/work/{work_id}"]

    for y in credit_types:
        entries = pydash.get(x, f'FILM.{y}')
        if not entries:
            # This credit category is absent for the film.
            continue
        if isinstance(entries, dict):
            # A single credit arrives as one mapping rather than a list.
            entries = [entries]
        for c in entries:
            write_credit(c, y)

print(len(graph))


300


In [10]:

# What next: you are getting away from events, so let's do agents next.
# I am also not happy with imposing a work title; we should retain the original title in its native language.
# So define "original title" as a title type with the original language, then merge these title types together.
# But you will not know this from the data, so at some point you will need to state explicitly
# that original title (americans), Originaltitel (sw/de) and nazev origin (cz) are all the same.


In [11]:

# print(graph.serialize(format="ttl").decode())


In [12]:

# Write the finished graph as Turtle into the working directory, then report its size.
turtle_path = pathlib.Path.cwd() / 'nfa.ttl'
graph.serialize(destination=str(turtle_path), format="turtle")
print(len(graph))


300
