In [1]:

# questions for the NFA (Národní filmový archiv):


In [2]:

# import libraries

import rdflib, pandas, pathlib, json
import numpy, uuid, xmltodict, pydash


In [3]:

# define graph and namespace

# One in-memory graph collects every triple written by the cells below.
graph = rdflib.Graph()

# URI prefixes used throughout: NFA-local resources, the predicates that link
# claims / qualifiers / references, and the FIAF ontology terms.
name_nfa = rdflib.Namespace("https://www.nfa.cz/")
# NOTE(review): the host is spelled "wikibas.se" and the prefix has no
# trailing "#"; callers append "#claim" etc. themselves. Confirm the spelling
# is intended before anything downstream depends on these URIs.
name_wb = rdflib.Namespace("http://wikibas.se/ontology")
name_fiaf = rdflib.Namespace("https://www.fiafnet.org/")


In [4]:

# useful functions

def make_claim(s, p, o):
    """Attach the statement (p, o) to subject `s` through a fresh claim node.

    A new claim URI is minted under ``resource/claim/`` with a random UUID,
    linked from `s` with the ``#claim`` predicate, and given the triple
    (claim, p, o).

    Returns the claim URI so qualifiers and references can be hung off it.
    """
    claim_node = name_nfa["resource/claim/" + str(uuid.uuid4())]
    for triple in ((s, name_wb['#claim'], claim_node), (claim_node, p, o)):
        graph.add(triple)
    return claim_node

def make_qual(s, p, o):
    """Attach the qualifier (p, o) to `s` through a fresh qualifier node.

    Mirrors `make_claim`: a URI is minted under ``resource/qualifier/`` with a
    random UUID, linked from `s` with the ``#qualifier`` predicate, and given
    the triple (qualifier, p, o).

    Returns the qualifier URI.
    """
    qualifier_node = name_nfa["resource/qualifier/" + str(uuid.uuid4())]
    link = (s, name_wb['#qualifier'], qualifier_node)
    statement = (qualifier_node, p, o)
    graph.add(link)
    graph.add(statement)
    return qualifier_node

def reference(claim_id, institute):
    """Record `institute` as the contributor of the claim `claim_id`.

    A reference URI is minted under ``resource/reference/`` with a random
    UUID, linked from the claim with the ``#reference`` predicate, and pointed
    at `institute` through the FIAF ``contributed_by`` property.

    Returns None.
    """
    reference_node = name_nfa["resource/reference/" + str(uuid.uuid4())]
    contributed_by = name_fiaf['ontology/property/contributed_by']
    graph.add((claim_id, name_wb['#reference'], reference_node))
    graph.add((reference_node, contributed_by, institute))

def single_list(data):
    """Return `data` unchanged if it is already a list, else wrap it in one.

    Lets callers iterate uniformly over values that may be either a single
    entry or a list of entries. Note that None is wrapped too, giving [None].
    """
    return data if isinstance(data, list) else [data]
    

In [5]:

# define institution

nfa_item = name_nfa['ontology/item/nfa']

# Labels for the archive, one per language.
for label_text, label_lang in (
    ('National Film Archive', 'en'),
    ('Národní filmový archiv', 'cs'),
):
    graph.add((nfa_item, rdflib.RDFS.label, rdflib.Literal(label_text, lang=label_lang)))

# What the archive is and where it is located.
for institution_property, institution_value in (
    (name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/holding_institution']),
    (name_fiaf['ontology/property/located_in'], name_fiaf['ontology/item/czech_republic']),
):
    make_claim(nfa_item, institution_property, institution_value)

print(len(graph))


6


In [6]:

# format data

pathway = pathlib.Path.cwd() / 'murnau_ais_export.xml'

# The export is cut at every closing </FILM> tag and each piece is parsed as
# its own document, so every entry in `data` is a dict rooted at 'FILM'.
# NOTE(review): a piece that carries extra unclosed markup (for example a
# wrapping root element before the first <FILM>) will fail to parse and be
# skipped — check the summary printed below.
with open(pathway) as source_file:
    source_data = source_file.read().split('</FILM>')

data = list()
skipped = list()  # (fragment index, error) for every piece that failed to parse
for fragment_index, fragment in enumerate(source_data):
    # Whatever follows the last </FILM> may be blank; it holds no record.
    if not fragment.strip():
        continue
    try:
        data.append(xmltodict.parse(fragment + '</FILM>'))
    except Exception as error:
        # Loading stays best-effort, but dropped fragments are recorded
        # instead of vanishing silently (and Ctrl-C is no longer swallowed).
        skipped.append((fragment_index, error))

print(f'{len(data)} films parsed, {len(skipped)} fragments skipped')
for fragment_index, error in skipped:
    print(f'  fragment {fragment_index}: {error}')

with open(pathlib.Path.cwd() / 'nfa.json', 'w') as nfa_json:
    json.dump(data, nfa_json)

tech_codes = pathlib.Path.cwd() / 'Xciselnk.xml'
with open(tech_codes) as codes_file:
    codes = xmltodict.parse(codes_file.read())
codes = pydash.get(codes, 'CISELNIKY.CISELNIK') # keep this as a dictionary
tech = pandas.DataFrame(codes)


# Working notes on the values of the DRUH column:
# FORMAT is a weird mix of aspect ratio and small gauge (including 16mm)
# MATERTYP is your "nupe deg" [sic — presumably "dupe neg"] etc, but it is also manufacturer
# MEZITTYP - is this usage? vkopírované, zvlášť, narážkové
# NOSICE is tape formats
# PODKLAD is just N A P
# SIRENI is TV dvd notes
# TERIT is country
# UVTITYP is spojené, zvlášť, přes, narážkové - I reckon this is access


print(len(tech))
tech.head(30)


132


Unnamed: 0,DRUH,CISLO,TEXTCIS
0,FORMAT,1,"1 : 1,19"
1,FORMAT,2,"1 : 1,33"
2,FORMAT,3,"1 : 1,37"
3,FORMAT,4,"1 : 1,66"
4,FORMAT,5,"1 : 1,85"
5,FORMAT,6,"1 : 2,35"
6,FORMAT,7,"1 : 2,55"
7,FORMAT,8,jiný
8,FORMAT,9,16 mm
9,FORMAT,10,8 mm


In [7]:

# write work

for x in data:
    # The record's FILMID doubles as the slug of the work URI.
    work_id = pydash.get(x, 'FILM.FILMID')
    work = name_nfa[f"resource/work/{work_id}"]

    # Record the identifier, qualified by and attributed to the NFA.
    id_claim = make_claim(
        work,
        name_fiaf['ontology/property/external_id'],
        rdflib.Literal(work_id),
    )
    make_qual(id_claim, name_fiaf['ontology/property/institution'], name_nfa['ontology/item/nfa'])
    reference(id_claim, name_nfa['ontology/item/nfa'])

    # Type the resource as a work.
    make_claim(
        work,
        name_fiaf['ontology/property/instance_of'],
        name_fiaf['ontology/item/work'],
    )

print(len(graph))


102


In [8]:

# write original title

for x in data:
    work_id = pydash.get(x, 'FILM.FILMID')
    work = name_nfa[f"resource/work/{work_id}"]

    raw_title = pydash.get(x, 'FILM.NAZEV-ORIGIN')
    if raw_title is None:
        # No original title in the record: str(None) would otherwise be
        # written to the graph as the literal title "None".
        continue

    # The text after a comma is moved to the front of the title
    # (presumably an inverted leading article, e.g. "Title, The" — confirm
    # against the export). Only the LAST comma is used as the pivot, so a
    # title containing several commas keeps all of its parts.
    head, comma, tail = str(raw_title).rpartition(',')
    if comma:
        title = tail.strip() + ' ' + head.strip()
    else:
        title = tail.strip()

    if not title:
        # Nothing left after trimming; do not write an empty title literal.
        continue

    claim1 = make_claim(work, name_fiaf['ontology/property/title'], rdflib.Literal(title))
    make_qual(claim1, name_fiaf['ontology/property/title_type'], name_fiaf['ontology/item/original_title'])
    reference(claim1, name_nfa['ontology/item/nfa'])

print(len(graph))


174


In [9]:

# write agent data

def write_credit(work_data, dict_key, agent_type):
    """Write one kind of credit (cast, director, ...) for a single film record.

    Parameters
    ----------
    work_data : parsed film record (a dict rooted at 'FILM').
    dict_key  : name of the credit element under FILM, e.g. 'REZIE'.
    agent_type: FIAF item used as the agent_type qualifier on each credit.

    For every entry found under ``FILM.<dict_key>`` a new agent resource is
    minted (random UUID, so the same person credited twice yields two agents)
    and linked to the work in both directions; forename and surname are
    written when present. Nothing is written if the record has no such
    element. Returns None.

    The record is read from the `work_data` argument; the function does not
    depend on any loop variable of the calling cell.
    """
    credits = pydash.get(work_data, f'FILM.{dict_key}')
    if credits is None:
        return

    work_id = pydash.get(work_data, 'FILM.FILMID')
    work = name_nfa[f"resource/work/{work_id}"]
    nfa_item = name_nfa['ontology/item/nfa']

    # Cast entries keep the name fields one level down, under HEREC;
    # every other credit type holds JMENO / PRIJMENI directly.
    name_prefix = 'HEREC.' if dict_key == 'OBSAZENI' else ''

    for a in single_list(credits):
        agent = name_nfa[f"resource/agent/{uuid.uuid4()}"]

        # work -> agent, qualified with the role
        claim1 = make_claim(work, name_fiaf['ontology/property/agent'], agent)
        make_qual(claim1, name_fiaf['ontology/property/agent_type'], agent_type)
        reference(claim1, nfa_item)

        make_claim(agent, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/agent'])

        fore = pydash.get(a, f'{name_prefix}JMENO')
        sur = pydash.get(a, f'{name_prefix}PRIJMENI')

        if fore is not None:
            claim3 = make_claim(agent, name_fiaf['ontology/property/forename'], rdflib.Literal(fore))
            reference(claim3, nfa_item)
        if sur is not None:
            claim4 = make_claim(agent, name_fiaf['ontology/property/surname'], rdflib.Literal(sur))
            reference(claim4, nfa_item)

        # agent -> work back-link
        claim9 = make_claim(agent, name_fiaf['ontology/property/work'], work)
        reference(claim9, nfa_item)
                        
# Credit element in the export -> FIAF role item, in the order they are written.
credit_roles = (
    ('OBSAZENI', name_fiaf['ontology/item/cast']),
    ('REZIE', name_fiaf['ontology/item/director']),
    ('KAMERA', name_fiaf['ontology/item/cinematographer']),
    ('SCENAR', name_fiaf['ontology/item/screenwriter']),
    ('HUDBA', name_fiaf['ontology/item/composer']),
)

for x in data:
    for credit_key, credit_role in credit_roles:
        write_credit(x, credit_key, credit_role)

print(len(graph))
        

4430


In [10]:

# write items and manifestations

nfa_item = name_nfa['ontology/item/nfa']

# Export codes -> FIAF items.
# MATERIAL code -> element type
element_by_code = {
    '1': name_fiaf['ontology/item/original_negative'],
    '2': name_fiaf['ontology/item/sound_negative'],
    '6': name_fiaf['ontology/item/sound_negative'],
    '8': name_fiaf['ontology/item/duplicate_negative'],
    '9': name_fiaf['ontology/item/print'],
    '10': name_fiaf['ontology/item/print'],
}
# FORMAT-MATER code -> (specific carrier, aspect ratio or None)
format_by_code = {
    '02': (name_fiaf['ontology/item/35mm'], name_fiaf['ontology/item/133']),
    '03': (name_fiaf['ontology/item/35mm'], name_fiaf['ontology/item/137']),
    '09': (name_fiaf['ontology/item/16mm'], None),
}
# DRUH-PODKLADU code -> film base
base_by_code = {
    'N': name_fiaf['ontology/item/nitrate'],
    'A': name_fiaf['ontology/item/acetate'],
    'P': name_fiaf['ontology/item/polyester'],
}


def lookup_code(table, code):
    """Return table[code], or None if the code is absent, unmapped or not a string."""
    return table.get(code) if isinstance(code, str) else None


for x in data:
    # Resolve the work from the record being processed. Without this the loop
    # would reuse whatever `work` an earlier cell left in the kernel and link
    # every manifestation to that single work.
    work_id = pydash.get(x, 'FILM.FILMID')
    work = name_nfa[f"resource/work/{work_id}"]

    materials = pydash.get(x, 'FILM.MATERIAL')
    if materials is None:
        # Record has no holdings; nothing to write.
        continue

    for m in single_list(materials):
        # One manifestation (random UUID) per material entry, linked to the
        # work in both directions.
        manifestation = name_nfa[f"resource/manifestation/{uuid.uuid4()}"]
        make_claim(manifestation, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/manifestation'])
        make_claim(manifestation, name_fiaf['ontology/property/manifestation_of'], work)
        make_claim(work, name_fiaf['ontology/property/manifestation'], manifestation)

        item_id = pydash.get(m, 'KLIC.MATERIALID')
        item = name_nfa[f"resource/item/{item_id}"]
        make_claim(manifestation, name_fiaf['ontology/property/item'], item)
        # NOTE(review): unlike works, agents and manifestations, items get no
        # instance_of claim — confirm whether that is intended.

        specific_carrier, aspect_ratio = (
            lookup_code(format_by_code, pydash.get(m, 'FORMAT-MATER')) or (None, None)
        )

        # Every item statement below is attributed to the NFA. Codes that are
        # missing or unmapped simply produce no statement.
        item_statements = (
            (name_fiaf['ontology/property/carrier'], name_fiaf['ontology/item/film']),
            (name_fiaf['ontology/property/element'], lookup_code(element_by_code, pydash.get(m, 'MATERIAL'))),
            (name_fiaf['ontology/property/specific_carrier'], specific_carrier),
            (name_fiaf['ontology/property/aspect_ratio'], aspect_ratio),
            (name_fiaf['ontology/property/base'], lookup_code(base_by_code, pydash.get(m, 'DRUH-PODKLADU'))),
        )
        for item_property, item_value in item_statements:
            if item_value is None:
                continue
            claim_id = make_claim(item, item_property, item_value)
            reference(claim_id, nfa_item)

print(len(graph))


4830


In [11]:

# Write the finished graph next to the notebook as Turtle.
turtle_path = pathlib.Path.cwd() / 'nfa.ttl'
graph.serialize(destination=str(turtle_path), format="turtle")
print(len(graph))


4830
