In [1]:

# Swedish Film Institute


In [2]:

# import libraries

import rdflib, pandas, pathlib, json
import numpy, uuid, xmltodict, pydash


In [3]:

# define graph and namespace

graph = rdflib.Graph()
name_sfi = rdflib.Namespace('https://www.filminstitutet.se/') 
name_wb = rdflib.Namespace('http://wikibas.se/ontology')
name_fiaf = rdflib.Namespace('https://www.fiafnet.org/')


In [4]:

# useful functions

def make_claim(s, p, o):
    """Hang a freshly minted claim node off `s` and state `p`/`o` on that node.

    Two triples are added to the module-level `graph`:
    `(s, wb:#claim, <claim>)` and `(<claim>, p, o)`.

    Returns the claim node so qualifiers and references can be attached to it.
    """
    claim_node = name_sfi[f"resource/claim/{uuid.uuid4()}"]
    new_triples = (
        (s, name_wb['#claim'], claim_node),
        (claim_node, p, o),
    )
    for triple in new_triples:
        graph.add(triple)
    return claim_node

def make_qual(s, p, o):
    """Hang a freshly minted qualifier node off `s` and state `p`/`o` on that node.

    Two triples are added to the module-level `graph`:
    `(s, wb:#qualifier, <qualifier>)` and `(<qualifier>, p, o)`.

    Returns the qualifier node.
    """
    qualifier_node = name_sfi[f"resource/qualifier/{uuid.uuid4()}"]
    new_triples = (
        (s, name_wb['#qualifier'], qualifier_node),
        (qualifier_node, p, o),
    )
    for triple in new_triples:
        graph.add(triple)
    return qualifier_node

def reference(claim_id, institute):
    """Attach a reference node to `claim_id` naming `institute` as the contributor.

    Two triples are added to the module-level `graph`:
    `(claim_id, wb:#reference, <reference>)` and
    `(<reference>, fiaf:contributed_by, institute)`.

    Returns the reference node, mirroring `make_claim` and `make_qual`, so a
    caller can attach further statements to it. Callers that ignore the
    return value are unaffected.
    """
    ref_id = name_sfi[f"resource/reference/{uuid.uuid4()}"]
    graph.add((claim_id, name_wb['#reference'], ref_id))
    graph.add((ref_id, name_fiaf['ontology/property/contributed_by'], institute))
    return ref_id
    
def single_list(data):
    """Return `data` itself when it is a list; otherwise wrap it in a one-element list.

    Only `list` instances pass through unchanged: tuples, dicts, strings and
    `None` are all wrapped.
    """
    return data if isinstance(data, list) else [data]


In [5]:

# define institution

# Node standing for the institute itself.
sfi_item = name_sfi['ontology/item/sfi']

# Labels: English first, then Swedish.
institute_labels = (
    ('Swedish Film Institute', 'en'),
    ('Svenska Filminstitutet', 'sv'),
)
for label_text, label_lang in institute_labels:
    graph.add((sfi_item, rdflib.RDFS.label, rdflib.Literal(label_text, lang=label_lang)))

# Claims: the institute is a holding institution and is located in Sweden.
institute_claims = (
    (name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/holding_institution']),
    (name_fiaf['ontology/property/located_in'], name_fiaf['ontology/item/sweden']),
)
for claim_property, claim_value in institute_claims:
    make_claim(sfi_item, claim_property, claim_value)

print(len(graph))


6


In [6]:

# format data

# The Adlib XML export is expected next to the notebook.
datapath = pathlib.Path.cwd() / 'Murnau.xml'

# Read the export as bytes so the XML parser applies the encoding declared in
# the document itself rather than the platform's default text encoding.
with open(datapath, 'rb') as xml_file:
    parsed = xmltodict.parse(xml_file.read())

# Round-trip through JSON so only plain dicts, lists and strings remain.
parsed = json.loads(json.dumps(parsed))

# Keep a JSON copy of the parsed export on disk for inspection.
with open(pathlib.Path.cwd() / 'sfi.json', 'w', encoding='utf-8') as sfi_test:
    json.dump(parsed, sfi_test)

# An export holding a single record parses to a dict rather than a list, and a
# missing path yields None; normalise both cases to a list of record dicts.
data = [
    record
    for record in single_list(pydash.get(parsed, 'adlibXML.recordList.record'))
    if record is not None
]

print(len(graph))


6


In [7]:

# write work

for record in data:
    object_number = record['object_number']
    work_node = name_sfi[f"resource/work/{object_number}"]

    # Type the node as a work.
    make_claim(work_node, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/work'])

    # Record the object number as an external identifier, qualified with and
    # referenced to the institute.
    id_claim = make_claim(work_node, name_fiaf['ontology/property/external_id'], rdflib.Literal(object_number))
    make_qual(id_claim, name_fiaf['ontology/property/institution'], name_sfi['ontology/item/sfi'])
    reference(id_claim, name_sfi['ontology/item/sfi'])

print(len(graph))


158


In [8]:

# write title

for record in data:
    object_number = record['object_number']
    work_node = name_sfi[f"resource/work/{object_number}"]

    # Select title entries whose string form mentions 'Originaltitel'.
    # NOTE(review): this matches the text anywhere in the entry, including the
    # title itself — confirm which field carries the title type.
    original_titles = (
        entry for entry in single_list(record['Title'])
        if 'Originaltitel' in str(entry)
    )

    for entry in original_titles:
        title_claim = make_claim(work_node, name_fiaf['ontology/property/title'], rdflib.Literal(entry['title_complete']))
        make_qual(title_claim, name_fiaf['ontology/property/title_type'], name_fiaf['ontology/item/original_title'])
        reference(title_claim, name_sfi['ontology/item/sfi'])

print(len(graph))


272


In [9]:

# write country

# Source country text -> FIAF country item.
country_items = {
    'Germany': name_fiaf['ontology/item/germany'],
    'USA': name_fiaf['ontology/item/usa'],
}

for record in data:
    object_number = record['object_number']
    work_node = name_sfi[f"resource/work/{object_number}"]

    # Only the first listed production country is inspected.
    first_country = pydash.get(record, 'production_country.value.0.#text')

    for country_text, country_item in country_items.items():
        if first_country == country_text:
            country_claim = make_claim(work_node, name_fiaf['ontology/property/production_country'], country_item)
            reference(country_claim, name_sfi['ontology/item/sfi'])

print(len(graph))


348


In [10]:

# write agents

def write_credit(top_branch, lower_branch, credit_level, agent_type, datum):
    """Write agent claims for one credit role of one record.

    Parameters
    ----------
    top_branch : str
        Key in `datum` holding the credit entries ('Cast' or 'Credits' in
        this notebook). Nothing is written when the key is absent.
    lower_branch : str
        Field prefix inside each entry ('cast' or 'credit'). It selects the
        credit-type key ('cast.credit_type' vs 'credit.type') and the
        '<prefix>.name' sub-record.
    credit_level : str
        Role text an entry's credit type must equal to be written.
    agent_type : rdflib term
        Item used as the agent_type qualifier on the work -> agent claim.
    datum : dict
        The record whose credits are written; its 'object_number' identifies
        the work.

    Returns
    -------
    None
    """
    # Derive the work from the record that was passed in, not from whatever
    # loop variable happens to exist at module level.
    work = name_sfi[f"resource/work/{datum['object_number']}"]
    sfi = name_sfi['ontology/item/sfi']

    if top_branch not in datum:
        return

    type_key = 'cast.credit_type' if lower_branch == 'cast' else 'credit.type'

    # The key is literally '<prefix>.name', so its dot is escaped for pydash.
    # A raw string keeps the backslash without an invalid escape sequence.
    name_path = rf'{lower_branch}\.name'

    for a in single_list(pydash.get(datum, top_branch)):
        if pydash.get(a[type_key], 'value.0.#text') != credit_level:
            continue

        # When the credited name is a pseudonym, use the person it stands for.
        if pydash.get(a, f'{name_path}.pseudonym_for'):
            person_path = f'{name_path}.pseudonym_for'
        else:
            person_path = name_path

        agent_id = pydash.get(a, f'{person_path}.id_number')
        fore = pydash.get(a, f'{person_path}.forename')
        sur = pydash.get(a, f'{person_path}.surname')

        # Without an id every such agent would share the URI '.../agent/None'.
        # NOTE(review): '254560' is excluded by the original code; presumably a
        # placeholder agent record — confirm and document.
        if agent_id is None or agent_id == '254560':
            continue

        agent = name_sfi[f"resource/agent/{agent_id}"]

        # work -> agent, qualified with the role.
        credit_claim = make_claim(work, name_fiaf['ontology/property/agent'], agent)
        make_qual(credit_claim, name_fiaf['ontology/property/agent_type'], agent_type)
        reference(credit_claim, sfi)

        make_claim(agent, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/agent'])

        id_claim = make_claim(agent, name_fiaf['ontology/property/external_id'], rdflib.Literal(agent_id))
        make_qual(id_claim, name_fiaf['ontology/property/institution'], sfi)
        reference(id_claim, sfi)

        # Name parts are optional; a missing part must not be written as the
        # literal text 'None'.
        if fore is not None:
            forename_claim = make_claim(agent, name_fiaf['ontology/property/forename'], rdflib.Literal(fore))
            reference(forename_claim, sfi)

        if sur is not None:
            surname_claim = make_claim(agent, name_fiaf['ontology/property/surname'], rdflib.Literal(sur))
            reference(surname_claim, sfi)

        # agent -> work back-link.
        work_claim = make_claim(agent, name_fiaf['ontology/property/work'], work)
        reference(work_claim, sfi)

# One row per role, in the order the claims are written:
# (top_branch, lower_branch, credit_level, agent-type item).
credit_roles = (
    ('Cast', 'cast', 'Cast', name_fiaf['ontology/item/cast']),
    ('Credits', 'credit', 'Director', name_fiaf['ontology/item/director']),
    ('Credits', 'credit', 'Screenplay', name_fiaf['ontology/item/screenwriter']),
    ('Credits', 'credit', 'Producer', name_fiaf['ontology/item/producer']),
    ('Credits', 'credit', 'Director of Photography', name_fiaf['ontology/item/cinematographer']),
    ('Credits', 'credit', 'Music', name_fiaf['ontology/item/composer']),
    ('Credits', 'credit', 'Film Editor', name_fiaf['ontology/item/editor']),
)

# NOTE: the loop variable is deliberately named `x`; other code in this
# notebook may read the module-level `x` while this loop runs.
for x in data:
    for top_branch, lower_branch, credit_level, agent_type in credit_roles:
        write_credit(top_branch, lower_branch, credit_level, agent_type, x)

print(len(graph))


8696


In [11]:

# write events

# pydash paths: '\.' addresses a key that itself contains a dot. Raw strings
# keep the backslash without relying on an invalid escape sequence.
release_type_path = r'event\.publication\.number.event\.sub_type.value.0.#text'
release_date_path = r'event\.publication\.number.date_start'
decision_date_path = r'event\.decision\.number.date_start'
decision_cert_path = r'event\.decision\.number.decision\.audience_rating.value.0.#text'

sfi = name_sfi['ontology/item/sfi']
sweden = name_fiaf['ontology/item/sweden']

# Swedish releases: one event claim per distinct release date of a work.
for record in data:
    work = name_sfi[f"resource/work/{record['object_number']}"]
    if 'Publication_event' not in record:
        continue

    release_dates = []
    for event in single_list(pydash.get(record, 'Publication_event')):
        if pydash.get(event, release_type_path) != 'Release in Sweden':
            continue
        release_date = pydash.get(event, release_date_path)
        # A release without a date has no value to state; skip it.
        if release_date is not None:
            release_dates.append(release_date)

    for release_date in pydash.uniq(release_dates):
        claim1 = make_claim(work, name_fiaf['ontology/property/event'], rdflib.Literal(release_date))
        make_qual(claim1, name_fiaf['ontology/property/event_type'], name_fiaf['ontology/item/publication'])
        make_qual(claim1, name_fiaf['ontology/property/country'], sweden)
        reference(claim1, sfi)

# Decision events: one event claim per decision, written with the
# decision_censorship event type and the audience rating as certificate.
for record in data:
    work = name_sfi[f"resource/work/{record['object_number']}"]
    if 'Decision_event' not in record:
        continue

    for decision in single_list(pydash.get(record, 'Decision_event')):
        decision_date = pydash.get(decision, decision_date_path)
        cert = pydash.get(decision, decision_cert_path)

        # rdflib.Literal(None) would be stored as the text 'None'; a decision
        # without a date is skipped instead of writing that placeholder.
        if decision_date is None:
            continue

        claim1 = make_claim(work, name_fiaf['ontology/property/event'], rdflib.Literal(decision_date))
        make_qual(claim1, name_fiaf['ontology/property/event_type'], name_fiaf['ontology/item/decision_censorship'])
        make_qual(claim1, name_fiaf['ontology/property/country'], sweden)
        # The certificate qualifier is only written when a rating is present.
        if cert is not None:
            make_qual(claim1, name_fiaf['ontology/property/certificate'], rdflib.Literal(cert))
        reference(claim1, sfi)

print(len(graph))


8982


In [12]:

# write manifestations/items

sfi = name_sfi['ontology/item/sfi']

# Controlled-vocabulary lookups applied to every part:
# (pydash path inside the part, property to write, {source text: FIAF item}).
# Raw strings keep the '\.' that pydash needs for keys containing a dot,
# without relying on an invalid escape sequence.
part_lookups = (
    (
        r'parts\.reference.carrier_type_specific.value.0.#text',
        name_fiaf['ontology/property/specific_carrier'],
        {'16 mm': name_fiaf['ontology/item/16mm'], '35 mm': name_fiaf['ontology/item/35mm']},
    ),
    (
        r'parts\.reference.Material.material.value.0.#text',
        name_fiaf['ontology/property/base'],
        {'Acetate': name_fiaf['ontology/item/acetate']},
    ),
    (
        r'parts\.reference.Object_name.object_name.value.0.#text',
        name_fiaf['ontology/property/element'],
        {'Print': name_fiaf['ontology/item/print'], 'Duplicate negative': name_fiaf['ontology/item/duplicate_negative']},
    ),
)

for record in data:
    work = name_sfi[f"resource/work/{record['object_number']}"]
    parts = single_list(pydash.get(record, r'Parts.parts\.reference.Parts'))

    for part in parts:
        # A record without parts yields [None].
        if part is None:
            continue

        # Without an object number every such item would share the URI
        # '.../item/None', so the part is skipped before anything is written.
        item_id = pydash.get(part, r'parts\.reference.object_number')
        if item_id is None:
            continue

        # Each part gets its own manifestation node with a random id.
        manifestation = name_sfi[f"resource/manifestation/{uuid.uuid4()}"]
        make_claim(manifestation, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/manifestation'])
        make_claim(manifestation, name_fiaf['ontology/property/manifestation_of'], work)

        item = name_sfi[f"resource/item/{item_id}"]
        make_claim(item, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/item'])
        make_claim(item, name_fiaf['ontology/property/item_of'], manifestation)

        held_claim = make_claim(item, name_fiaf['ontology/property/held_at'], sfi)
        reference(held_claim, sfi)

        id_claim = make_claim(item, name_fiaf['ontology/property/external_id'], rdflib.Literal(item_id))
        make_qual(id_claim, name_fiaf['ontology/property/institution'], sfi)
        reference(id_claim, sfi)

        # Every item is written with the generic carrier 'film'.
        carrier_claim = make_claim(item, name_fiaf['ontology/property/carrier'], name_fiaf['ontology/item/film'])
        reference(carrier_claim, sfi)

        # Specific carrier, base and element: written only for known values.
        for path, prop, vocabulary in part_lookups:
            source_text = pydash.get(part, path)
            if isinstance(source_text, str) and source_text in vocabulary:
                lookup_claim = make_claim(item, prop, vocabulary[source_text])
                reference(lookup_claim, sfi)

        # NOTE(review): the value is written as extent_metres unchanged;
        # confirm the source dimension is always expressed in metres.
        ext = pydash.get(part, r'parts\.reference.Dimension.dimension\.value')
        if ext:
            extent_claim = make_claim(item, name_fiaf['ontology/property/extent_metres'], rdflib.Literal(ext))
            reference(extent_claim, sfi)

        # Downward links: work -> manifestation -> item.
        make_claim(work, name_fiaf['ontology/property/manifestation'], manifestation)
        make_claim(manifestation, name_fiaf['ontology/property/item'], item)

print(len(graph))


9316


In [13]:

# Write the finished graph as Turtle next to the notebook.
turtle_path = pathlib.Path.cwd() / 'sfi.ttl'
graph.serialize(destination=str(turtle_path), format="turtle")

print(len(graph))


9316
