In [1]:

# Academy Film Archive


In [2]:

# import libraries

import rdflib, pandas, pathlib, json
import numpy, uuid, xmltodict, pydash


In [3]:
# define graph and namespace
# rdflib.Namespace builds a term by appending the key to the base string, so a
# base used with path-style keys ('resource/...', 'ontology/...') must end in '/'.
# name_wb is only indexed with '#...' keys and therefore needs no separator.

graph = rdflib.Graph()
name_afa = rdflib.Namespace('https://www.oscars.org/film-archive/')
name_wb = rdflib.Namespace('http://wikibas.se/ontology')
name_fiaf = rdflib.Namespace('https://www.fiafnet.org/')


In [4]:

# useful functions

def make_claim(s, p, o):
    """Attach the statement (p, o) to subject `s` through a newly minted claim node.

    Two triples are added to the module-level graph: `s` is linked to the
    claim node with the wikibase '#claim' predicate, and the claim node
    carries the actual predicate/object pair.

    Returns the URI of the claim node so qualifiers and references can be
    hung off it.
    """
    claim_node = name_afa["resource/claim/" + str(uuid.uuid4())]
    for triple in ((s, name_wb['#claim'], claim_node), (claim_node, p, o)):
        graph.add(triple)
    return claim_node

def make_qual(s, p, o):
    """Qualify node `s` with the pair (p, o) through a newly minted qualifier node.

    Adds the link `s` -> wikibase '#qualifier' -> qualifier node and the
    statement qualifier node -> `p` -> `o` to the module-level graph.

    Returns the URI of the qualifier node.
    """
    qualifier_node = name_afa["resource/qualifier/" + str(uuid.uuid4())]
    for triple in ((s, name_wb['#qualifier'], qualifier_node), (qualifier_node, p, o)):
        graph.add(triple)
    return qualifier_node

def reference(claim_id, institute):
    """Record `institute` as the contributor of the claim `claim_id`.

    A new reference node is minted, linked from the claim with the wikibase
    '#reference' predicate, and given a FIAF 'contributed_by' statement
    pointing at `institute`.

    Returns the URI of the reference node, mirroring make_claim and
    make_qual (existing callers that ignore the result are unaffected).
    """
    ref_id = name_afa[f"resource/reference/{uuid.uuid4()}"]
    graph.add((claim_id, name_wb['#reference'], ref_id))
    graph.add((ref_id, name_fiaf['ontology/property/contributed_by'], institute))
    return ref_id
    
def single_list(data):
    """Return `data` unchanged when it is already a list, otherwise wrap it in one.

    Lets callers iterate uniformly over values that may arrive either as a
    single object or as a list of objects. Note that None is wrapped too,
    giving [None].
    """
    return data if isinstance(data, list) else [data]


In [5]:

# define institution
# The archive itself is described once: an English label plus two claims
# (what kind of thing it is, and where it is located).

afa = name_afa['ontology/item/afa']
graph.add((afa, rdflib.RDFS.label, rdflib.Literal('Academy Film Archive', lang='en')))

for prop, target in (('instance_of', 'holding_institution'), ('located_in', 'usa')):
    make_claim(afa, name_fiaf[f'ontology/property/{prop}'], name_fiaf[f'ontology/item/{target}'])

print(len(graph)) 


5


In [6]:

# format data

def load_records(filename):
    """Parse an adlibXML export in the working directory and return its records as a list.

    The file is read as bytes so the XML parser honours the document's own
    encoding declaration rather than the platform's default text encoding.
    xmltodict yields a bare dict when only one <record> is present, so the
    result is normalised with single_list; an export without records gives [].
    """
    with open(pathlib.Path.cwd() / filename, 'rb') as xml_file:
        parsed = xmltodict.parse(xml_file.read())
    records = pydash.get(parsed, 'adlibXML.recordList.record')
    if records is None:
        return []
    return single_list(records)

work_data = load_records('AFA_murnau_Works_2021-01-13.xml')
item_data = load_records('AFA_murnau_items_carriers_2021-01-11.xml')

print(len(graph))


5


In [7]:

# write work
# Every work record gets an 'instance_of work' claim and an external-id claim
# holding its object number; only the external-id claim is qualified and referenced.

afa_institution = name_afa['ontology/item/afa']

for record in work_data:

    object_number = record['object_number']
    work_uri = name_afa[f"resource/work/{object_number}"]

    make_claim(work_uri, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/work'])

    identifier_claim = make_claim(
        work_uri, name_fiaf['ontology/property/external_id'], rdflib.Literal(object_number))
    make_qual(identifier_claim, name_fiaf['ontology/property/institution'], afa_institution)
    reference(identifier_claim, afa_institution)

print(len(graph))


85


In [8]:

# write title
# One title claim per work, taken from the first 'Title' group whose content
# mentions 'origin'; the claim is qualified as the original title.

for x in work_data:

    work_id = x['object_number']
    work = name_afa[f"resource/work/{work_id}"]

    # xmltodict returns a bare dict when a record has a single 'Title' group;
    # normalise so the search below always walks title groups, never dict keys.
    titles = single_list(pydash.get(x, 'Title'))
    orig = next((y for y in titles if 'origin' in str(y)), None)
    title = pydash.get(orig, 'title')

    if title is None:
        # report instead of failing the whole cell with an IndexError
        print(f"no original title found for work {work_id}, skipped")
        continue

    claim_id = make_claim(work, name_fiaf['ontology/property/title'], rdflib.Literal(title.strip()))
    make_qual(claim_id, name_fiaf['ontology/property/title_type'], name_fiaf['ontology/item/original_title'])
    reference(claim_id, name_afa['ontology/item/afa'])

print(len(graph))


145


In [9]:

# write country
# Only the two country labels listed below are translated; any other value
# of 'Production.production_country' produces no claim.

country_items = {
    'Germany': name_fiaf['ontology/item/germany'],
    'United States': name_fiaf['ontology/item/usa'],
}

for record in work_data:

    work_uri = name_afa[f"resource/work/{record['object_number']}"]
    production_country = pydash.get(record, 'Production.production_country')

    matching_items = [item for label, item in country_items.items() if production_country == label]
    for country_item in matching_items:
        country_claim = make_claim(work_uri, name_fiaf['ontology/property/production_country'], country_item)
        reference(country_claim, name_afa['ontology/item/afa'])

print(len(graph))


185


In [10]:

# write agent

def write_credit(source, contribution, uri):
    """Write the agent claims for every credit of one contribution type.

    Parameters
    ----------
    source : list of dict
        Credit entries with the keys 'work' (work URI), 'id' (agent
        identifier), 'name' and 'type'.
    contribution : str
        Only entries whose 'type' equals this value are written.
    uri : URI
        Object of the 'agent_type' qualifier placed on the work -> agent claim.

    For each selected entry the graph receives: a referenced work -> agent
    claim qualified with `uri`; an 'instance_of agent' claim; a qualified and
    referenced external-id claim; referenced forename / surname claims; and a
    referenced agent -> work claim.

    Names are split on ',': the first part is written as surname and the
    second as forename. A name without a comma yields a surname claim only,
    and a missing or empty name yields neither, instead of raising.
    """
    afa = name_afa['ontology/item/afa']

    for s in [x for x in source if x['type'] == contribution]:

        work = s['work']
        agent = name_afa[f"resource/agent/{s['id']}"]

        claim_id = make_claim(work, name_fiaf['ontology/property/agent'], agent)
        make_qual(claim_id, name_fiaf['ontology/property/agent_type'], uri)
        reference(claim_id, afa)

        make_claim(agent, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/agent'])

        claim_id = make_claim(agent, name_fiaf['ontology/property/external_id'], rdflib.Literal(s['id']))
        make_qual(claim_id, name_fiaf['ontology/property/institution'], afa)
        reference(claim_id, afa)

        # guard against credits whose name is absent or carries no comma
        name = s['name'].split(',') if s['name'] else []

        if len(name) > 1:
            claim_id = make_claim(agent, name_fiaf['ontology/property/forename'], rdflib.Literal(name[1].strip()))
            reference(claim_id, afa)

        if name:
            claim_id = make_claim(agent, name_fiaf['ontology/property/surname'], rdflib.Literal(name[0].strip()))
            reference(claim_id, afa)

        claim_id = make_claim(agent, name_fiaf['ontology/property/work'], work)
        reference(claim_id, afa)

# Flatten the general credits and the director credits of every work into one
# list of {'work', 'id', 'name', 'type'} entries for write_credit.
# The pydash paths are raw strings: '\.' escapes a literal dot inside a key
# name and is not a valid escape sequence in an ordinary Python string.
combined = list()
for x in work_data:

    work_id = x['object_number']
    work = name_afa[f"resource/work/{work_id}"]

    if 'credits' in x:
        # single_list: a lone credit arrives from xmltodict as a dict, and
        # iterating that directly would walk its keys and drop the credit
        for y in single_list(pydash.get(x, 'credits')):
            name = pydash.get(y, r'credit\.name.name')
            agent_id = pydash.get(y, r'credit\.name\.lref')
            agent_type = pydash.get(y, r'credit\.type')
            combined.append({'work': work, 'id': agent_id, 'name': name, 'type': agent_type})

    if 'director_credit' in x:
        for y in single_list(pydash.get(x, 'director_credit')):
            name = pydash.get(y, 'director.name')
            agent_id = pydash.get(y, r'director\.lref')
            agent_type = pydash.get(y, r'director\.type')
            combined.append({'work': work, 'id': agent_id, 'name': name, 'type': agent_type})

# source credit type -> FIAF agent type item; other credit types are not written
credit_types = {
    'Cast': name_fiaf['ontology/item/cast'],
    'Director': name_fiaf['ontology/item/director'],
    'Writer': name_fiaf['ontology/item/screenwriter'],
    'Producer': name_fiaf['ontology/item/producer'],
    'Director of photography': name_fiaf['ontology/item/cinematographer'],
    'Music': name_fiaf['ontology/item/composer'],
    'Editor': name_fiaf['ontology/item/editor'],
}

for contribution, agent_type_uri in credit_types.items():
    write_credit(combined, contribution, agent_type_uri)

print(len(graph))        
        

965


In [11]:

# write manifestations/items
# This cell only gathers data: it adds nothing to the graph.

# Each reference listed under a work's 'Parts' -> that work's object number.
manifestation_dict = dict()
for x in work_data:
    if 'Parts' in x:
        for y in single_list(pydash.get(x, 'Parts')):
            manif_id = y['parts_reference']
            manifestation_dict[manif_id] = x['object_number']

# Key in the item dictionary -> pydash path inside the source record.
# ('material_type_film', the safety/nitrate split, was read but never used,
# so it is no longer looked up.)
item_fields = {
    'item_carrier': 'item_type.value.0.#text',   # note video is not split
    'video_carrier': 'material_type_video',      # this has video tape and disc split
    'audio_carrier': 'material_type_audio',      # this has audio tape
    'film_specific': 'material_format_film',     # like release print
    'video_specific': 'material_format_video',   # like digibeta
    'audio_specific': 'material_format_audio',   # like 1/4" tape
    'gauge': 'gauge.gauge_film',                 # has film gauge but also other length
    'colour': 'image_color',                     # colour
    'base': 'base',                              # base
    'length': 'footage_length',                  # extent
    'duration': 'duration',                      # duration
    'usage': 'copy_usage',                       # usage
}

items = list()
# keep only records whose object number starts with 'I'
for x in [i for i in item_data if i['object_number'].startswith('I')]:
    # raises KeyError when the parent reference is not listed under any work's 'Parts'
    work_id = manifestation_dict[pydash.get(x, 'Part_of.part_of_reference')]

    entry = {'work_id': work_id, 'item_id': pydash.get(x, 'object_number')}
    for field, path in item_fields.items():
        entry[field] = pydash.get(x, path)
    items.append(entry)

print(len(graph))    


965


In [12]:

# process manifestations/items
# Every collected item gets its own new manifestation node (random UUID),
# linked in both directions to its work and to the item, followed by the
# item's descriptive claims.

def referenced_claim(subject, prop, obj):
    """Add the claim subject -> FIAF property `prop` -> obj, referenced to the AFA; return the claim node."""
    claim_id = make_claim(subject, name_fiaf[f'ontology/property/{prop}'], obj)
    reference(claim_id, name_afa['ontology/item/afa'])
    return claim_id

# (key in the item dictionary, FIAF property, {source value: FIAF item name})
# Source values are matched exactly (case-sensitive); unlisted values produce no claim.
item_vocabularies = [
    ('item_carrier', 'carrier', {
        'FILM': 'film',
        'DIGITAL': 'digital'}),
    ('video_carrier', 'carrier', {
        'Video Tape': 'video_tape',
        'Disc': 'disc'}),
    ('audio_carrier', 'carrier', {
        'Audio Tape': 'sound_tape'}),
    ('film_specific', 'element', {
        'fine grain master positive': 'duplicate_positive',
        'picture negative': 'negative',
        'dupe picture negative': 'duplicate_negative',
        'optical track negative': 'sound_negative',
        'dupe negative': 'duplicate_negative',
        'Work print': 'work_print',
        'print': 'print'}),
    ('video_specific', 'specific_carrier', {
        'Betacam SP': 'betacamsp',
        'Betamax': 'betamax',
        'D5': 'd5',
        'D2': 'd2',
        'VHS': 'vhs',
        'Hi-8mm': 'hi8',
        'HDCAM SR': 'hdcam',
        'Digital Betacam': 'digibeta',
        'Digital Video Disk (DVD)': 'dvd'}),
    ('audio_specific', 'specific_carrier', {
        '1/4" Magnetic Tape': 'quarter-inch'}),
    ('gauge', 'specific_carrier', {
        '16mm': '16mm',
        '35mm': '35mm',
        '8mm': '8mm'}),
    ('colour', 'colour', {
        'color': 'colour',
        'black and white': 'black_and_white',
        'black and white (tinted)': 'tinted'}),
    ('base', 'base', {
        'safety': 'acetate',
        'acetate': 'acetate',
        'nitrate': 'nitrate',
        'polyester': 'polyester'}),
    ('usage', 'access', {
        'Preservation': 'master',
        'Conservation': 'master',
        'Access': 'viewing'}),
]

for i in items:

    work = name_afa[f"resource/work/{i['work_id']}"]

    manifestation = name_afa[f"resource/manifestation/{uuid.uuid4()}"]
    make_claim(manifestation, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/manifestation'])
    make_claim(manifestation, name_fiaf['ontology/property/manifestation_of'], work)

    item_id = i['item_id']
    item = name_afa[f"resource/item/{item_id}"]

    make_claim(item, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/item'])
    make_claim(item, name_fiaf['ontology/property/item_of'], manifestation)

    referenced_claim(item, 'held_at', name_afa['ontology/item/afa'])

    claim_id = referenced_claim(item, 'external_id', rdflib.Literal(item_id))
    make_qual(claim_id, name_fiaf['ontology/property/institution'], name_afa['ontology/item/afa'])

    # Controlled-vocabulary attributes. Equality comparison (rather than a
    # dict lookup) keeps unhashable source values harmless: they simply match nothing.
    for field, prop, vocabulary in item_vocabularies:
        for source_value, fiaf_name in vocabulary.items():
            if i[field] == source_value:
                referenced_claim(item, prop, name_fiaf[f'ontology/item/{fiaf_name}'])

    # Free-value attributes are written as literals when present.
    if i['length']:
        referenced_claim(item, 'extent_feet', rdflib.Literal(i['length']))

    if i['duration']:
        referenced_claim(item, 'duration', rdflib.Literal(i['duration']))

    make_claim(work, name_fiaf['ontology/property/manifestation'], manifestation)
    make_claim(manifestation, name_fiaf['ontology/property/item'], item)

print(len(graph))


4685


In [13]:

# Write the finished graph to 'afa.ttl' in the current working directory
# as Turtle, then report the number of triples.
turtle_path = pathlib.Path.cwd() / 'afa.ttl'
graph.serialize(destination=str(turtle_path), format="turtle")
print(len(graph))


4685
