In [1]:

# british film institute


In [2]:

# import libraries

import rdflib, pandas, pathlib, json
import numpy, uuid, xmltodict, pydash


In [3]:

# define graph and namespace

# Single in-memory RDF graph; every later cell adds its triples to this object.
graph = rdflib.Graph()
# Namespace under which this notebook mints BFI resources and BFI-local ontology items.
name_bfi = rdflib.Namespace('https://www.bfi.org.uk/') 
# Namespace for the statement model; terms are looked up with a leading '#'
# (e.g. name_wb['#claim']).
# NOTE(review): the upstream Wikibase ontology IRI is 'http://wikiba.se/ontology#';
# confirm that 'wikibas.se' is intentional before merging with other graphs.
name_wb = rdflib.Namespace('http://wikibas.se/ontology')
# Namespace for the shared FIAF properties and items ('ontology/property/...', 'ontology/item/...').
name_fiaf = rdflib.Namespace('https://www.fiafnet.org/')


In [4]:

# useful functions

def make_claim(s, p, o):
    """Attach a new claim node to `s` and store `p`/`o` on that node.

    A claim IRI is minted under ``resource/claim/`` with a random UUID, linked
    from `s` through ``name_wb['#claim']`` and given the triple
    ``(claim, p, o)``.

    Returns:
        The claim IRI, so qualifiers and references can be attached to it.
    """
    claim_node = name_bfi[f"resource/claim/{uuid.uuid4()}"]
    for triple in ((s, name_wb['#claim'], claim_node), (claim_node, p, o)):
        graph.add(triple)
    return claim_node

def make_qual(s, p, o):
    """Attach a new qualifier node to `s` and store `p`/`o` on that node.

    The qualifier IRI is minted under ``resource/qualifier/`` with a random
    UUID and linked from `s` through ``name_wb['#qualifier']``.

    Returns:
        The qualifier IRI.
    """
    qualifier_node = name_bfi[f"resource/qualifier/{uuid.uuid4()}"]
    link = (s, name_wb['#qualifier'], qualifier_node)
    statement = (qualifier_node, p, o)
    graph.add(link)
    graph.add(statement)
    return qualifier_node

def reference(claim_id, institute):
    """Attach a reference node to `claim_id` naming `institute` as contributor.

    A reference IRI is minted under ``resource/reference/`` with a random
    UUID, linked from the claim through ``name_wb['#reference']`` and given a
    ``contributed_by`` triple pointing at `institute`.

    Returns:
        The reference IRI, mirroring what `make_claim` and `make_qual` return
        for the nodes they create. Existing callers that ignore the result are
        unaffected.
    """
    ref_id = name_bfi[f"resource/reference/{uuid.uuid4()}"]
    graph.add((claim_id, name_wb['#reference'], ref_id))
    graph.add((ref_id, name_fiaf['ontology/property/contributed_by'], institute))
    return ref_id
    
def single_list(data):
    """Return `data` unchanged if it is a list, otherwise wrap it in a one-element list."""
    return data if isinstance(data, list) else [data]


In [5]:

# define institution

# Describe the holding institution: an English label plus two unreferenced claims.
bfi_item = name_bfi['ontology/item/bfi']
graph.add((bfi_item, rdflib.RDFS.label, rdflib.Literal('British Film Institute', lang='en')))

for property_path, item_path in (
    ('ontology/property/instance_of', 'ontology/item/holding_institution'),
    ('ontology/property/located_in', 'ontology/item/uk'),
):
    make_claim(bfi_item, name_fiaf[property_path], name_fiaf[item_path])

print(len(graph))


5


In [6]:

# format data

path = pathlib.Path.home() / 'murnau-data' / 'british_film_institute'

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# locale encoding, and keep the file handle under its own name instead of
# rebinding `data` from a file object to the record list.
with open(path / 'BFI_Murnau_Works.json', encoding='utf-8') as source_file:
    # The records live under adlibJSON -> recordList -> record in the export.
    data = list(pydash.get(json.load(source_file), 'adlibJSON.recordList.record'))

# Loading adds no triples; the graph size is printed as a running checkpoint.
print(len(graph))


5


In [7]:

# write work

# Label the item that is used below as the institution qualifier on Wikidata ids.
graph.add((name_bfi['ontology/item/wikidata'], rdflib.RDFS.label, rdflib.Literal('Wikidata', lang='en')))

for x in data:
    work_id = x['object_number'][0]
    work = name_bfi[f"resource/work/{work_id}"]

    make_claim(work, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/work'])

    # The BFI object number is recorded as an external id qualified with the BFI item.
    claim1 = make_claim(work, name_fiaf['ontology/property/external_id'], rdflib.Literal(work_id))
    make_qual(claim1, name_fiaf['ontology/property/institution'], name_bfi['ontology/item/bfi'])
    reference(claim1, name_bfi['ontology/item/bfi'])

    # `or []` lets a record without a URL group pass through instead of
    # raising TypeError when iterating over None.
    for y in pydash.get(x, 'URL') or []:
        # Raw string: the backslash must reach pydash so that the dot inside
        # the key 'URL.description' is not read as a path separator; in a
        # normal string literal '\.' is an invalid escape sequence.
        description = pydash.get(y, r'URL\.description.0') or ''
        if 'Wikidata' in description:
            # The identifier is the last path segment of the linked URL.
            wikidata_id = pydash.get(y, 'URL.0').split('/')[-1]
            claim_id = make_claim(work, name_fiaf['ontology/property/external_id'], rdflib.Literal(wikidata_id))
            make_qual(claim_id, name_fiaf['ontology/property/institution'], name_bfi['ontology/item/wikidata'])
            reference(claim_id, name_bfi['ontology/item/bfi'])

    if pydash.get(x, 'worklevel_type.0.value.1') == 'Monographic':
        claim_id = make_claim(work, name_fiaf['ontology/property/work_type'], name_fiaf['ontology/item/monographic'])
        reference(claim_id, name_bfi['ontology/item/bfi'])

print(len(graph))


324


In [8]:

# write title

for x in data:

    work_id = x['object_number'][0]
    work = name_bfi[f"resource/work/{work_id}"]

    # Take the first Title entry whose text representation mentions 'Original'.
    # A record without such an entry raises IndexError here.
    orig = [y for y in pydash.get(x, 'Title') if 'Original' in str(y)][0]
    title = pydash.get(orig, 'title')[0]

    # Raw string: the key is literally 'title.article', and '\.' in a normal
    # string literal is an invalid escape sequence. The article is looked up
    # once and, when present, prefixed to the title.
    article = pydash.get(orig, r'title\.article')
    if article is not None:
        title = article[0] + ' ' + title

    claim1 = make_claim(work, name_fiaf['ontology/property/title'], rdflib.Literal(title.strip()))
    make_qual(claim1, name_fiaf['ontology/property/title_type'], name_fiaf['ontology/item/original_title'])
    reference(claim1, name_bfi['ontology/item/bfi'])

print(len(graph))


450


In [9]:

# write country

# Production-country terms that are written, and the FIAF item each one maps to.
country_items = {'Germany': name_fiaf['ontology/item/germany'], 'USA': name_fiaf['ontology/item/usa']}

for record in data:

    work_id = record['object_number'][0]
    work = name_bfi[f"resource/work/{work_id}"]

    # Only the first term of the first production_country entry is considered.
    first_country = pydash.get(record, 'production_country.0.term.0')

    for label, country_item in country_items.items():
        if first_country != label:
            continue
        claim_id = make_claim(work, name_fiaf['ontology/property/production_country'], country_item)
        reference(claim_id, name_bfi['ontology/item/bfi'])

print(len(graph))


534


In [10]:

# write agent

def write_credit(source, contribution, uri):
    """Write work/agent claims for every entry of `source` with role `contribution`.

    Args:
        source: iterable of dicts with the keys 'work', 'id', 'name' and 'role'.
            'name' is indexed as a sequence: element 0 is written as the
            surname and element 1, when present, as the forename.
        contribution: role value an entry must equal to be written.
        uri: node used as the agent_type qualifier on the work's agent claim.

    NOTE(review): the agent's instance_of, external_id and name claims are
    minted again for every matching credit, so an agent credited several times
    receives several copies of them. Confirm this is intended.
    """
    institution = name_bfi['ontology/item/bfi']
    matching = list(filter(lambda entry: entry['role'] == contribution, source))

    for credit in matching:

        work = credit['work']
        agent = name_bfi[f"resource/agent/{credit['id']}"]

        # Work -> agent link, qualified with the kind of contribution.
        credit_claim = make_claim(work, name_fiaf['ontology/property/agent'], agent)
        make_qual(credit_claim, name_fiaf['ontology/property/agent_type'], uri)
        reference(credit_claim, institution)

        make_claim(agent, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/agent'])

        # The agent's BFI identifier as an external id.
        id_claim = make_claim(agent, name_fiaf['ontology/property/external_id'], rdflib.Literal(credit['id']))
        make_qual(id_claim, name_fiaf['ontology/property/institution'], institution)
        reference(id_claim, institution)

        surname_claim = make_claim(
            agent, name_fiaf['ontology/property/surname'], rdflib.Literal(credit['name'][0].strip()))
        reference(surname_claim, institution)

        if len(credit['name']) > 1:
            forename_claim = make_claim(
                agent, name_fiaf['ontology/property/forename'], rdflib.Literal(credit['name'][1].strip()))
            reference(forename_claim, institution)

        # Inverse link from the agent back to the work.
        work_claim = make_claim(agent, name_fiaf['ontology/property/work'], work)
        reference(work_claim, institution)

# Flatten cast and crew of every work into one list of credit dicts.
# The pydash paths are raw strings: keys such as 'cast.name' contain a literal
# dot that must be escaped for pydash, and '\.' in a normal string literal is
# an invalid escape sequence.
combined = list()
for x in data:
    work_id = x['object_number'][0]
    work = name_bfi[f"resource/work/{work_id}"]

    # `or []` lets a record without a cast list pass through instead of
    # raising TypeError when iterating over None.
    for y in pydash.get(x, 'cast') or []:
        # Names are split on ','; write_credit uses part 0 as surname and part 1 as forename.
        name = pydash.get(y, r'cast\.name.0.name')[0].split(',')
        credit_type = pydash.get(y, r'cast\.name.0.party\.class.0.value')[0]
        agent_id = pydash.get(y, r'cast\.name\.lref')[0]
        combined.append({'work': work, 'id':agent_id, 'name':name, 'type':credit_type, 'role':'Cast'})

    # Same guard for records without a credits list.
    for y in pydash.get(x, 'credits') or []:
        name = pydash.get(y, r'credit\.name.0.name')[0].split(',')
        credit_type = pydash.get(y, r'credit\.name.0.party\.class.0.value')[0]
        role = pydash.get(y, r'credit\.type.0.term')[0]
        agent_id = pydash.get(y, r'credit\.name\.lref')[0]
        combined.append({'work': work, 'id':agent_id, 'name':name, 'type':credit_type, 'role':role})

# Only credits whose party class is 'PERSON' are written.
combined = [x for x in combined if x['type'] == 'PERSON']

# Role value in the BFI data -> FIAF item used as the agent_type qualifier.
# Order matches the original sequence of calls.
for credit_role, agent_type_path in (
    ('Cast', 'ontology/item/cast'),
    ('Director', 'ontology/item/director'),
    ('Screenplay', 'ontology/item/screenwriter'),
    ('Producer', 'ontology/item/producer'),
    ('Photography', 'ontology/item/cinematographer'),
    ('Music', 'ontology/item/composer'),
    ('Editor', 'ontology/item/editor'),
):
    write_credit(combined, credit_role, name_fiaf[agent_type_path])

print(len(graph))


10408


In [11]:

# write manifestations/items

# Lookup tables from BFI vocabulary values to FIAF ontology items. They are
# built once here instead of once per item inside the writing loop.
CARRIER_ITEMS = {
    'FILM': name_fiaf['ontology/item/film'],
    'VIDEO': name_fiaf['ontology/item/video_tape'],
    'DIGITAL': name_fiaf['ontology/item/digital'],
}
GAUGE_ITEMS = {
    '16mm': name_fiaf['ontology/item/16mm'],
    '35mm': name_fiaf['ontology/item/35mm'],
}
BASE_ITEMS = {
    'Safety': name_fiaf['ontology/item/acetate'],
    'Acetate': name_fiaf['ontology/item/acetate'],
    'Nitrate': name_fiaf['ontology/item/nitrate'],
    'Polyester': name_fiaf['ontology/item/polyester'],
}
SOUND_ITEMS = {
    'Silent': name_fiaf['ontology/item/silent'],
    'Combined': name_fiaf['ontology/item/sound'],
    'Mute': name_fiaf['ontology/item/silent'],
    'Mixed': name_fiaf['ontology/item/sound'],
}
ACCESS_ITEMS = {
    'Master': name_fiaf['ontology/item/master'],
    'Viewing': name_fiaf['ontology/item/viewing'],
}
ELEMENT_ITEMS = {
    'Dupe Negative': name_fiaf['ontology/item/duplicate_negative'],
    'BW Positive': name_fiaf['ontology/item/print'],
    'Negative': name_fiaf['ontology/item/negative'],
    'Duplicating Positive': name_fiaf['ontology/item/duplicate_positive'],
    'Colour Positive': name_fiaf['ontology/item/print'],
}
# The VHS and DVD entries previously pointed at the access items 'master' and
# 'viewing', which recorded an access status as the specific carrier.
# TODO confirm that 'ontology/item/vhs' and 'ontology/item/dvd' exist in the
# FIAF ontology.
VIDEO_FORMAT_ITEMS = {
    'VHS': name_fiaf['ontology/item/vhs'],
    'DVD': name_fiaf['ontology/item/dvd'],
    'DB': name_fiaf['ontology/item/digibeta'],
}


def _claim_if_mapped(subject, property_path, value, mapping):
    """Add a BFI-referenced claim on `subject` for each `mapping` key equal to `value`."""
    for key, target in mapping.items():
        if value == key:
            claim_id = make_claim(subject, name_fiaf[property_path], target)
            reference(claim_id, name_bfi['ontology/item/bfi'])


# Pass 1: walk work -> Parts -> parts_reference -> Parts and collect one flat
# record per carrier.
items = list()
for x in data:
    work_id = x['object_number'][0]
    # `or []` lets works without Parts, or parts without parts_reference, pass
    # through instead of raising TypeError when iterating over None.
    for manifestation in pydash.get(x, 'Parts') or []:
        for item in pydash.get(manifestation, 'parts_reference') or []:
            if 'Parts' in item:
                for carrier in pydash.get(item, 'Parts'):

                    carrier = pydash.get(carrier, 'parts_reference.0')

                    item_id = pydash.get(carrier, 'object_number.0')
                    copy_status = pydash.get(carrier, 'copy_status.0.value.1')
                    item_type = pydash.get(carrier, 'item_type.0.value.0')
                    sound = pydash.get(carrier, 'sound_item.0.value.1')
                    base = pydash.get(carrier, 'base.0.value.1')
                    # NOTE(review): unlike the other fields this is read without
                    # an index; if the value is a list, the equality test against
                    # ELEMENT_ITEMS keys below can never match. Confirm against the data.
                    phys = pydash.get(carrier, 'physical_description')
                    gauge = pydash.get(carrier, 'gauge_film.0.value.1')
                    vid_form = pydash.get(carrier, 'video_format.0.value.0')
                    if 'Dimension' in carrier:
                        # Raw strings: 'dimension.part' and 'dimension.value' are
                        # single keys containing a dot, and '\.' in a normal
                        # string literal is an invalid escape sequence.
                        totals = [y for y in pydash.get(carrier, 'Dimension')
                                  if pydash.get(y, r'dimension\.part.0') == 'Total']
                        duration = pydash.get(totals, r'0.dimension\.value.0')
                    else:
                        duration = None

                    items.append({'work_id':work_id, 'item_id':item_id, 'copy':copy_status, 'item_type':item_type,
                                  'sound':sound, 'base':base, 'element':phys, 'gauge':gauge, 'video':vid_form, 'dure':duration})

# Pass 2: write one manifestation and one item per collected carrier.
for i in items:
    # Carriers whose copy status is 'Removed' are not written.
    if i['copy'] == 'Removed':
        continue

    work = name_bfi[f"resource/work/{i['work_id']}"]

    # Every carrier gets its own manifestation node with a random UUID.
    manifestation = name_bfi[f"resource/manifestation/{uuid.uuid4()}"]
    make_claim(manifestation, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/manifestation'])
    make_claim(manifestation, name_fiaf['ontology/property/manifestation_of'], work)

    item_id = i['item_id']
    item = name_bfi[f"resource/item/{item_id}"]

    make_claim(item, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/item'])
    make_claim(item, name_fiaf['ontology/property/item_of'], manifestation)

    claim_id = make_claim(item, name_fiaf['ontology/property/held_at'], name_bfi['ontology/item/bfi'])
    reference(claim_id, name_bfi['ontology/item/bfi'])

    claim_id = make_claim(item, name_fiaf['ontology/property/external_id'], rdflib.Literal(item_id))
    make_qual(claim_id, name_fiaf['ontology/property/institution'], name_bfi['ontology/item/bfi'])
    reference(claim_id, name_bfi['ontology/item/bfi'])

    _claim_if_mapped(item, 'ontology/property/carrier', i['item_type'], CARRIER_ITEMS)
    _claim_if_mapped(item, 'ontology/property/specific_carrier', i['gauge'], GAUGE_ITEMS)
    _claim_if_mapped(item, 'ontology/property/base', i['base'], BASE_ITEMS)
    _claim_if_mapped(item, 'ontology/property/sound', i['sound'], SOUND_ITEMS)
    _claim_if_mapped(item, 'ontology/property/access', i['copy'], ACCESS_ITEMS)
    _claim_if_mapped(item, 'ontology/property/element', i['element'], ELEMENT_ITEMS)

    # Colour is derived from a substring match on the text form of the
    # physical description.
    if 'BW' in str(i['element']):
        claim_id = make_claim(item, name_fiaf['ontology/property/colour'], name_fiaf['ontology/item/black_and_white'])
        reference(claim_id, name_bfi['ontology/item/bfi'])

    if 'Colour' in str(i['element']):
        claim_id = make_claim(item, name_fiaf['ontology/property/colour'], name_fiaf['ontology/item/colour'])
        reference(claim_id, name_bfi['ontology/item/bfi'])

    _claim_if_mapped(item, 'ontology/property/specific_carrier', i['video'], VIDEO_FORMAT_ITEMS)

    # The 'Total' dimension value is written as extent_feet.
    # NOTE(review): the unit is not checked here; confirm the source value is in feet.
    if i['dure']:
        claim_id = make_claim(item, name_fiaf['ontology/property/extent_feet'], rdflib.Literal(i['dure']))
        reference(claim_id, name_bfi['ontology/item/bfi'])

    # Downward links: work -> manifestation -> item.
    make_claim(work, name_fiaf['ontology/property/manifestation'], manifestation)
    make_claim(manifestation, name_fiaf['ontology/property/item'], item)

print(len(graph))


12438


In [12]:

# Write the graph as Turtle into the current working directory, then print its size.
turtle_path = pathlib.Path.cwd() / 'british_film_institute.ttl'
graph.serialize(destination=str(turtle_path), format="turtle")
print(len(graph))


12438
