In [1]:

# Library of Congress


In [2]:

# import libraries

import rdflib, pandas, pathlib, json
import numpy, uuid, xmltodict, pydash


In [3]:

# define graph and namespace

# Single in-memory graph that every cell below adds triples to.
graph = rdflib.Graph()
# Namespace for resources minted by this notebook (works, agents, items, claims ...).
name_loc = rdflib.Namespace('https://loc.gov/') 
# Namespace used for the '#claim', '#qualifier' and '#reference' linking predicates.
# NOTE(review): the host here is 'wikibas.se'; the public Wikibase ontology lives at
# 'http://wikiba.se/ontology#'. Confirm whether this spelling is intentional (other
# files/consumers may rely on it) before changing it.
name_wb = rdflib.Namespace('http://wikibas.se/ontology')
# Namespace for the shared FIAF ontology items and properties referenced below.
name_fiaf = rdflib.Namespace('https://www.fiafnet.org/')


In [4]:

# useful functions

def make_claim(s, p, o):
    """Record the statement (s, p, o) through an intermediate claim node.

    A fresh claim URI is minted under ``resource/claim/`` with a random UUID.
    Two triples are added to the module-level ``graph``:
    ``s --#claim--> claim`` and ``claim --p--> o``.

    Returns the claim URI so callers can hang qualifiers and references on it.
    """
    claim_path = f"resource/claim/{uuid.uuid4()}"
    claim_node = name_loc[claim_path]
    for triple in ((s, name_wb['#claim'], claim_node), (claim_node, p, o)):
        graph.add(triple)
    return claim_node

def make_qual(s, p, o):
    """Qualify the node ``s`` (typically a claim) with the pair (p, o).

    A fresh qualifier URI is minted under ``resource/qualifier/`` with a random
    UUID. Two triples are added to the module-level ``graph``:
    ``s --#qualifier--> qualifier`` and ``qualifier --p--> o``.

    Returns the qualifier URI.
    """
    qualifier_path = f"resource/qualifier/{uuid.uuid4()}"
    qualifier_node = name_loc[qualifier_path]
    for triple in ((s, name_wb['#qualifier'], qualifier_node), (qualifier_node, p, o)):
        graph.add(triple)
    return qualifier_node

def reference(claim_id, institute):
    """Attach a provenance reference to ``claim_id`` naming ``institute`` as contributor.

    A fresh reference URI is minted under ``resource/reference/`` with a random
    UUID. Two triples are added to the module-level ``graph``:
    ``claim_id --#reference--> ref`` and ``ref --contributed_by--> institute``.

    Returns the reference URI, mirroring ``make_claim`` and ``make_qual``
    (callers that ignore the return value are unaffected).
    """
    ref_id = name_loc[f"resource/reference/{uuid.uuid4()}"]
    graph.add((claim_id, name_wb['#reference'], ref_id))
    graph.add((ref_id, name_fiaf['ontology/property/contributed_by'], institute))
    return ref_id
    
def single_list(data):
    """Return ``data`` unchanged if it is already a list, else wrap it in a one-element list.

    Anything that is not a ``list`` instance (including ``None``, dicts and
    strings) is wrapped, so ``single_list(None)`` yields ``[None]``.
    """
    return data if isinstance(data, list) else [data]


In [5]:

# define institution

# Describe the contributing institution itself: an English label plus two claims.
loc_item = name_loc['ontology/item/loc']
loc_label = rdflib.Literal('Library of Congress', lang='en')
graph.add((loc_item, rdflib.RDFS.label, loc_label))

institution_claims = (
    (name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/holding_institution']),
    (name_fiaf['ontology/property/located_in'], name_fiaf['ontology/item/usa']),
)
for institution_property, institution_value in institution_claims:
    make_claim(loc_item, institution_property, institution_value)

print(len(graph))


5


In [6]:

# format data

# Parse every XML export below the working directory and keep the first
# 'mavis.TitleWork' entry of each file as one work record.
data = []
xml_paths = list(pathlib.Path.cwd().glob('**/*.xml'))
for xml_path in xml_paths:
    with open(xml_path, encoding='ISO-8859-1') as xml_file:
        parsed = xmltodict.parse(xml_file.read())
    title_works = single_list(pydash.get(parsed, 'mavis.TitleWork'))
    data.append(title_works[0])

# Keep a JSON copy of the parsed records next to the notebook.
export_path = pathlib.Path.cwd() / 'loc.json'
with open(export_path, 'w') as export:
    json.dump(data, export)

# No triples are written in this cell; the count is printed for continuity.
print(len(graph))


5


In [7]:

# write work 

# One work resource per record, typed as a work and carrying the LoC
# identifier (last path segment of the record's '@xl:href').
loc_item = name_loc['ontology/item/loc']
instance_of = name_fiaf['ontology/property/instance_of']
external_id = name_fiaf['ontology/property/external_id']

for record in data:
    work_id = record['@xl:href'].split('/')[-1]
    work = name_loc[f"resource/work/{work_id}"]

    make_claim(work, instance_of, name_fiaf['ontology/item/work'])

    # The identifier claim is qualified with, and referenced to, the institution.
    id_claim = make_claim(work, external_id, rdflib.Literal(work_id))
    make_qual(id_claim, name_fiaf['ontology/property/institution'], loc_item)
    reference(id_claim, loc_item)

print(len(graph))


61


In [8]:

# write title

# Pick one title per work. A preferred/alternate title whose stringified record
# mentions 'Original title' or 'German' wins (the last such entry if several);
# otherwise the record's own '@xl:title' is used as a plain work title.
title_paths = ('preferredTitle.Title', 'alternateTitles.Title')
loc_item = name_loc['ontology/item/loc']

for record in data:

    work_id = record['@xl:href'].split('/')[-1]
    work = name_loc[f"resource/work/{work_id}"]

    candidates = [
        entry
        for path in title_paths
        for entry in single_list(pydash.get(record, path))
    ]
    original_titles = [
        pydash.get(entry, '@xl:title')
        for entry in candidates
        if 'Original title' in str(entry) or 'German' in str(entry)
    ]

    selected_title = original_titles[-1] if original_titles else ''
    if selected_title == '':
        selected_title = pydash.get(record, '@xl:title')
        title_type = name_fiaf['ontology/item/work_title']
    else:
        title_type = name_fiaf['ontology/item/original_title']

    # The final character of the source title is dropped before it is stored.
    title_literal = rdflib.Literal(selected_title[:-1])
    claim_id = make_claim(work, name_fiaf['ontology/property/title'], title_literal)
    make_qual(claim_id, name_fiaf['ontology/property/title_type'], title_type)
    reference(claim_id, loc_item)

print(len(graph))


103


In [9]:

# write country

# Translate the source country code into a FIAF country item; any code that is
# not listed here aborts the run so the mapping can be extended deliberately.
country_items = {
    'US': name_fiaf['ontology/item/usa'],
    'GG': name_fiaf['ontology/item/germany'],
}

for record in data:

    work_id = record['@xl:href'].split('/')[-1]
    work = name_loc[f"resource/work/{work_id}"]

    country = pydash.get(record, 'countries.WorkCountry.@xl:title')
    fiaf_country = country_items.get(country)
    if fiaf_country is None:
        raise Exception('Unknown country.')

    claim = make_claim(work, name_fiaf['ontology/property/production_country'], fiaf_country)
    reference(claim, name_loc['ontology/item/loc'])

print(len(graph))


131


In [10]:

# write agents

def write_credit(work_data, dict_key, agent_type):
    """Write agent credits of one role for a single work record.

    Parameters
    ----------
    work_data : dict
        One parsed work record; its '@xl:href' supplies the work identifier and
        'roles.Name-Role' supplies the credits.
    dict_key : str
        Role title to select, compared against each credit's 'role.@xl:title'.
    agent_type : rdflib term
        FIAF item used as the 'agent_type' qualifier on the work->agent claim.

    For every matching credit whose party is a Person, the work is linked to
    the agent, and the agent receives instance_of, external_id, optional
    forename, surname, optional gender (only 'Male'/'Female' are mapped) and a
    back-link to the work. Sourced claims are referenced to the LoC item.
    """
    loc_item = name_loc['ontology/item/loc']

    # Read from the record passed in rather than a module-level loop variable,
    # so the function gives the right answer for any caller.
    work_id = work_data['@xl:href'].split('/')[-1]
    work = name_loc[f"resource/work/{work_id}"]

    # A record with exactly one credit holds a dict here instead of a list, and
    # a record without credits holds nothing; single_list normalises both.
    for a in single_list(pydash.get(work_data, 'roles.Name-Role')):
        party = pydash.get(a, 'party')
        if not party or 'Person' not in party:
            continue
        if pydash.get(a, 'role.@xl:title') != dict_key:
            continue

        forename = pydash.get(a, 'party.Person.preferredName.PersonName.firstName')
        surname = pydash.get(a, 'party.Person.preferredName.PersonName.name')
        key = pydash.get(a, 'party.Person.@xl:href').split('/')[-1]
        gend = pydash.get(a, 'party.Person.gender.@xl:title')

        agent = name_loc[f"resource/agent/{key}"]

        claim1 = make_claim(work, name_fiaf['ontology/property/agent'], agent)
        make_qual(claim1, name_fiaf['ontology/property/agent_type'], agent_type)
        reference(claim1, loc_item)

        make_claim(agent, name_fiaf['ontology/property/instance_of'], name_fiaf['ontology/item/agent'])

        claim2 = make_claim(agent, name_fiaf['ontology/property/external_id'], rdflib.Literal(key))
        make_qual(claim2, name_fiaf['ontology/property/institution'], loc_item)
        reference(claim2, loc_item)

        if forename is not None:
            claim3 = make_claim(agent, name_fiaf['ontology/property/forename'], rdflib.Literal(forename))
            reference(claim3, loc_item)

        claim4 = make_claim(agent, name_fiaf['ontology/property/surname'], rdflib.Literal(surname))
        reference(claim4, loc_item)

        if gend == 'Male':
            claim_id = make_claim(agent, name_fiaf['ontology/property/gender'], name_fiaf['ontology/item/male'])
            reference(claim_id, loc_item)

        if gend == 'Female':
            claim_id = make_claim(agent, name_fiaf['ontology/property/gender'], name_fiaf['ontology/item/female'])
            reference(claim_id, loc_item)

        claim_id = make_claim(agent, name_fiaf['ontology/property/work'], work)
        reference(claim_id, loc_item)

# Source role title -> FIAF agent-type item, processed in this order per work.
credit_roles = (
    ('Cast/Actor', name_fiaf['ontology/item/cast']),
    ('Director', name_fiaf['ontology/item/director']),
    ('Producer', name_fiaf['ontology/item/producer']),
    ('Cinematographer/Director of Photography', name_fiaf['ontology/item/cinematographer']),
    ('Scriptwriter', name_fiaf['ontology/item/screenwriter']),
    ('Music Composer', name_fiaf['ontology/item/composer']),
)

for x in data:
    for role_title, fiaf_role in credit_roles:
        write_credit(x, role_title, fiaf_role)

print(len(graph))


1179


In [11]:

# write events
    
# Write one copyright event per 'Copyright' date entry of each work.
for x in data:

    work_id = x['@xl:href'].split('/')[-1]
    work = name_loc[f"resource/work/{work_id}"]

    date_data = pydash.get(x, 'objectDates.Date-Year')
    for y in single_list(date_data):
        if pydash.get(y, 'dateType.@xl:title') != 'Copyright':
            continue

        year = pydash.get(y, 'yearFrom')
        month = pydash.get(y, 'monthFrom')
        day = pydash.get(y, 'dayFrom')

        # Without a year there is nothing meaningful to record.
        if not year:
            continue

        # Build the date at whatever precision the source provides
        # (YYYY, YYYY-MM or YYYY-MM-DD) instead of failing when the month or
        # day is absent. A day is only used when a month is present.
        date = str(year)
        if month:
            date += f"-{str(month).zfill(2)}"
            if day:
                date += f"-{str(day).zfill(2)}"

        claim_id = make_claim(work, name_fiaf['ontology/property/event'], rdflib.Literal(date))
        make_qual(claim_id, name_fiaf['ontology/property/event_type'], name_fiaf['ontology/item/decision_copyright'])
        make_qual(claim_id, name_fiaf['ontology/property/country'], name_fiaf['ontology/item/usa'])
        reference(claim_id, name_loc['ontology/item/loc'])

print(len(graph))


1195


In [12]:

# write manifestations/items

# Each holding listed under a work's 'components' becomes one item, wrapped in
# its own freshly minted manifestation. Descriptive properties are filled in
# from the table below: (source field, FIAF property, {source value: FIAF item}),
# applied in this order; a source value missing from the table writes nothing.
item_mappings = (
    ('itemType.@xl:title', name_fiaf['ontology/property/carrier'], {
        'Safety': name_fiaf['ontology/item/film'],
        'Nitrate': name_fiaf['ontology/item/film'],
        'Digital': name_fiaf['ontology/item/digital'],
        'Video': name_fiaf['ontology/item/video_tape'],
        'Tape': name_fiaf['ontology/item/sound_tape'],
        'Disc': name_fiaf['ontology/item/disc'],
    }),
    ('gauge.@xl:title', name_fiaf['ontology/property/specific_carrier'], {
        '16mm': name_fiaf['ontology/item/16mm'],
        '35mm': name_fiaf['ontology/item/35mm'],
    }),
    ('techCode.@xl:title', name_fiaf['ontology/property/element'], {
        'Composite Positive': name_fiaf['ontology/item/print'],
        'Duplicate Negative Track': name_fiaf['ontology/item/duplicate_negative'],
        'Duplicate Negative Picture': name_fiaf['ontology/item/duplicate_negative'],
        'Positive Picture': name_fiaf['ontology/item/duplicate_positive'],
    }),
    ('categoryMaterial.@xl:title', name_fiaf['ontology/property/access'], {
        'Access': name_fiaf['ontology/item/viewing'],
        'Preservation Copy': name_fiaf['ontology/item/master'],
        'Access/Browsing copy': name_fiaf['ontology/item/viewing'],
        'Limited Access': name_fiaf['ontology/item/restricted'],
        'Preservation Material': name_fiaf['ontology/item/master'],
    }),
    ('itemType.@xl:title', name_fiaf['ontology/property/base'], {
        'Safety': name_fiaf['ontology/item/acetate'],
        'Nitrate': name_fiaf['ontology/item/nitrate'],
    }),
)

loc_item = name_loc['ontology/item/loc']
instance_of = name_fiaf['ontology/property/instance_of']

for record in data:

    work_id = record['@xl:href'].split('/')[-1]
    work = name_loc[f"resource/work/{work_id}"]

    # Flatten every component group into one list of holdings.
    components = record['components']
    items = [
        holding
        for group in components
        for holding in single_list(components[group])
    ]

    for holding in items:

        manifestation = name_loc[f"resource/manifestation/{uuid.uuid4()}"]
        make_claim(manifestation, instance_of, name_fiaf['ontology/item/manifestation'])
        make_claim(manifestation, name_fiaf['ontology/property/manifestation_of'], work)

        item_id = pydash.get(holding, 'itemId')
        item = name_loc[f"resource/item/{item_id}"]

        make_claim(item, instance_of, name_fiaf['ontology/item/item'])
        make_claim(item, name_fiaf['ontology/property/item_of'], manifestation)

        claim_id = make_claim(item, name_fiaf['ontology/property/held_at'], loc_item)
        reference(claim_id, loc_item)

        claim_id = make_claim(item, name_fiaf['ontology/property/external_id'], rdflib.Literal(item_id))
        make_qual(claim_id, name_fiaf['ontology/property/institution'], loc_item)
        reference(claim_id, loc_item)

        for source_path, fiaf_property, lookup in item_mappings:
            source_value = pydash.get(holding, source_path)
            if source_value in lookup:
                claim_id = make_claim(item, fiaf_property, lookup[source_value])
                reference(claim_id, loc_item)

        # Inverse links: work -> manifestation -> item.
        make_claim(work, name_fiaf['ontology/property/manifestation'], manifestation)
        make_claim(manifestation, name_fiaf['ontology/property/item'], item)

print(len(graph))


2415


In [13]:

# Write the finished graph as Turtle next to the notebook.
turtle_path = pathlib.Path.cwd() / 'loc.ttl'
graph.serialize(destination=str(turtle_path), format="turtle")
print(len(graph))


2415
