# Catalogue data to ArCo

#### Imports

In [1]:
import json , rdflib , hashlib , re , requests
from rdflib import *
from rdflib import URIRef, Literal, Namespace, Graph
from rdflib.namespace import RDF , RDFS, DC , XSD
from rdflib.serializer import Serializer
import pandas as pd
from requests.auth import HTTPBasicAuth
import os.path

In [6]:
# Load the JSON-LD context once at module level; it is stored in the global
# `context_doc`. JSON text is UTF-8, so the encoding is set explicitly instead
# of relying on the platform default.
with open('rdf_transform/context.json', encoding='utf-8') as context_arco:
    context_doc = json.load(context_arco)

def rdf_format(serializ):
    """Return the file extension to use for an RDF serialization name.

    Parameters
    ----------
    serializ: str .
        One of: xml, n3, turtle, nt, pretty-xml, json-ld.

    Returns
    -------
    str
        The extension (without the leading dot). Unknown names fall back
        to "xml".
    """
    extensions = {
        "xml": "xml",
        "n3": "n3",  # the original tested "xml" twice, so n3 was unreachable
        "turtle": "ttl",
        "nt": "nt",
        "pretty-xml": "xml",
        "json-ld": "json",
    }
    return extensions.get(serializ, "xml")

def unique_id(text):
    """Return the hexadecimal MD5 digest of the string `text`."""
    digest = hashlib.md5()
    digest.update(text.encode())
    return digest.hexdigest()

def get_emotion(txt, filename):
    """Call the CELI emotion annotation API and cache the result on disk.

    Parameters
    ----------
    txt: str .
        The text to annotate.
    filename: str .
        Name (without extension) of the cache file written to 'emotions/'.

    Returns
    -------
    The decoded JSON response when the service answers with HTTP 200,
    otherwise None. Failures are reported on stdout and never raised, so a
    caller can keep processing the remaining records.
    """
    # NOTE(review): the service credentials are hard-coded in the source;
    # they should be moved to configuration / environment and rotated.
    try:
        # json.dumps escapes quotes, backslashes and newlines in `txt`;
        # the previous string concatenation produced invalid JSON for them.
        payload = json.dumps({"content": txt}, ensure_ascii=False)
        annotations = requests.post(
            'https://sophia-cluster-dev.aws.celi.it/it/spice/analysis',
            auth=('live', 'analys1s0042'),
            data=payload.encode('utf-8'),
            timeout=60,  # never hang forever on an unresponsive service
        )
        if annotations.status_code != 200:
            # previously this path hit an unbound variable and was swallowed
            return None
        annotations_json = annotations.json()
        with open('emotions/'+filename+'.json', 'w', encoding='utf-8') as f:
            json.dump(annotations_json, f)
        return annotations_json
    except (requests.RequestException, ValueError, TypeError, OSError) as e:
        # network failure, undecodable response, unserialisable input or
        # unwritable cache file: report it and carry on without annotations
        print("emotion annotation failed for", filename, "-", e)
        return None


def _field(record, key):
    """Return record[key] when `key` is configured, present and non-empty, else None."""
    if not key:
        return None
    return record.get(key) or None


def _parse_years(raw_date, date_sep):
    """Extract at most two digit-only year strings (start, end) from a date value."""
    text = str(raw_date)
    if date_sep and date_sep in text:
        parts = text.split(date_sep)[:2]
    else:
        parts = [text]
    years = (re.sub(r'\D', '', part) for part in parts)
    # parts without any digit cannot be turned into a date and are dropped
    return [year for year in years if year]


def _as_list(value):
    """Normalise a string or a list of strings to a list (anything else gives [])."""
    if isinstance(value, str):
        return [value]
    if isinstance(value, (list, tuple)):
        return list(value)
    return []


def _resolve_artists(record, artist, artist_sep):
    """Return the list of artist names of a record, ['unknown'], or None when the key is absent."""
    if artist is None:
        return ['unknown']
    if artist not in record:
        return None
    raw = record[artist]
    if not raw:
        return ['unknown']
    if artist_sep:
        names = [name.strip() for name in raw.split(artist_sep)]
        return [name for name in names if name] or ['unknown']
    return [raw]


def _load_cached_annotations(cache_name):
    """Return the annotations cached in 'emotions/<cache_name>.json', or None if there is no such file."""
    path = 'emotions/' + cache_name + '.json'
    if not os.path.isfile(path):
        return None
    print("it exists: ", cache_name + '.json')
    with open(path, encoding='utf-8') as f:
        return json.load(f)


def _add_emotion_annotations(g, annotations, artefact, stmt_uri,
                             earmark_ns, semiotics_ns, emotion_ns):
    """Add the emotions denoted by the pointer ranges of an annotation document to the graph."""
    graph_items = annotations.get("@graph", []) if annotations else []
    if len(graph_items) <= 1:
        return
    relation = URIRef(artefact + '/curator_emotion_relation')
    pointer_count = 0  # counts matching pointer ranges across the whole document
    for ann in graph_items:
        if not isinstance(ann, dict) or ann.get("@type") != "earmark:PointerRange":
            continue
        denoted = ann.get("semiotics:denotes")
        if not isinstance(denoted, dict):
            continue
        emotion_type = denoted.get("@type")
        if not isinstance(emotion_type, str) or "emotion:" not in emotion_type:
            continue
        pointer_count += 1
        pointer_uri = stmt_uri + '/pointer_range_' + str(pointer_count)
        # the first three characters of "@id" are dropped — presumably a
        # prefix of the annotation identifiers; TODO confirm
        emotion_node = URIRef(pointer_uri + '/' + denoted["@id"][3:])
        g.add(( URIRef(pointer_uri), earmark_ns.refersTo, URIRef(stmt_uri) ))
        g.add(( URIRef(pointer_uri), semiotics_ns.denotes, emotion_node ))
        g.add(( emotion_node, RDF.type, URIRef(emotion_ns + emotion_type.split(":")[1]) ))
        g.add(( relation, emotion_ns.emotion, emotion_node ))
        g.add(( URIRef(artefact), emotion_ns.triggers, emotion_node ))


def _read_gam_csv(path, cache):
    """Read a GAMgame CSV once per run; return the DataFrame, or None when it cannot be read."""
    if path not in cache:
        try:
            cache[path] = pd.read_csv(path, engine='python', index_col=False,
                                      skipinitialspace=True, quotechar="'", sep=",")
        except (OSError, ValueError) as err:
            print("GAMgame data not loaded:", path, "-", err)
            cache[path] = None
    return cache[path]


def _gam_tags(table, art_id, tag_type=None):
    """Return the 'tag' column of the rows about artwork `art_id`, optionally filtered by 'type'."""
    rows = table['id_opera'] == int(art_id)
    if tag_type is not None:
        rows = rows & (table['type'] == tag_type)
    return table.loc[rows]['tag']


def _add_depicted(g, artefact, listing, predicate, segment, rdf_class=None):
    """Add one node per comma-separated item of a curator's listing of depicted things."""
    for item in listing.split(','):
        if len(item) >= 55:
            # curators describe the scene rather than listing items:
            # keep the whole text as the label of a single node
            node = URIRef(artefact + '/' + segment + '/1')
            label = listing.strip()
        else:
            node = URIRef(artefact + '/' + segment + '/' + item.lower().strip().replace(' ', '_'))
            label = item.strip()
        g.add(( URIRef(artefact), predicate, node ))
        g.add(( node, RDFS.label, Literal(label) ))
        if rdf_class is not None:
            g.add(( node, RDF.type, rdf_class ))


def _add_gam_narration(g, table, artefact, art_id, tag_type, predicate, segment, rdf_class=None):
    """Add the GAMgame narration tags of the given type for one artefact (best effort)."""
    if table is None:
        return
    try:
        for tag in _gam_tags(table, art_id, tag_type):
            node = URIRef(artefact + '/' + segment + '/' + tag.strip().lower().replace(' ', '_'))
            g.add(( URIRef(artefact), predicate, node ))
            g.add(( node, RDFS.label, Literal(tag) ))
            if rdf_class is not None:
                g.add(( node, RDF.type, rdf_class ))
    except (KeyError, ValueError, TypeError, AttributeError) as err:
        print("GAMgame narration skipped for", art_id, "-", repr(err))


def artworks_to_rdf(input_json_file, 
                    output_rdf_file, 
                    serialization_format,
                    inst_name,
                    prefix,
                    artist=None,
                    artist_sep=None,
                    artwork_id=None,
                    artwork_type=None,
                    artwork_title=None,
                    inventory_num=None,
                    date=None,
                    date_sep=None,
                    technique=None,
                    dimensions=None,
                    description=None,
                    curator_emotions=None,
                    depicted_event=None,
                    depicted_characters=None,
                    depicted_objects=None,
                    web_page=None,
                    images=None,
                    social_media=None,
                    instagram=None,
                    twitter=None
                   ):
    
    """ 
    Transform a JSON file into RDF according to the ArCo ontology. 
    
    Parameters
    ----------
    
    input_json_file: str . 
        The path to the JSON file. It must be a list of dictionaries. 
        Every dictionary must represent an artefact.
    output_rdf_file: str . 
        The path and name of the output RDF file, format excluded.
    serialization_format: str . 
        The RDF serialization and file format. 
        Choose between: xml, n3, turtle, nt, pretty-xml, json-ld
    inst_name: str.
        The name of the data provider.
    prefix: str .
        The short name to be associated to the dataset. 
        Must be lower case, no spaces and special characters.
        When it is 'gam', extra data is read from the GAM_game/*.csv files.
    artist: str , optional .
        The name of the key including the artist(s)' name(s).
        The value of the key must be a string.
    artist_sep: str, optional .
        If multiple artists are included in 'artist', use 'artist_sep' to specify the separator.
    artwork_id: str , optional .
        The name of the key including a unique identifier for the artefact.
        The value of the key must be a string.
        When omitted, an MD5 hash of the record is used as identifier.
    artwork_type: str , optional .
        The name of the key including a classification of the artefact (e.g. painting, sculpture).
        The value of the key must be a string.
    artwork_title: str , optional . 
        The name of the key including the title of the artefact.
        The value of the key must be a string.
    inventory_num: str , optional .
        The name of the key including the inventory number of the artefact.
        The value of the key must be a string.
    date: str , optional .
        The name of the key including the date of creation of the artefact.
        The value of the key must be a string. Only its digits are used (as a year).
    date_sep: str, optional .
        If multiple years are included in 'date', use 'date_sep' to specify the separator.
    technique: str , optional .
        The name of the key including the technique or material of the artefact.
        The value of the key must be a string.
    dimensions: str , optional .
        The name of the key including the dimensions (height x width) of the artefact.
        The value of the key must be a string.
    description: str , optional .
        The name of the key including a description of the artefact.
        The value of the key must be a string.
    curator_emotions: str , optional .
        The name of the key including a comma separated list of feelings or emotions triggered by the artefact according to the curator.
        The value of the key must be a string.
    depicted_event: str , optional .
        The name of the key including a comma separated list of events depicted in the artefact.
        The value of the key must be a string.
    depicted_characters: str , optional .
        The name of the key including a comma separated list of characters depicted in the artefact.
        The value of the key must be a string.
    depicted_objects: str , optional .
        The name of the key including a description of objects depicted in the artefact.
        The value of the key must be a string.
    web_page: str , optional .
        The name of the key including a webpage of the artefact.
        The value of the key must be a string.
    images: str , optional .
        The name of the key including one or more images of the artefact.
        The value of the key must be a string or a list of strings.
    social_media: optional .
        Accepted for backward compatibility; it is not used.
    instagram: str , optional .
        The name of the key including one or more URL of social media posts related to the artefact. 
        The value of the key must be a list of strings.
    twitter: str , optional .
        The name of the key including one or more URL of social media posts related to the artefact. 
        The value of the key must be a list of strings.
    
    Returns
    -------
    The method serializes and stores the data into an output file.
    
    Raises
    ------
    KeyError
        If 'artwork_id' is given but a record does not include that key.
    
    Example
    -------
    Assuming the input_json_file (data.json) looks like the following 
    [
        {
          "inventory": "19088",
          "id": "3120",
          "url_en": "https://example.org/item/chair",
          "title": "Chair",
          "designer": "Mao Tse Dong",
          "images_urls": ["https://example.org/chair_img.jpg"],
          "description_en": "This rotating and tilting desk chair from 1908 is an early design."
        }
        ...
    ]
    
    artworks_to_rdf(input_json_file='data.json', 
                    output_rdf_file='data_output', 
                    serialization_format='json-ld',
                    inst_name='Name Lastname (or organization name)',
                    prefix='nl',
                    artist='designer',
                    artist_sep=None,
                    artwork_id='inventory',
                    artwork_title='title',
                    inventory_num='inventory',
                    description='description_en',
                    web_page='url_en',
                    images='images_urls') 
    """
    
    g = Graph()
    
    # namespaces
    spice = "https://w3id.org/spice/"
    base = spice + prefix + '/'
    ARCO = Namespace("https://w3id.org/arco/ontology/arco/")
    ARCOCORE = Namespace("https://w3id.org/arco/ontology/core/")
    ARCOCD = Namespace("https://w3id.org/arco/ontology/context-description/")
    ARCODD = Namespace("https://w3id.org/arco/ontology/denotative-description/")
    schema = Namespace("http://schema.org/")
    earmark = Namespace("http://www.essepuntato.it/2008/12/earmark#")
    EMOTION = Namespace("https://w3id.org/spice/SON/emotion/")
    NARRATIVE = Namespace("https://w3id.org/spice/SON/Narrative-Labyrinth#")
    FC = Namespace("https://w3id.org/spice/SON/fruitionContext/")
    semiotics = Namespace("http://ontologydesignpatterns.org/cp/owl/semiotics.owl#")
    g.bind( prefix , base)
    g.bind( "dc" , DC)
    g.bind( "arco" , ARCO)
    g.bind( "arco-core" , ARCOCORE)
    g.bind( "arco-cd" , ARCOCD)
    g.bind( "arco-dd" , ARCODD)
    g.bind( "schema" , schema)
    g.bind( "emo" , EMOTION)
    g.bind( "earmark" , earmark)
    g.bind( "nar" , NARRATIVE)
    g.bind( "fc" , FC)
    g.bind( "semiotics" , semiotics)
    
    # parse JSON (the file is closed as soon as it has been read)
    with open(input_json_file, encoding='utf-8') as json_file:
        data = json.load(json_file)
    
    # values shared by every artefact
    institute = spice + 'institute/' + prefix
    membership = URIRef(institute + '/collection_membership')
    collection = URIRef(institute + '/collection')
    height_length = "https://w3id.org/arco/ontology/denotative-description/HeightLength"
    is_gam = prefix == 'gam'
    gam_tables = {}  # GAMgame CSV files, read at most once per run
    
    for artefact_dict in data:
        # URIs: without 'artwork_id' the identifier is a hash of the record
        if artwork_id:
            art_id = str(artefact_dict[artwork_id])
        else:
            art_id = unique_id(json.dumps(artefact_dict))
        artefact = base + 'artefact/' + art_id.replace(" ", "")
        subject = URIRef(artefact)
        
        # field values (None when not configured, missing or empty)
        art_type = _field(artefact_dict, artwork_type)
        art_title = _field(artefact_dict, artwork_title)
        raw_date = _field(artefact_dict, date)
        dating = _parse_years(raw_date, date_sep) if raw_date else []
        art_technique = _field(artefact_dict, technique)
        art_dimensions = _field(artefact_dict, dimensions)
        desc = _field(artefact_dict, description)
        source = _field(artefact_dict, web_page)
        imgs = _as_list(_field(artefact_dict, images))
        posts_urls = _as_list(_field(artefact_dict, instagram))
        twitter_urls = _as_list(_field(artefact_dict, twitter))
        artists = _resolve_artists(artefact_dict, artist, artist_sep)
        emotions = _field(artefact_dict, curator_emotions)
        events = _field(artefact_dict, depicted_event)
        characters = _field(artefact_dict, depicted_characters)
        objects = _field(artefact_dict, depicted_objects)
        
        ##############  
        # add triples
        ############## 
        
        # ArCO and Schema.org
        g.add(( subject, RDF.type , ARCO.CulturalProperty ))
        g.add(( subject, ARCO.hasRelatedAgency , URIRef(institute) ))
        g.add(( URIRef(institute), RDFS.label , Literal(inst_name) ))
        
        g.add(( subject, ARCOCD.isMemberOfCollectionOf , membership ))
        g.add(( membership, RDF.type , ARCOCD.CollectionMembership ))
        g.add(( membership, ARCOCD.hasCollection , collection ))
        g.add(( membership, ARCOCD.hasMemberOfCollection , subject ))
        g.add(( collection, RDF.type , ARCO.CulturalPropertyCollection ))
        g.add(( collection, ARCO.hasRelatedAgency , URIRef(institute) ))
        
        g.add(( subject, RDF.type , schema.CreativeWork ))
        g.add(( URIRef(institute), schema.owns , subject ))
        
        if inventory_num and inventory_num in artefact_dict:
            g.add(( subject, ARCOCD.hasInventory, Literal( artefact_dict[inventory_num] ) ))
        if artwork_id:
            g.add(( subject, ARCO.uniqueIdentifier, Literal( artefact_dict[artwork_id] ) ))
            g.add(( subject, schema.identifier, Literal( artefact_dict[artwork_id] ) ))
        if art_type:
            typ = URIRef( spice+'type/'+art_type.replace(' ','_').replace(',','_').lower() )
            g.add(( subject, ARCODD.hasCulturalPropertyType, typ ))
            g.add(( typ, RDF.type, ARCODD.CulturalPropertyType ))
            g.add(( subject, schema.genre, typ ))
            g.add(( typ, RDFS.label, Literal(art_type) ))
        if art_title:
            g.add(( subject, ARCOCD.title, Literal(art_title) ))
            g.add(( subject, schema.headline, Literal(art_title) ))
        if dating:
            # a single year is used as both start and end of the dating event
            start = Literal(dating[0]+'-01-01T00:00:00Z',datatype=XSD.dateTime)
            end = Literal(dating[-1]+'-01-01T00:00:00Z',datatype=XSD.dateTime)
            g.add(( subject, ARCOCD.hasDating, URIRef(artefact+'/dating') ))
            g.add(( URIRef(artefact+'/dating'), ARCOCD.hasDatingEvent, URIRef(artefact+'/dating_event') ))
            g.add(( URIRef(artefact+'/dating_event'), ARCO.startTime , start ))
            g.add(( subject, schema.dateCreated , start ))
            g.add(( URIRef(artefact+'/dating_event'), ARCO.endTime , end ))
        if art_technique:
            tec = URIRef( spice+'material_technique/'+art_technique.replace(' ','_').replace(',','_').lower() )
            g.add(( subject, ARCODD.hasMaterialOrTechnique, tec ))
            g.add(( subject, schema.material, tec ))
            g.add(( tec, RDF.type, ARCODD.TechnicalCharacteristic ))
            g.add(( tec, RDFS.label, Literal(art_technique) ))
        if art_dimensions:
            measurement_collection = URIRef(artefact+'/measurement_collection')
            measurement = URIRef(artefact+'/measurement')
            g.add(( subject, ARCODD.hasMeasurementCollection, measurement_collection ))
            g.add(( measurement_collection, RDF.type, ARCODD.MeasurementCollection ))
            g.add(( measurement_collection, ARCODD.hasMeasurement, measurement ))
            g.add(( measurement, RDF.type, ARCODD.Measurement ))
            g.add(( measurement, ARCODD.hasMeasurementType, URIRef(height_length) ))
            g.add(( measurement, ARCODD.hasValue, URIRef(artefact+'/measurement_value') ))
            g.add(( URIRef(artefact+'/measurement_value') , RDF.value, Literal(art_dimensions) ))
            g.add(( subject, schema.size, Literal(art_dimensions) ))
        if desc:
            g.add(( subject, ARCOCORE.description, Literal(desc) ))
            g.add(( subject, schema.abstract, Literal(desc) ))
        if source:
            g.add(( subject, DC.source, URIRef(source) ))
            g.add(( subject, schema.mainEntityOfPage, URIRef(source) ))
        for n, img in enumerate(imgs, start=1):
            documentation = URIRef(artefact+'/img_'+str(n))
            g.add(( subject, ARCOCD.hasDocumentation, documentation ))
            g.add(( documentation, RDF.type, ARCOCD.PhotographicDocumentation ))
            g.add(( documentation, ARCOCD.url, URIRef(img) ))
            g.add(( documentation, schema.image, URIRef(img) ))
        
        if artists:
            for n, artista in enumerate(artists, start=1):
                artist_uri = spice+'agent/unknown' if artista == 'unknown' else spice+'agent/'+unique_id(artista)
                attribution = URIRef(artefact+'/attribution_'+str(n))
                g.add(( subject, ARCOCD.hasAuthorshipAttribution, attribution ))
                g.add(( attribution, RDF.type, ARCOCD.AuthorshipAttribution ))
                g.add(( attribution, ARCOCD.hasAttributedAuthor, URIRef(artist_uri) ))
                g.add(( URIRef(artist_uri), RDFS.label, Literal(artista) ))
                g.add(( subject, schema.creator, URIRef(artist_uri) ))
        
        # Social Media
        for post in posts_urls + twitter_urls:
            g.add(( subject, schema.subjectOf, URIRef(post) ))
            g.add(( URIRef(post), RDF.type, schema.SocialMediaPosting ))
        
        # SON ontologies  
        if emotions:
            relation = URIRef(artefact+'/curator_emotion_relation')
            g.add(( relation, RDF.type, EMOTION.EmotionRelation ))
            g.add(( relation, EMOTION.experiencer, URIRef(artefact+'/curator') ))
            g.add(( relation, EMOTION.stimulus, subject ))
            
            for i, emo in enumerate(emotions.replace('.','').split(','), start=1):
                stmt_uri = artefact+'/curator_emotion_stmt/'+emo.strip().lower().replace(' ','_')
                g.add(( subject, ARCOCD.hasSubject , URIRef(stmt_uri) ))
                g.add(( URIRef(stmt_uri), RDF.type, earmark.StringDocuverse ))
                g.add(( URIRef(stmt_uri), earmark.hasContent, Literal(emo.strip()) ))
                
                # cached CELI emotion annotations; loaded afresh for every
                # emotion so a missing file never reuses the previous result
                annotations_json = _load_cached_annotations('curator_'+art_id+'_'+str(i))
                #if annotations_json is None:
                #    annotations_json = get_emotion(emo, 'curator_'+art_id+'_'+str(i)) 
                _add_emotion_annotations(g, annotations_json, artefact, stmt_uri,
                                         earmark, semiotics, EMOTION)
            
            # add external information from GAMgame
            if is_gam:
                gam_emotions = _read_gam_csv("GAM_game/emozioni.csv", gam_tables)
                if gam_emotions is not None:
                    try:
                        for k, em in enumerate(_gam_tags(gam_emotions, art_id), start=1):
                            stmt_uri = artefact+'/curator_ann_emotion_stmt/'+em.strip().lower().replace(' ','_')
                            g.add(( subject, ARCOCD.hasSubject , URIRef(stmt_uri) ))
                            g.add(( URIRef(stmt_uri), RDF.type, earmark.StringDocuverse ))
                            g.add(( URIRef(stmt_uri), earmark.hasContent, Literal(em) ))
                            
                            # the cache file that is checked is also the one
                            # that is read, indexed by this loop's own counter
                            annotations_json = _load_cached_annotations('curator_ann_'+art_id+'_'+str(k))
                            #if annotations_json is None:
                            #    annotations_json = get_emotion(em, 'curator_ann_'+art_id+'_'+str(k)) 
                            _add_emotion_annotations(g, annotations_json, artefact, stmt_uri,
                                                     earmark, semiotics, EMOTION)
                    except (KeyError, ValueError, TypeError, AttributeError, OSError) as err:
                        print("GAMgame emotions skipped for", art_id, "-", repr(err))
        
        if events:
            _add_depicted(g, artefact, events, NARRATIVE.hasRepresentedAction, 'depicted_action')
            # add external information from GAMgame
            if is_gam:
                _add_gam_narration(g, _read_gam_csv("GAM_game/narrazione.csv", gam_tables),
                                   artefact, art_id, 'e',
                                   NARRATIVE.hasRepresentedAction, 'depicted_ann_action')
        
        if characters:
            _add_depicted(g, artefact, characters, NARRATIVE.hasRepresentedEntity,
                          'depicted_entity', NARRATIVE.NarrativeEntity)
            # add external information from GAMgame
            if is_gam:
                _add_gam_narration(g, _read_gam_csv("GAM_game/narrazione.csv", gam_tables),
                                   artefact, art_id, 'p',
                                   NARRATIVE.hasRepresentedEntity, 'depicted_ann_entity',
                                   NARRATIVE.NarrativeEntity)
        
        if objects: # curators never give a list of objects
            g.add(( subject , NARRATIVE.hasRepresentedObject , URIRef(artefact+'/depicted_object/1') ))
            g.add(( URIRef(artefact+'/depicted_object/1') , RDFS.label , Literal(objects) ))
            g.add(( URIRef(artefact+'/depicted_object/1') , RDF.type , NARRATIVE.NarrativeObject ))
            # add external information from GAMgame
            if is_gam:
                _add_gam_narration(g, _read_gam_csv("GAM_game/narrazione.csv", gam_tables),
                                   artefact, art_id, 'o',
                                   NARRATIVE.hasRepresentedObject, 'depicted_ann_object',
                                   NARRATIVE.NarrativeObject)
        
        if is_gam: # add gamgame sessions
            sessions = _read_gam_csv("GAM_game/scelte.csv", gam_tables)
            if sessions is not None:
                try:
                    sessions_codes = sessions.loc[sessions['id_opera'] == int(art_id)]['codice_sessione']
                    for code in sessions_codes:
                        g.add(( URIRef(spice+'gam_session/'+code), FC.fruitedEntity , subject ))
                        g.add(( URIRef(spice+'gam_session/'+code), RDF.type , FC.FruitionContext ))
                except (KeyError, ValueError, TypeError) as err:
                    print("GAMgame sessions skipped for", art_id, "-", repr(err))
    
    # the JSON-LD serializer additionally receives the module-level context
    destination = output_rdf_file+'.'+rdf_format(serialization_format)
    if serialization_format == 'json-ld':
        g.serialize(destination=destination, format=serialization_format, context=context_doc, encoding='utf-8')
    else:
        g.serialize(destination=destination, format=serialization_format, encoding='utf-8')
        

## GAM catalogue to ArCo

In [7]:
# Convert the GAM test catalogue to RDF (JSON-LD), mapping the Italian
# column names of the catalogue onto the generic parameters.
artworks_to_rdf(
    # input / output
    input_json_file='GAM_test_catalogue.json',
    output_rdf_file='rdf_transform/GAM_test_catalogue',
    serialization_format='json-ld',
    # data provider
    inst_name="Galleria Civica d'Arte Moderna e Contemporanea",
    prefix='gam',
    # catalogue fields
    artist='Autore',
    artist_sep=None,
    artwork_id='ID',
    artwork_type='Tipo',
    artwork_title='Titolo',
    inventory_num=None,
    date='Datazione',
    date_sep='-',
    technique='Tecnica',
    dimensions='Dimensioni',
    description='Storia',
    # curator annotations
    curator_emotions='Sensazione',
    depicted_event='Evento',
    depicted_characters='Personaggi',
    # links and media
    web_page=None,
    images='Immagine',
    instagram='instagram',
    twitter="twitter",
)

it exists:  curator_25_1.json
it exists:  curator_25_2.json
it exists:  curator_25_3.json
it exists:  curator_26_1.json
it exists:  curator_26_2.json
it exists:  curator_26_3.json
it exists:  curator_27_1.json
it exists:  curator_27_2.json
it exists:  curator_28_1.json
it exists:  curator_28_2.json
it exists:  curator_28_4.json
it exists:  curator_29_1.json
it exists:  curator_29_2.json
it exists:  curator_29_3.json
it exists:  curator_29_4.json
it exists:  curator_29_5.json
it exists:  curator_30_1.json
it exists:  curator_30_2.json
it exists:  curator_30_3.json
it exists:  curator_30_4.json
it exists:  curator_30_5.json
it exists:  curator_30_6.json
it exists:  curator_31_1.json
it exists:  curator_31_2.json
it exists:  curator_31_3.json
it exists:  curator_31_5.json
it exists:  curator_32_1.json
it exists:  curator_32_2.json
it exists:  curator_32_3.json
it exists:  curator_32_4.json
it exists:  curator_32_5.json
it exists:  curator_32_6.json
it exists:  curator_32_7.json
it exists: