# Linked Art - Transform - John Ruskin 

This notebook transforms Collections data on John Ruskin artworks to Linked Art, in preparation for a data visualisation.

## Import What We Need

See the [Transform Introduction](01-00-Transform-Intro.ipynb) for a description of the Python libraries used

In [1]:
try:
    import ipywidgets as widgets
except:
    !pip install ipywidgets
    import ipywidgets as widgets

from ipywidgets import Layout
from ipywidgets import FileUpload

import os


try:
    import IPython
except:
    !pip install IPython
    import IPython   
    
from IPython.display import display, IFrame, HTML

try:
    import xmltodict
except:
    !pip install xmltodict
    import xmltodict

try:
    import json
except:
    !pip install json
    import json 
    
    
try:
    import requests
except:
    !pip install requests
    import requests

import csv

try:
    import cromulent 
except:
    !pip install cromulent
    import cromulent
    
from cromulent.model import factory
from lib import linkedart as la


try:
    import pandas as pd
except:
    !pip install pandas
    import pandas as pd


In [2]:
# Directory that receives one Linked Art JSON-LD file per transformed object.
outputdir = "./data/ruskin/output/json/"
# Create it up front so the export cells do not fail on a fresh checkout.
os.makedirs(outputdir, exist_ok=True)

# Image URLs keyed by object URI; filled in by a later cell.
images = {}

### National Gallery of Art

- [data file](./data/nga/input/nga_ruskin.csv)

In [3]:

# Base URI that the NGA object id is appended to when building the JSON-LD document URI.
baseURI = "https://www.nga.gov/collection/"
# Path of the NGA Ruskin subset; the following NGA cells reopen this same path.
file = 'data/nga/input/nga_ruskin.csv'
# Preview the CSV with pandas. The first column is shown as "Unnamed: 0" in the
# output, i.e. the file has a leading column without a header.
mpg = pd.read_csv(file)
mpg.head()

Unnamed: 0,objectid,accessioned,accessionnum,locationid,title,displaydate,beginyear,endyear,visualbrowsertimespan,medium,...,visualbrowserclassification,parentid,isvirtual,departmentabbr,portfolio,series,volume,watermarks,lastdetectedmodification,customprinturl
0,70238,1,1987.73.2,,Tower of the Cathedral at Sens,c. 1845,1845,1845,1826 to 1850,"pen and brown ink, brush and black ink, black ...",...,drawing,,0,CG-E,,,,,2019-10-28 22:01:34.883-04,
1,70367,1,1988.20.38,,Tree Study,mid-1850s,1845,1855,1826 to 1850,pen and black ink with blue-gray and gray wash...,...,drawing,,0,CG-E,,,,,2020-04-10 22:01:40.093-04,
2,72870,1,1991.88.1,,The Garden of San Miniato near Florence,1845,1845,1845,1826 to 1850,"watercolor and pen and black ink, heightened w...",...,drawing,,0,CG-E,,,,,2019-10-28 22:01:34.883-04,
3,76140,1,1995.52.158,,"Ornamental Study with Acanthus Motif for ""The ...",1849,1849,1849,1826 to 1850,pen and brown ink with watercolor and graphite...,...,drawing,,0,CG-E,,,,,2019-10-28 22:01:34.883-04,


In [6]:
# NGA open-data table of published images, read straight from GitHub
# (requires network access when the cell runs).
file_images = "https://raw.githubusercontent.com/NationalGalleryOfArt/opendata/main/data/published_images.csv"

# One row per image; "depictstmsobjectid" holds the object id and "iiifurl" the IIIF base URL.
df_images = pd.read_csv(file_images)

df_images.head()

Unnamed: 0,uuid,iiifurl,iiifthumburl,viewtype,sequence,width,height,maxpixels,created,modified,depictstmsobjectid,assistivetext
0,00004dec-8300-4487-8d89-562d0126b6a1,https://api.nga.gov/iiif/00004dec-8300-4487-8d...,https://api.nga.gov/iiif/00004dec-8300-4487-8d...,primary,0.0,2623,4000,640.0,2010-09-07 15:08:48-04,2021-08-26 14:28:24-04,11975,
1,00007f61-4922-417b-8f27-893ea328206c,https://api.nga.gov/iiif/00007f61-4922-417b-8f...,https://api.nga.gov/iiif/00007f61-4922-417b-8f...,primary,0.0,3365,4332,,2013-07-05 15:41:08-04,2021-07-07 10:26:29-04,17387,
2,0000bd8c-39de-4453-b55d-5e28a9beed38,https://api.nga.gov/iiif/0000bd8c-39de-4453-b5...,https://api.nga.gov/iiif/0000bd8c-39de-4453-b5...,primary,0.0,3500,4688,,2013-08-05 14:31:59-04,2021-07-07 10:34:01-04,19245,
3,0000e5a4-7d32-4c2a-97c6-a6b571c9fd71,https://api.nga.gov/iiif/0000e5a4-7d32-4c2a-97...,https://api.nga.gov/iiif/0000e5a4-7d32-4c2a-97...,primary,0.0,2252,3000,,2013-03-18 14:39:55-04,2021-08-31 10:29:14-04,153987,
4,0001668a-dd1c-48e8-9267-b6d1697d43c8,https://api.nga.gov/iiif/0001668a-dd1c-48e8-92...,https://api.nga.gov/iiif/0001668a-dd1c-48e8-92...,primary,0.0,3446,4448,,2014-01-02 14:50:50-05,2021-07-07 11:09:58-04,23830,


In [15]:


# Strip a UTF-8 byte-order mark from the CSV in place: "utf-8-sig" drops the
# BOM on read and the text is written back as plain UTF-8.
# Context managers make sure both handles are closed (and the write is flushed)
# before the file is opened again further down.
with open(file, mode='r', encoding='utf-8-sig') as bom_source:
    s = bom_source.read()
with open(file, mode='w', encoding='utf-8') as bom_target:
    bom_target.write(s)

# Maps the property names used by the Linked Art helper (keys) to NGA CSV
# column names (values). An empty string is used where no column is mapped.
# "created_provenance" was previously listed twice; the duplicate is removed.
mapp_nga = {
    "id": "objectid",
    "accession_number": "accessionnum",
    "accession_date": "",
    "classification": "classification",
    "title": "title",
    "alt_title": "",
    "notes": "",
    "date_created": "displaydate",
    "date_created_earliest": "beginyear",
    "date_created_latest": "endyear",
    "created_period": "",
    "created_dynasty": "",
    "created_inscriptions": "",
    "created_notes": "",
    "created_provenance": "",
    "creator": "attribution",
    "physical_medium": "medium",
    "physical_style": "",
    "physical_technique": "",
    "physical_description": "",
    "physical_dimensions": "dimensions",
    "credit_line": "creditline",
    "collection": "departmentabbr",
    "current_status": "",
    "current_owner": "",
    "homepage": "",
}

# Plain Python lists of the image table's object ids and IIIF base URLs
# (both in the row order of df_images).
dfimg_list = df_images['depictstmsobjectid'].tolist()
dfimgurl_list = df_images['iiifurl'].tolist()

def createObjProp(obj,mapp):
    """Build the property dictionary for one NGA CSV row.

    Args:
        obj: one CSV row as a dict of column name -> value.
        mapp: dict of property name -> source column name. An empty value
            means the property has no source column and is skipped.

    Returns:
        dict of property name -> row value for every mapped column found in
        obj. "creator" is wrapped as a one-item list of
        {"id", "name", "role"} dicts; "image_url" is added when a key of the
        global images dict contains the object id.
    """
    objProp = {}
    for key in obj:
        for prop, column in mapp.items():
            # Skip unmapped properties: otherwise "" would match a column whose
            # header is empty (such as an unlabeled index column) and every
            # unmapped property would silently receive that column's value.
            if not column or key != column:
                continue
            if prop == "creator":
                objProp[prop] = [{
                    "id": baseURI + "creatorid/" + obj[mapp["id"]],
                    "name": obj[key],
                    "role": "Artist",
                }]
            else:
                objProp[prop] = obj[key]

    # Only look for an image when an id was actually mapped; this avoids a
    # KeyError and an empty id matching every key.
    # NOTE(review): this is a substring test against the keys of images, so a
    # short id can match a longer one — confirm that is acceptable.
    object_id = objProp.get("id")
    if object_id:
        for image in images:
            if object_id in image:
                objProp["image_url"] = images[image]
    return objProp

    
# Transform every NGA row into a Linked Art description and write it to disk.
# newline='' is what the csv module asks for so quoted line breaks are kept;
# the context manager closes the CSV once all rows are processed.
with open(file, mode='r', encoding='utf-8', newline='') as csv_file:
    allObjects = csv.DictReader(csv_file)

    for obj in allObjects:
        # create object property dictionary
        objProp = createObjProp(obj,mapp_nga)
        object_id = objProp["id"]

        # Attach the first matching IIIF image, if the image table has one.
        # A single filtered lookup replaces the list membership test plus a
        # second scan of the table.
        image_rows = df_images.loc[df_images['depictstmsobjectid'] == int(object_id)]
        if not image_rows.empty:
            objProp["image_url"] = image_rows.iloc[0]["iiifurl"] + "/full/!500,500/0/default.jpg"
            print(objProp["image_url"])

        objProp["homepage"] = "https://www.nga.gov/collection/art-object-page." + object_id + ".html"
        object_uri = baseURI + object_id

        # create obj description
        objLA = la.createObjDesc(objProp,la.objTypes,object_uri)

        # write to file; the handle is closed even if serialisation raises
        with open(os.path.join(outputdir, object_id + ".json"), "wt", encoding="utf-8") as text_file:
            text_file.write(factory.toString(objLA, compact=False))

https://api.nga.gov/iiif/49a6128c-8d5a-4b00-beb3-36b29b97c0a1/full/!500,500/0/default.jpg
https://api.nga.gov/iiif/9310c903-7099-4138-b380-d90af8b06703/full/!500,500/0/default.jpg
https://api.nga.gov/iiif/f6ef48d3-3512-4f46-ac5b-c221b3fa320e/full/!500,500/0/default.jpg
https://api.nga.gov/iiif/bc567179-9c1e-4493-b712-956cc4e6b00a/full/!500,500/0/default.jpg


### Cleveland Museum of Art 
http://www.clevelandart.org/
- Download CSV file from GitHub https://github.com/ClevelandMuseumArt/openaccess
- View file with OpenRefine https://openrefine.org/
- Create a text facet for the Creator field to identify artworks by John Ruskin
- Export this subset of records (1 record found)
- [data file](./data/cma/input/ruskin.csv) 


In [None]:
# Base URI that the CMA object id is appended to when building the JSON-LD document URI.
baseURI = "https://clevelandart.org/art/"
# Path of the exported CMA subset; the next cell reopens this same path.
file = './data/cma/input/ruskin.csv'
# Preview the CSV with pandas.
mpg = pd.read_csv(file)
mpg.head()

In [None]:

# Strip a UTF-8 byte-order mark from the CSV in place: "utf-8-sig" drops the
# BOM on read and the text is written back as plain UTF-8.
# Context managers make sure both handles are closed (and the write is flushed)
# before the file is opened again further down.
with open(file, mode='r', encoding='utf-8-sig') as bom_source:
    s = bom_source.read()
with open(file, mode='w', encoding='utf-8') as bom_target:
    bom_target.write(s)

# Maps the property names used by the Linked Art helper (keys) to CMA CSV
# column names (values). An empty string is used where no column is mapped.
mapp_cma = {
    "id": "id",
    "accession_number": "accession_number",
    "accession_date": "",
    "classification": "type",
    "title": "title",
    "alt_title": "title_in_original_language",
    "notes": "tombstone",
    "date_created": "creation_date",
    "date_created_earliest": "creation_date_earliest",
    "date_created_latest": "creation_date_latest",
    "created_period": "culture",
    "created_dynasty": "",
    "created_inscriptions": "inscriptions",
    "created_notes": "fun_fact",
    # "creator" used to be listed twice ("creator", then this dict); only the
    # later entry took effect, so that value is kept here.
    # NOTE(review): createObjProp compares each CSV column name with this
    # value, and a dict never equals a column name, so no creator is copied
    # from the CSV — confirm whether the "creator" column was intended.
    "creator": {"name": "John Ruskin"},
    "physical_medium": "Medium",
    "physical_style": "",
    "physical_technique": "technique",
    "physical_description": "",
    "physical_dimensions": "measurements",
    "created_provenance": "provenance",
    "credit_line": "creditline",
    "collection": "department",
    "current_status": "current_location",
    "current_owner": "",
    "homepage": "url",
}

def createObjProp(obj,mapp):
    """Build the property dictionary for one CMA CSV row.

    Args:
        obj: one CSV row as a dict of column name -> value.
        mapp: dict of property name -> source column name. An empty value
            means the property has no source column and is skipped.

    Returns:
        dict of property name -> row value for every mapped column found in
        obj. "creator" is wrapped as a one-item list of
        {"id", "name", "role"} dicts.
    """
    objProp = {}
    for key in obj:
        for prop, column in mapp.items():
            # Skip unmapped properties: otherwise "" would match a column whose
            # header is empty and every unmapped property would silently
            # receive that column's value.
            if not column or key != column:
                continue
            if prop == "creator":
                objProp[prop] = [{
                    "id": baseURI + "creatorid/" + obj[mapp["id"]],
                    "name": obj[key],
                    "role": "Artist",
                }]
            else:
                objProp[prop] = obj[key]
    return objProp

# Transform every CMA row into a Linked Art description and write it to disk.
# newline='' is what the csv module asks for so quoted line breaks are kept;
# the context manager closes the CSV once all rows are processed.
with open(file, mode='r', encoding='utf-8', newline='') as csv_file:
    allObjects = csv.DictReader(csv_file)

    for obj in allObjects:
        # create object property dictionary
        objProp = createObjProp(obj,mapp_cma)

        object_id = objProp["id"]
        object_uri = baseURI + object_id

        # NOTE(review): this is built from the same prefix as object_uri, so it
        # points at the object page rather than an image file — confirm.
        objProp["image_url"] = "https://clevelandart.org/art/" + object_id

        # create obj description
        objLA = la.createObjDesc(objProp,la.objTypes,object_uri)

        # write to file; os.path.join avoids the doubled "/" that plain
        # concatenation produced, and the handle is closed even on error
        with open(os.path.join(outputdir, object_id + ".json"), "wt", encoding="utf-8") as text_file:
            text_file.write(factory.toString(objLA, compact=False))

## Tate Museum

The Tate Museum has three items by John Ruskin in its Collection Online database. JSON-format files are available from:

https://github.com/tategallery/collection

### N02726 

https://www.tate.org.uk/art/artworks/ruskin-an-olive-spray-and-two-leaf-outlines-n02726 

https://raw.githubusercontent.com/tategallery/collection/master/artworks/n/027/n02726-13031.json 

 

### N02972 

https://www.tate.org.uk/art/artworks/ruskin-the-north-west-angle-of-the-facade-of-st-marks-venice-n02972 

https://raw.githubusercontent.com/tategallery/collection/master/artworks/n/029/n02972-13032.json  

 
### N03507 

https://www.tate.org.uk/art/artworks/ruskin-view-of-bologna-n03507 

https://raw.githubusercontent.com/tategallery/collection/master/artworks/n/035/n03507-13033.json  

In [None]:
# Folder holding the downloaded Tate artwork records.
fileprefix = "./data/tate/"

# One JSON record per Ruskin artwork in the Tate collection.
files = (
    "n02726-13031.json",
    "n02972-13032.json",
    "n03507-13033.json",
)

# Base URI that the Tate object id is appended to.
baseURI = "https://www.tate.org.uk/art/artworks/"

# Property name (key) -> Tate JSON field name (value).
# An empty string is used where no field is mapped.
mapp_tate = {
    # identification
    "id": "id",
    "accession_number": "acno",
    "accession_date": "acquisitionYear",
    "classification": "classification",
    "title": "title",
    "alt_title": "",
    "notes": "",
    # creation
    "date_created": "dateText",
    "date_created_earliest": "Dated",
    "date_created_latest": "Dated",
    "created_period": "",
    "created_dynasty": "",
    "created_inscriptions": "",
    "created_notes": "",
    "creator": "contributors",
    # physical description
    "physical_medium": "medium",
    "physical_style": "",
    "physical_technique": "",
    "physical_description": "",
    "physical_dimensions": "dimensions",
    # ownership and links
    "created_provenance": "",
    "credit_line": "creditLine",
    "collection": "Department",
    "current_status": "",
    # presumably a literal owner name rather than a field name — TODO confirm
    "current_owner": "Tate Museum",
    "image_url": "thumbnailUrl",
    "homepage": "url",
}


def createObjProp(obj,mapp):
    """Build a property dictionary from one Tate record using a field mapping.

    Args:
        obj: one record as a dict of field name -> value.
        mapp: dict of property name -> source field name. An empty value
            means the property has no source field and is skipped.

    Returns:
        dict of property name -> record value for every mapped field found in
        obj. "creator" is wrapped as a one-item list of {"name": value}.
    """
    objProp = {}
    for key in obj:
        for prop, field in mapp.items():
            # Skip unmapped properties so "" can never match an empty key and
            # copy an unrelated value into every unmapped property.
            if not field or key != field:
                continue
            if prop == "creator":
                objProp[prop] = [{"name": obj[key]}]
            else:
                objProp[prop] = obj[key]
    return objProp

for file in files:
    # Load one Tate artwork record; the input handle is closed before any
    # output is written. JSON is read explicitly as UTF-8.
    with open(fileprefix + file, encoding="utf-8") as json_file:
        data = json.load(json_file)

    objProp = {
        "id": data["id"],
        "accession_number": data["acno"],
        "accession_date": data["acquisitionYear"],
        "classification": data["classification"],
        "title": data["title"],
        "alt_title": "",
        "notes": "",
        "date_created": data["dateText"],
        "date_created_earliest": "",
        "date_created_latest": "",
        "created_period": "",
        "created_dynasty": "",
        "created_inscriptions": "",
        "created_notes": "",
        # One entry per contributor, using the contributor id and its "fc" field.
        "creator": [
            {"id": str(contributor["id"]), "name": str(contributor["fc"])}
            for contributor in data["contributors"]
        ],
        "physical_medium": data["medium"],
        "physical_style": "",
        "physical_technique": "",
        "physical_description": "",
        "physical_dimensions": data["dimensions"],
        "created_provenance": "",
        "credit_line": data["creditLine"],
        "collection": "",
        "current_status": "",
        "current_owner": {"name": "Tate Museum",
                          "location": "London,England",
                          "type": "http://vocab.getty.edu/aat/300312281",
                          "type_label": ""},
        # These two used to be the literal strings "thumbnailUrl" and "url"
        # (the field names from mapp_tate); read the record's values instead.
        "image_url": data.get("thumbnailUrl") or "",
        "homepage": data.get("url") or "",
    }

    object_id = str(objProp["id"])
    object_uri = baseURI + object_id

    # create obj description
    objLA = la.createObjDesc(objProp,la.objTypes,object_uri)

    # write to file; os.path.join avoids the doubled "/" that plain
    # concatenation produced, and the handle is closed even on error
    with open(os.path.join(outputdir, object_id + ".json"), "wt", encoding="utf-8") as text_file:
        text_file.write(factory.toString(objLA, compact=False))
        

## Harvard Art Museum

https://harvardartmuseums.org/collections/person/28419?person=28419


In [None]:
apidocs = "https://github.com/harvardartmuseums/api-docs"

# The API key is kept in a local text file instead of in the notebook.
# strip() removes surrounding whitespace such as a trailing newline, which
# would otherwise end up inside the request URL.
keyfile = "data/ruskin/input/key.txt"
with open(keyfile, mode='r', encoding='utf-8-sig') as key_handle:
    key = key_handle.read().strip()
uri = "https://api.harvardartmuseums.org/object?person=28419&apikey=" + key

# Download result pages 1-10 and store each one as a JSON file.
for page in range(1, 11):
    response = requests.get(uri + "&page=" + str(page), timeout=60)
    # Fail here on an HTTP error instead of saving an error payload that only
    # breaks later when the saved file is read back.
    response.raise_for_status()
    json_data = response.json()
    with open("./data/harvard/input/" + str(page) + ".json", "wt") as text_file:
        text_file.write(json.dumps(json_data, indent=2))
    

### mapping

In [None]:
import re

# Base URI that the Harvard object id is appended to.
baseURI = "https://www.harvardartmuseums.org/collections/object/"

# Maps the property names used by the Linked Art helper (keys) to Harvard API
# record fields (values). An empty string is used where no field is mapped.
mapp_harvard = {
    "id": "id",
    "accession_number": "objectnumber",
    "accession_date": "accessionyear",
    # "classification" used to be listed twice ("" and "classification");
    # the later value was the one in effect, so it is the one kept.
    "classification": "classification",
    "title": "title",
    "alt_title": "",
    "notes": "commentary",
    "date_created": "dated",
    "date_created_earliest": "",
    "date_created_latest": "",
    "created_period": "period",
    "created_dynasty": "century",
    "created_inscriptions": "",
    "created_notes": "",
    "creator": "people",
    "physical_medium": "medium",
    "physical_style": "",
    "physical_technique": "technique",
    "physical_description": "description",
    "physical_dimensions": "dimensions",
    "created_provenance": "provenance",
    "credit_line": "creditline",
    "collection": "division",
    "current_status": "",
    # presumably a literal owner name rather than a field name — TODO confirm
    "current_owner": "Harvard Museum of Art",
    "image_url": "primaryimageurl",
    "homepage": "url",
}

def createObjPropHarvard(obj,mapp,personid=28419):
    """Build the property dictionary for one Harvard Art Museums record.

    Args:
        obj: one API record as a dict of field name -> value.
        mapp: dict of property name -> source field name. An empty value
            means the property has no source field and is skipped.
        personid: id of the person who must appear with role "Artist" for the
            record to get a creator. Defaults to 28419, the id the notebook
            uses for John Ruskin.

    Returns:
        dict of property name -> record value for every mapped field found in
        obj, plus:
        - "date_created_earliest"/"date_created_latest", derived from the
          four-digit years found in the "dated" field;
        - "creator": a one-item list of {"id", "name"} for the matching
          artist, or "" when there is none;
        - "current_owner": a fixed description of the museum.
    """
    objProp = {}
    for key in obj:
        for prop, field in mapp.items():
            # Skip unmapped properties and non-matching fields.
            if not field or key != field:
                continue

            if key == "dated":
                # look for years in the value; a missing date is treated as
                # empty text instead of crashing the regex
                years = re.findall(r'(\d{4})', str(obj[key] or ""))
                if len(years) == 1:
                    objProp["date_created_earliest"] = years[0]
                    objProp["date_created_latest"] = years[0]
                elif len(years) == 2:
                    objProp["date_created_earliest"] = years[0]
                    objProp["date_created_latest"] = years[1]
                else:
                    # use Ruskin's birth/death years; kept as strings so the
                    # fallback has the same type as the parsed years above
                    objProp["date_created_earliest"] = "1819"
                    objProp["date_created_latest"] = "1900"

            if prop == "creator":
                objProp["creator"] = ""
                # a missing people list is treated as empty
                for person in obj[key] or []:
                    if ("personid" in person
                            and person["personid"] == personid
                            and person.get("role") == "Artist"):
                        objProp[prop] = [{
                            "id": str(person["personid"]),
                            "name": person["displayname"],
                        }]
            else:
                objProp[prop] = obj[key]

    # Callers test objProp["creator"], so make sure the key exists even when
    # the record has no mapped people field at all.
    objProp.setdefault("creator", "")
    objProp["current_owner"] = {"name":"Harvard Art Museum",
                                "location":"Cambridge, MA",
                                "type": "http://vocab.getty.edu/aat/300312281" ,
                                "type_label": ""}
    return objProp

# Transform the ten downloaded Harvard result pages.
for x in range(1,11):

    # Read the whole page first so the input handle is closed before writing.
    with open("./data/harvard/input/" + str(x) + ".json") as json_file:
        data = json.load(json_file)

    for obj in data["records"]:
        objProp = createObjPropHarvard(obj,mapp_harvard)
        object_id = str(objProp["id"])

        # Remember the image for this object; records with a missing or empty
        # image URL are skipped so images never holds None.
        if objProp.get("image_url"):
            images[baseURI + object_id] = objProp["image_url"]

        # Only records with a matching artist are exported.
        if objProp.get("creator", "") != "":
            object_uri = baseURI + object_id

            # create obj description
            objLA = la.createObjDesc(objProp,la.objTypes,object_uri)

            # write to file; os.path.join avoids the doubled "/" that plain
            # concatenation produced, and the handle is closed even on error
            with open(os.path.join(outputdir, object_id + ".json"), "wt", encoding="utf-8") as text_file:
                text_file.write(factory.toString(objLA, compact=False))
    
    
    



## Rijksmuseum

In [None]:
# The API key is kept in a local text file instead of in the notebook.
# strip() removes surrounding whitespace such as a trailing newline, which
# would otherwise end up inside the request URL.
keyfile = "data/ruskin/input/keyrijks.txt"
with open(keyfile, mode='r', encoding='utf-8-sig') as key_handle:
    key = key_handle.read().strip()
url = "https://www.rijksmuseum.nl/api/nl/collection?key=" + key + "&involvedMaker=John+Ruskin"

# Folder for the raw Rijksmuseum records; created if it does not exist yet.
rijksdir = "data/rijks/json/"
os.makedirs(rijksdir, exist_ok=True)

baseURI = "http://www.rijksmuseum.nl/nl/collectie/"

# NOTE(review): the request carries no paging parameters, so only the API's
# first page of results is processed — confirm that covers every match.
response = requests.get(url, timeout=60)
# Fail here on an HTTP error instead of on a missing "artObjects" key below.
response.raise_for_status()
data = response.json()

# Keep only artworks that have an image and list John Ruskin as principal maker.
for artwork in data["artObjects"]:
    if not artwork["hasImage"]:
        continue
    if artwork["principalOrFirstMaker"] != "John Ruskin":
        continue
    with open(rijksdir + artwork["id"] + ".json", "wt") as text_file:
        text_file.write(json.dumps(artwork, indent=2))
    
    
    
# Maps the property names used by the Linked Art helper (keys) to Rijksmuseum
# record fields (values). An empty string is used where no field is mapped.
# "classification" was previously listed twice; the duplicate is removed.
mapp = {
    "id": "id",
    "accession_number": "objectNumber",
    "classification": "",
    "title": "title",
    "alt_title": "longTitle",
    "notes": "",
    "date_created": "",
    "date_created_earliest": "",
    "date_created_latest": "",
    "created_period": "",
    "created_dynasty": "",
    "created_inscriptions": "",
    "created_notes": "",
    "creator": "principalOrFirstMaker",
    "physical_medium": "",
    "physical_style": "",
    "physical_technique": "",
    "physical_description": "",
    "physical_dimensions": "",
    "created_provenance": "",
    "credit_line": "",
    "collection": "",
    "current_status": "",
    "current_owner": "",
    "image_url": "",
    "homepage": "",
}

def createObjProp(obj,mapp):
    """Build the property dictionary for one Rijksmuseum record.

    Args:
        obj: one record as a dict of field name -> value.
        mapp: dict of property name -> source field name. An empty value
            means the property has no source field and is skipped.

    Returns:
        dict of property name -> record value for every mapped field found in
        obj, plus a fixed "current_owner" description of the museum.
        "creator" is a one-item list of {"name": ...} when the source value is
        a non-empty maker name, a one-item list of {"id", "name"} when it is a
        list of person dicts containing the artist with personid 28419, and
        "" otherwise.
    """
    objProp = {}
    for key in obj:
        for prop, field in mapp.items():
            # Skip unmapped properties and non-matching fields.
            if not field or key != field:
                continue
            if prop == "creator":
                value = obj[key]
                objProp["creator"] = ""
                if isinstance(value, str):
                    # The maker is a plain name here. Looping over it would
                    # visit single characters and never find a creator, so it
                    # is wrapped as a list of name dicts instead.
                    if value:
                        objProp["creator"] = [{"name": value}]
                else:
                    for person in value or []:
                        if ("personid" in person
                                and person["personid"] == 28419
                                and person["role"] == "Artist"):
                            objProp[prop] = [{
                                "id": str(person["personid"]),
                                "name": person["displayname"],
                            }]
            else:
                objProp[prop] = obj[key]
    objProp["current_owner"] = {"name":"Rijksmuseum",
                                "location":"Amsterdam, Netherlands",
                                "type": "http://vocab.getty.edu/aat/300312281" ,
                                "type_label": "Museum"}
    return objProp


# Transform every downloaded Rijksmuseum record. Sorting gives a stable order,
# and entries that are not .json files (editor or OS artefacts, sub-folders)
# are skipped instead of breaking json.load.
file_list = sorted(os.listdir(rijksdir))
for file in file_list:
    if not file.endswith(".json"):
        continue

    # Read the record first so the input handle is closed before writing.
    with open(rijksdir + file) as json_file:
        obj = json.load(json_file)

    objProp = createObjProp(obj,mapp)

    # NOTE(review): the creator is passed on as a plain name string here,
    # while the other sources pass a list of dicts — confirm what
    # la.createObjDesc expects.
    objProp["creator"] = obj["principalOrFirstMaker"]
    objProp["classification"] = "Painting"
    objProp["homepage"] = obj["links"]["web"]
    # NOTE(review): image_url is never set for these records, although only
    # artworks with hasImage are downloaded — confirm whether that is intended.

    if objProp["creator"] != "":
        object_id = str(objProp["id"])
        object_uri = baseURI + object_id

        # create obj description
        objLA = la.createObjDesc(objProp,la.objTypes,object_uri)

        # write to file; the handle is closed even if serialisation raises
        with open(os.path.join(outputdir, object_id + ".json"), "wt", encoding="utf-8") as text_file:
            text_file.write(factory.toString(objLA, compact=False))





In [None]:
# Show a link to every generated file. os.listdir returns names in arbitrary
# order, so the list is sorted to make the output reproducible between runs.
file_list = sorted(os.listdir(outputdir))

for file in file_list:
    display(HTML("<a target='_new' href='" + outputdir + file +"'>" + file + "</a>"))
