#  Transforming Collections Data to Linked Art
## Harvard Art Museum

## Import What We Need for Notebook
See the [Transform Introduction](01-00-Transform-Intro.ipynb) for a description of the Python libraries used.

In [1]:
try:
    import json
except:
    !pip install json
    import json 

try:
    import cromulent
except:
    !pip install cromulent
    import cromulent
    
from cromulent.model import factory
    
from lib import linkedart as la

import csv

try:
    import pandas as pd
except:
    !pip install pandas
    import pandas as pd
    
import requests

## Input Data
This notebook uses data retrieved from the Harvard Art Museums API. An API key, read from a local key file, is required to run the download cell.

In [2]:
apidocs = "https://github.com/harvardartmuseums/api-docs"

keyfile = "data/ruskin/input/key.txt"
# Read the API key from a local file so it is not hard-coded in the notebook.
# strip() matters: a trailing newline in the key file would otherwise be
# embedded in the request URL.
with open(keyfile, mode='r', encoding='utf-8-sig') as key_file:
    key = key_file.read().strip()
uri = "https://api.harvardartmuseums.org/object?person=28419&apikey=" + key

# Download result pages 1..10 and store each page as <page>.json.
for page in range(1, 11):
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(uri + "&page=" + str(page), timeout=30)
    # Fail fast on HTTP errors instead of saving an error payload as data.
    response.raise_for_status()
    json_data = response.json()

    with open("./data/harvard/input/" + str(page) + ".json", "wt") as text_file:
        text_file.write(json.dumps(json_data, indent=2))

In [3]:
# Base of the URI minted for each object (the object id is appended).
baseURI = "https://www.harvardartmuseums.org/collections/object/"
# Directory the generated Linked Art JSON files are written to.
outputdir = "data/harvard/output"

# Mapping of target property name -> source field name in a Harvard API record.
# An empty string means no source field is mapped for that property.
# "classification" used to appear twice in this literal; Python keeps only the
# last value, so the single entry below carries that effective value.
mapp_harvard = {
    "id": "id",
    "accession_number": "objectnumber",
    "accession_date": "accessionyear",
    "classification": "classification",
    "title": "title",
    "alt_title": "",
    "notes": "commentary",
    "date_created": "dated",
    "date_created_earliest": "",
    "date_created_latest": "",
    "created_period": "period",
    "created_dynasty": "century",
    "created_inscriptions": "",
    "created_notes": "",
    "creator": "people",
    "physical_medium": "medium",
    "physical_style": "",
    "physical_technique": "technique",
    "physical_description": "description",
    "physical_dimensions": "dimensions",
    "created_provenance": "provenance",
    "credit_line": "creditline",
    "collection": "division",
    "current_status": "",
    # Not a source field name; createObjPropHarvard sets "current_owner" itself.
    "current_owner": "Harvard Museum of Art",
    "image_url": "primaryimageurl",
    "homepage": "url"
}

def createObjPropHarvard(obj, mapp, artist_id=28419):
    """Translate one Harvard API record into the notebook's property names.

    Args:
        obj: A single record dict (one element of a response's ``records``).
        mapp: Dict of target property name -> source field name in ``obj``.
        artist_id: ``personid`` a creator entry must have (together with the
            role ``"Artist"``) to be accepted. Defaults to 28419, the person
            id used in this notebook's API query.

    Returns:
        A dict of mapped properties. ``"creator"`` is always present: a
        one-element list ``[{"id": ..., "name": ...}]`` when a matching
        artist entry exists, otherwise ``""``. ``"current_owner"`` is always
        set to a fixed description of the owning institution.
    """
    # Invert the mapping once (source field -> target properties) so every
    # record field is a dict lookup instead of a scan over the whole mapping.
    # Several properties may share one source field, hence the lists.
    props_by_field = {}
    for prop, field in mapp.items():
        props_by_field.setdefault(field, []).append(prop)

    objProp = {}
    for field, value in obj.items():
        for prop in props_by_field.get(field, ()):
            if prop != "creator":
                objProp[prop] = value
                continue

            objProp["creator"] = ""
            # The field may be null in a record; treat that as "no people".
            for person in value or []:
                if (
                    isinstance(person, dict)
                    and person.get("personid") == artist_id
                    and person.get("role") == "Artist"
                ):
                    objProp[prop] = [{
                        "id": str(person["personid"]),
                        "name": person["displayname"],
                    }]

    # Callers test objProp["creator"]; make sure the key exists even when the
    # record carried no creator field at all.
    objProp.setdefault("creator", "")

    objProp["current_owner"] = {"name": "Harvard Art Museum",
                                "location": "Cambridge, MA",
                                "type": "http://vocab.getty.edu/aat/300312281",
                                "type_label": ""}
    return objProp

# Transform every downloaded page (1..10) into one Linked Art file per object.
for x in range(1, 11):

    with open("./data/harvard/input/" + str(x) + ".json", encoding="utf-8") as json_file:
        data = json.load(json_file)

    for obj in data["records"]:
        objProp = createObjPropHarvard(obj, mapp_harvard)

        # Only keep records for which a creator was found; .get() avoids a
        # KeyError for records where no "creator" entry was produced at all.
        if objProp.get("creator", "") == "":
            continue

        object_id = str(objProp["id"])
        object_uri = baseURI + object_id

        # create obj description
        objLA = la.createObjDesc(objProp, la.objTypes, object_uri)

        # write to file; the context manager closes the handle even when
        # serialisation raises
        with open(outputdir + "/" + object_id + ".json", "wt", encoding="utf-8") as text_file:
            text_file.write(factory.toString(objLA, compact=False))
    

In [4]:
import html
import os
# display/HTML are imported from the public IPython.display module; importing
# display from IPython.core.display is deprecated.
from IPython.display import display, HTML


def fn(directory="./data/harvard/output/"):
    """Display one HTML link per file found in ``directory``.

    Args:
        directory: Folder to list; also used as the link prefix. Defaults to
            the notebook's Harvard output folder.
    """
    # Make sure the prefix and the file name are separated by exactly one "/".
    prefix = directory if directory.endswith("/") else directory + "/"

    # os.listdir returns entries in arbitrary order; sort for a stable listing.
    for file in sorted(os.listdir(directory)):
        # Escape the name so characters such as quotes or "<" cannot break the markup.
        safe_name = html.escape(file, quote=True)
        href = html.escape(prefix, quote=True) + safe_name
        display(HTML("<a target='_new' href='" + href + "'>" + safe_name + "</a>"))


fn()