- Playing around with V&A examples

In [4]:
from IPython.display import display

def Vega(spec):
    bundle = {}
    bundle['application/vnd.vega.v5+json'] = spec
    display(bundle, raw=True)

def treemap(clusters, cluster_name, colour = "blue"):
    
  clusters_json = [{"id": index+1, "name": [x["value"], "%d objects" % x["count"]], "parent": '0', "value": x["count"]} for index, x in enumerate(clusters)]
  clusters_json.insert(0, {"id": 0, "value": 0, "name": cluster_name})
    
  Vega({
  "$schema": "https://vega.github.io/schema/vega/v5.json",
  "description": "Hierarchical Data Layout",
  "width": 1200,
  "height": 800,
  "padding": 2.5,
  "autosize": "none",
  "data": [
    {
      "name": "tree",
      "values": clusters_json,
      "transform": [
        {
          "type": "stratify",
          "key": "id",
          "parentKey": "parent"
        },
        {
          "type": "treemap",
          "field": "value",
          "sort": {"field": "value", "order": "descending"},
          "round": True,
          "size": [{"signal": "width"}, {"signal": "height"}]
        }
      ]
    },
    {
      "name": "nodes",
      "source": "tree",
      "transform": [{ "type": "filter", "expr": "datum.children" }]
    },
    {
      "name": "leaves",
      "source": "tree",
      "transform": [{ "type": "filter", "expr": "datum.parent == 0" }]
    }
  ],

  "scales": [
    {
      "name": "color",
      "type": "ordinal",
      "domain": {"data": "nodes", "field": "value"},
      "range": [ colour ]
    },
    {
      "name": "size",
      "type": "ordinal",
      "domain": [0, 1, 2, 3],
      "range": [256, 10, 20, 14]
    },
    {
      "name": "opacity",
      "type": "ordinal",
      "domain": [0, 1, 2, 3],
      "range": [0.15, 0.5, 0.8, 1.0]
    }
  ],

  "marks": [
    {
      "type": "rect",
      "from": {"data": "nodes"},
      "interactive": False,
      "encode": {
        "enter": {
          "fill": {"scale": "color", "field": "value"}
        },
        "update": {
          "x": {"field": "x0"},
          "y": {"field": "y0"},
          "x2": {"field": "x1"},
          "y2": {"field": "y1"}
        }
      }
    },
    {
      "type": "rect",
      "from": {"data": "leaves"},
      "encode": {
        "enter": {
          "stroke": {"value": "#fff"}
        },
        "update": {
          "x": {"field": "x0"},
          "y": {"field": "y0"},
          "x2": {"field": "x1"},
          "y2": {"field": "y1"},
          "fill": {"value": "transparent"},
          "href": {"value": "https://collections.vam.ac.uk/"}
        },
        "hover": {
          "fill": {"value": "green"}
        }
      }
    },
    {
      "type": "text",
      "from": {"data": "leaves"},
      "interactive": False,
      "encode": {
        "enter": {
          "font": {"value": "Helvetica Neue, Arial"},
          "align": {"value": "center"},
          "baseline": {"value": "middle"},
          "fill": {"value": "#000"},
          "text": {"field": "name"},
          "fontSize": {"scale": "size", "field": "depth"},
          "fillOpacity": {"scale": "opacity", "field": "depth"}
        },
        "update": {
          "x": {"signal": "0.5 * (datum.x0 + datum.x1)"},
          "y": {"signal": "0.5 * (datum.y0 + datum.y1)"}
        }
      }
    }
  ]
}
)

import requests

req = requests.get('https://api.vam.ac.uk/v2/objects/clusters/material/search?kw_object_type=chair&cluster_size=25')
# Of course, in a real use case, error handling in case of no results should be added here instead of passing results directly to the treemap function
treemap(req.json(), "Materials", "#8bcf89")

In [5]:
import requests
import pandas as pd

object_df = pd.read_csv('https://api.vam.ac.uk/v2/objects/search?q=chair&page_size=5&response_format=csv')
object_df.head()

Unnamed: 0,accessionNumber,accessionYear,systemNumber,objectType,_primaryTitle,_primaryPlace,_primaryMaker__name,_primaryMaker__association,_primaryDate,_primaryImageId,_sampleMaterial,_sampleTechnique,_sampleStyle,_currentLocation__displayName,_objectContentWarning,_imageContentWarning
0,W.21-1990,1990,O48605,chair,Chippendale Chair with Grandmother pattern,New York,Venturi Scott Brown,designer,1984,2006AF3713,plywood,Cut-out,Post-Modern,In store,False,False
1,B.1-2013,2013,O1264771,Chair,Joan's chair,England,Ambrose Heal,designer,1901,2013GT3689,oak,inlay,Arts and Crafts (movement),In store,False,False
2,W.2-1995,1995,O72610,Chair,Ply Chair,Great Britain,"Morrison, Jasper",designer,1990,2006AU1797,plywood,sawing,New Functionalism,In store,False,False
3,W.28-2011,2011,O1243361,Chair,21400 mm chair,Tokyo (city),nendo,designer,2010,2015HX9943,steel,bent,,"Imagine Gallery, The Living Room, Case 3",False,False
4,W.2-2022,2022,O1681358,chair,RCP2 Chair,,"Atfield, Jane",Designer,1992,2022NC4231,polythene,,,"Design 1900 to Now, Room 76",False,False


Checking for a chair count in their collection

In [5]:
import requests

page_num = 1 

req = requests.get(f'https://api.vam.ac.uk/v2/objects/search?q=chair&kw_object_type=chair&page_size=10&page={page_num}')
object_data = req.json()
object_info = object_data["info"]
object_records = object_data["records"]
record_count = object_info["record_count"]
print(f"There are {record_count} object records that have the word 'chair' in the object type")
# print("The fifth object is called '%s' and has the type of '%s'" % (object_records[10]["_primaryTitle"], object_records[5]["objectType"]))
print(object_info)
print(object_info['pages'])


There are 972 object records that have the word 'chair' in the object type
{'version': '2.0', 'record_count': 972, 'record_count_exact': True, 'parameters': {}, 'page_size': 10, 'pages': 98, 'page': 1, 'image_count': 2507}
98


In [219]:
import json

print(json.dumps(object_data, indent=4))

{
    "info": {
        "version": "2.0",
        "record_count": 972,
        "record_count_exact": true,
        "parameters": {},
        "page_size": 10,
        "pages": 98,
        "page": 78,
        "image_count": 2507
    },
    "records": [
        {
            "systemNumber": "O370419",
            "accessionNumber": "W.70-1911",
            "objectType": "Chair",
            "_currentLocation": {
                "id": "THES50153",
                "displayName": "In store",
                "type": "storage",
                "site": "BH",
                "onDisplay": false,
                "detail": {
                    "free": "",
                    "case": "",
                    "shelf": "",
                    "box": ""
                }
            },
            "_primaryTitle": "",
            "_primaryMaker": {},
            "_primaryImageId": "2011FB0976",
            "_primaryDate": "1750-1800",
            "_primaryPlace": "",
            "_images": {
          

In [220]:
# loop through object record with the object parser
# pick up info and log it into the dictionary
# after all is looped and its all in the dictionary > print

import time

object_counter = 0
object_list = []

while object_counter <= 5:
    time.sleep(0.25)

    designer = object_records[object_counter]['_primaryMaker']
    title = object_records[object_counter]['_primaryTitle']
    date = object_records[object_counter]['_primaryDate']
    object_id = object_records[object_counter]['systemNumber']
    image_url = object_records[0]['_images']['_primary_thumbnail']

    # print(designer)
    # print(title)
    # print(date)
    # print(object_id)
    # print(image_url)

    # loop through each object in the object records
    # get id
    # request api url for each object
    # find materials and techniques

    object_record_url = f'https://api.vam.ac.uk/v2/object/{object_id}'

    obj_req = requests.get(object_record_url)
    obj_record_view = obj_req.json()
    material = obj_record_view['record']['materials']
    techniques = obj_record_view['record']['techniques']
    material_technique = obj_record_view['record']['materialsAndTechniques']

    object_dict = {
        'designer': designer,
        'title': title,
        'date': date,
        'object_id': object_id,
        'material': material,
        'techniques': techniques,
        'material_technique': material_technique,
        'image_url': image_url,
        'museum': 'v&a'
    }

    object_list.append(object_dict)

    object_counter += 1

print(len(object_list))
print(json.dumps(object_list, indent=4))



6
[
    {
        "designer": {},
        "title": "",
        "date": "1750-1800",
        "object_id": "O370419",
        "material": [],
        "techniques": [],
        "material_technique": "",
        "image_url": "https://framemark.vam.ac.uk/collections/2011FB0976/full/!100,100/0/default.jpg",
        "museum": "v&a"
    },
    {
        "designer": {},
        "title": "",
        "date": "ca. 1760",
        "object_id": "O372163",
        "material": [],
        "techniques": [],
        "material_technique": "",
        "image_url": "https://framemark.vam.ac.uk/collections/2011FB0976/full/!100,100/0/default.jpg",
        "museum": "v&a"
    },
    {
        "designer": {},
        "title": "",
        "date": "1710-1730",
        "object_id": "O118957",
        "material": [],
        "techniques": [],
        "material_technique": "",
        "image_url": "https://framemark.vam.ac.uk/collections/2011FB0976/full/!100,100/0/default.jpg",
        "museum": "v&a"
    },
    {
 

In [221]:
# Retrieve all entries through API and dump it in a dictionary

from random import randint

page_num = 1 
object_list = []

while page_num < 99:
    time.sleep(randint(0,20))
    req = requests.get(f'https://api.vam.ac.uk/v2/objects/search?q=chair&kw_object_type=chair&page_size=10&page={page_num}')

    if req.status_code == 429:
        print("429 error.")
        time.sleep(120)
    else:
        object_data = req.json()
        object_info = object_data["info"]
        object_records = object_data["records"]
        # print(object_info)



        object_counter = 0

        #object_data['info']['record_count']
        while object_counter <= 9: #CHANGE THIS AT THE END to 99
            rand_var = randint(200,450)
            time.sleep(rand_var/1000)

            designer = object_records[object_counter]['_primaryMaker']
            title = object_records[object_counter]['_primaryTitle']
            date = object_records[object_counter]['_primaryDate']
            object_id = object_records[object_counter]['systemNumber']
            # image_url = object_records[0]['_images']['_primary_thumbnail']

            # print(designer)
            # print(title)
            # print(date)
            # print(object_id)
            # print(image_url)

            object_record_url = f'https://api.vam.ac.uk/v2/object/{object_id}'

            obj_req = requests.get(object_record_url)
            obj_record_view = obj_req.json()
            material = obj_record_view['record']['materials']
            techniques = obj_record_view['record']['techniques']
            material_technique = obj_record_view['record']['materialsAndTechniques']


            object_dict = {
                'designer': designer if designer is not None else 'Null',
                'title': title if title is not None else 'Null',
                'date': date if date is not None else 'Null',
                'object_id': object_id if object_id is not None else 'Null',
                'material': material if material is not None else 'Null',
                'techniques': techniques if techniques is not None else 'Null',
                'material_technique': material_technique if material_technique is not None else 'Null',
                'museum': 'v&a'
            }
            
            object_list.append(object_dict)

            object_counter += 1

    page_num += 1
    print(len(object_list))

print(len(object_list))

# print(json.dumps(object_list, indent=4))

10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330
340
350
360
370
380
390
400
410
420
430
440
450
460
470
480
490
500
510
520
530
540
550
560
570
580
590
600
610
620
630
640
650
660
670
680
690
700
710
720
730
740
750
760
770
780
790
800
810
820
830
840
850
860
870
880
890
900
910
920
930
940
950
960
970


IndexError: list index out of range

In [223]:
# Check if i got everything in the dictionary

print(len(object_list))

972


In [225]:
#  Check last entry in the dictionary

print(object_list[971])

{'designer': {}, 'title': '', 'date': '1680-1700', 'object_id': 'O372248', 'material': [], 'techniques': [], 'material_technique': 'carved and turned walnut, caned seat', 'museum': 'v&a'}


In [226]:
# Save 972 entries into file

import json

with open('v&a_chairs.json', 'w') as file:
    file.write(json.dumps(object_list, indent=4))

* CLEANING UP THE DICTIONARY LOCALLY


In [2]:
# Open file

import json

with open('/Users/liwen/Documents/Github/Projects/PFCH-final/v&a_chairs.json','r') as file:
    chair_data = json.load(file)
    print(len(chair_data))

972


In [179]:
#  Cleaning and standardizing entries

import json
import re

with open('/Users/liwen/Documents/Github/Projects/PFCH-final/v&a_chairs.json','r') as file:
    chair_data = json.load(file)
    
chair_dict = []
# medium_list = []

for each_entry in chair_data:

    artist_entry = each_entry.get('designer')
    artist = artist_entry.get('name')

    if artist == None:
        artist = "unknown"
    else:
        if ", " in artist:
            # print(artist)
            split_list = artist.split()
            # print(split_list)
            first_name = split_list[1]
            last_name = split_list[0]
            reversed_name = f"{first_name} {last_name}".rstrip(",")
            # print(reversed_named)
            
            artist = reversed_name

    regex_name = re.sub(r'Unknown', 'Artist unknown', artist, flags=re.IGNORECASE)
    artist = regex_name

    # print(artist)

    medium_list = []
    medium_entry = each_entry.get("material")
    # print(medium_entry)
    for each_medium in medium_entry:
        
        medium = each_medium.get("text", "")
        # print(medium)
        medium_list.append(medium)
        # print(medium_list)

    date = each_entry.get('date')
    # print(date)

    #clean up everything but the centuries entries
    match = re.compile(r'(\d{4}).*\d*').search(date)
    if match:
        regex_date = match.group(1)
        date = regex_date
    # print(date)

    if "century" in date:
        date = ""
    # print(date)
        
    title = each_entry.get('title')
    medium = each_entry.get('medium_list')
    
   
    chair_entry = {
        "artist": artist,
        "date": date,
        "medium": medium_list,
        "title": title,
        "museum": "v&a"
    }

    if date != "":
        chair_dict.append(chair_entry)

print(len(chair_dict))
print(json.dumps(chair_dict, indent=4))

907
[
    {
        "artist": "Venturi Scott Brown",
        "date": "1984",
        "medium": [
            "plywood",
            "plastic laminate"
        ],
        "title": "Chippendale Chair with Grandmother pattern",
        "museum": "v&a"
    },
    {
        "artist": "Jasper Morrison",
        "date": "1990",
        "medium": [
            "plywood",
            "veneer"
        ],
        "title": "Ply Chair",
        "museum": "v&a"
    },
    {
        "artist": "Ambrose Heal",
        "date": "1901",
        "medium": [
            "oak"
        ],
        "title": "Joan's chair",
        "museum": "v&a"
    },
    {
        "artist": "nendo",
        "date": "2010",
        "medium": [
            "steel"
        ],
        "title": "21400 mm chair",
        "museum": "v&a"
    },
    {
        "artist": "Jane Atfield",
        "date": "1992",
        "medium": [
            "polythene",
            "metal"
        ],
        "title": "RCP2 Chair",
        "museum": "

In [180]:
# Save 907 entries into file

with open("/Users/liwen/Documents/Github/Projects/PFCH-final/cleaned_v&a.json", 'w') as file:
    all_chairs = json.dump(chair_dict, file, indent=4)