In [14]:
import json
import pandas as pd
import os

In [7]:
# Sample input for the from_records demo: one (coffee, price) tuple per row
records = [
    ("Espresso", "5$"),
    ("Flat White", "10$"),
]

In [9]:
# No `columns` argument: the columns get default integer labels (0, 1)
pd.DataFrame.from_records(records)

Unnamed: 0,0,1
0,Espresso,5$
1,Flat White,10$


In [10]:
# `columns` gives each tuple position a readable column name
pd.DataFrame.from_records(records, columns=["Coffee", "Price"])

Unnamed: 0,Coffee,Price
0,Espresso,5$
1,Flat White,10$


In [11]:
# Fields to pull out of each artwork JSON file; the same list is later used
# as the DataFrame column labels, with 'id' serving as the index.

KEYS_TO_USE = ['id', 'all_artists', 'title', 'medium', 'acquisitionYear', 'height', 'width']

In [12]:
def get_record_from_file(file_path, keys_to_use):
    """Read a single JSON file and return selected fields as a tuple.

    Parameters
    ----------
    file_path : str or path-like
        Location of the JSON file to read.
    keys_to_use : iterable of str
        Keys to look up in the loaded JSON object. The returned tuple
        follows this order.

    Returns
    -------
    tuple
        The values found under ``keys_to_use``, in the same order.

    Raises
    ------
    KeyError
        If the loaded JSON object lacks one of the requested keys.
    """
    # Decode explicitly as UTF-8 so non-ASCII text (e.g. "Müller") is read
    # the same way regardless of the platform's default encoding.
    with open(file_path, encoding="utf-8") as artwork_file:
        content = json.load(artwork_file)

    return tuple(content[field] for field in keys_to_use)

In [17]:
# Demo of the single-file reader: point it at one known artwork file
SAMPLE_JSON = os.path.join('artworks', 'a', '000', 'a00001-1035.json')

# Extract only the fields listed in KEYS_TO_USE from that file
sample_record = get_record_from_file(SAMPLE_JSON, KEYS_TO_USE)

In [18]:
sample_record

(1035,
 'Robert Blake',
 'A Figure Bowing before a Seated Old Man with his Arm Outstretched in Benediction. Verso: Indecipherable Sketch',
 'Watercolour, ink, chalk and graphite on paper. Verso: graphite on paper',
 1922,
 '419',
 '394')

In [19]:
def read_artworks_from_json(keys_to_use, json_root='artworks'):
    """Build a DataFrame from one JSON file per directory under ``json_root``.

    Walks the directory tree and, in every directory, passes the first
    ``.json`` file (in the order ``os.walk`` lists the files) to
    ``get_record_from_file``. The remaining files of that directory are
    skipped and the walk moves on to the next directory.

    Parameters
    ----------
    keys_to_use : list of str
        Fields to extract from each file; also used as the column labels.
        Must include ``'id'``, which becomes the index.
    json_root : str, optional
        Directory to traverse. Defaults to ``'artworks'``.

    Returns
    -------
    pandas.DataFrame
        One row per file that was read, indexed by ``'id'``.
    """
    artworks = []

    for root, _, files in os.walk(json_root):
        for f in files:
            if f.endswith('.json'):
                record = get_record_from_file(os.path.join(root, f), keys_to_use)
                artworks.append(record)
                # Leave the directory only after a JSON file was actually
                # read; a non-JSON entry listed first must not cause the
                # whole directory to be skipped.
                break

    df = pd.DataFrame.from_records(artworks, columns=keys_to_use, index='id')

    return df

In [20]:
# Collect the selected fields from the JSON tree into a DataFrame indexed by 'id'
df = read_artworks_from_json(KEYS_TO_USE)

In [21]:
df

Unnamed: 0_level_0,all_artists,title,medium,acquisitionYear,height,width
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
6620,William Hogarth,A Scene from ‘The Beggar’s Opera’ VI,Oil paint on canvas,1909,762,572
10413,William James Müller,"Study of Pines, Rheinwald",Watercolour on paper,1908,410,269
496,Sir Lawrence Alma-Tadema,A Silent Greeting,Oil paint on wood,1894,229,305
15803,Frederick Walker,The Vagrants,Oil paint on canvas,1886,1264,832
1810,"Sir Edward Coley Burne-Jones, Bt",Head and Hand,Graphite on paper,1932,165,225
...,...,...,...,...,...,...
80497,James Rosenquist,Sun Sets on the Time Zone,"Paper, acrylic paint, dye and lithograph on paper",2004,1473,2019
19078,Hamish Fulton,Seven Winds. Scotland 1985,Lithograph on paper,1990,829,1070
83640,Thomas Schütte,[no title],Screenprint on paper,2003,320,447
21713,Philip Guston,Untitled,Lithograph on paper,1996,765,570
