In [1]:
# Init and import

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Load the artwork table from CSV, reading only the columns used below and
# using the 'id' column as the row index.
data_source = 'artwork_data.csv'
columns_to_load = [
    'id', 'artist', 'title', 'medium', 'year',
    'acquisitionYear', 'width', 'height', 'units',
]
df = pd.read_csv(
    data_source,
    index_col='id',
    usecols=columns_to_load,
    # Infer each column's dtype from the whole file rather than chunk by chunk;
    # chunked inference is what raises pandas' DtypeWarning on mixed-type
    # columns and can leave a column holding a mix of numbers and strings.
    low_memory=False,
)
print(df.shape)
print(df.head())

(69201, 8)
              artist                                              title  \
id                                                                        
1035   Blake, Robert  A Figure Bowing before a Seated Old Man with h...   
1036   Blake, Robert  Two Drawings of Frightened Figures, Probably f...   
1038   Blake, Robert     Six Drawings of Figures with Outstretched Arms   
1039  Blake, William  The Circle of the Lustful: Francesca da Rimini...   

                                                 medium  year  \
id                                                              
1035  Watercolour, ink, chalk and graphite on paper....   NaN   
1036                                  Graphite on paper   NaN   
1037        Graphite on paper. Verso: graphite on paper  1785   
1038                                  Graphite on paper   NaN   
1039                            Line engraving on paper  1826   

      acquisitionYear width height units  
id                                     

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
# Utility and filtering

In [5]:
# Distinct artist names in order of first appearance, then how many there are.
artist_column = df['artist']
uniques = artist_column.unique()
print(uniques)
print(len(uniques))

['Blake, Robert' 'Blake, William' 'Richmond, George' ... 'Sterne, Hedda'
 'P-Orridge, Genesis' 'Brunias, Agostino']
3336


In [6]:
# Number of rows per artist, most frequent first; the result is indexed by
# artist name, so one artist's count is a label lookup.
occurrences = df.loc[:, 'artist'].value_counts()
print(occurrences)
print(occurrences.loc['Blake, William'])

Turner, Joseph Mallord William                                                                         39389
Jones, George                                                                                           1046
Moore, Henry, OM, CH                                                                                     623
Daniell, William                                                                                         612
Beuys, Joseph                                                                                            578
British (?) School                                                                                       388
Paolozzi, Sir Eduardo                                                                                    385
Flaxman, John                                                                                            287
Phillips, Esq Tom                                                                                        274
Warhol, Andy       

In [7]:
# Loc and iloc

In [8]:
# .loc selects by index label (here the 'id' index), and it also accepts a
# boolean mask: only the rows where the mask is True are kept.
is_bacon = df['artist'].eq('Bacon, Francis')
bacon_paintings = df.loc[is_bacon]
print(bacon_paintings)

               artist                                              title  \
id                                                                         
672    Bacon, Francis                              Figure in a Landscape   
673    Bacon, Francis                                     Study of a Dog   
674    Bacon, Francis  Three Studies for Figures at the Base of a Cru...   
677    Bacon, Francis                Study for a Portrait of Van Gogh IV   
678    Bacon, Francis                                    Reclining Woman   
679    Bacon, Francis                                      Seated Figure   
680    Bacon, Francis                  Study for Portrait on Folding Bed   
681    Bacon, Francis                      Portrait of Isabel Rawsthorne   
682    Bacon, Francis                         Three Figures and Portrait   
683    Bacon, Francis  Study for Portrait II (after the Life Mask of ...   
684    Bacon, Francis                             Triptych - August 1972   
19509  Bacon

In [9]:
# .iloc selects by integer position on both axes. The row selector ':' keeps
# every row; the column selector is a list, so the result stays a DataFrame
# (one column) rather than collapsing to a Series.
title_positions = [1]
painting_titles = df.iloc[:, title_positions]
print(painting_titles)

                                                    title
id                                                       
1035    A Figure Bowing before a Seated Old Man with h...
1036    Two Drawings of Frightened Figures, Probably f...
1038       Six Drawings of Figures with Outstretched Arms
1039    The Circle of the Lustful: Francesca da Rimini...
1040        Ciampolo the Barrator Tormented by the Devils
1041                          The Baffled Devils Fighting
1042    The Six-Footed Serpent Attacking Agnolo Brunel...
1043                   The Serpent Attacking Buoso Donati
1044                   The Pit of Disease: The Falsifiers
1045             Dante Striking against Bocca Degli Abati
1046                                   Job and his Family
1047                       Satan before the Throne of God
1048        Job’s Sons and Daughters Overwhelmed by Satan
1049           The Messengers tell Job of his Misfortunes
1050    Satan Going Forth from the Presence of the Lor...
1051          

In [10]:
# Filter on compound cols

In [11]:
# Add numeric versions of the raw dimension columns. errors='coerce' turns any
# value that cannot be parsed as a number into NaN instead of raising.
for clean_name, raw_name in (('width_clean', 'width'), ('height_clean', 'height')):
    df[clean_name] = pd.to_numeric(df[raw_name], errors='coerce')
print(df.head())
print(df['units'].value_counts())  # all in mm

              artist                                              title  \
id                                                                        
1035   Blake, Robert  A Figure Bowing before a Seated Old Man with h...   
1036   Blake, Robert  Two Drawings of Frightened Figures, Probably f...   
1038   Blake, Robert     Six Drawings of Figures with Outstretched Arms   
1039  Blake, William  The Circle of the Lustful: Francesca da Rimini...   

                                                 medium  year  \
id                                                              
1035  Watercolour, ink, chalk and graphite on paper....   NaN   
1036                                  Graphite on paper   NaN   
1037        Graphite on paper. Verso: graphite on paper  1785   
1038                                  Graphite on paper   NaN   
1039                            Line engraving on paper  1826   

      acquisitionYear width height units  width_clean  height_clean  
id                     

In [12]:
# Derive an area column from the cleaned dimensions, then report the largest
# and smallest areas: the value, the index label it occurs at, and the full row.
df = df.assign(area=df['width_clean'].mul(df['height_clean']))
area = df['area']

print(area.max())
largest_id = area.idxmax()
print(largest_id)
print(df.loc[largest_id, :])

print(area.min())
smallest_id = area.idxmin()
print(smallest_id)
print(df.loc[smallest_id, :])

132462000.0
98367
artist                               Therrien, Robert
title                No Title (Table and Four Chairs)
medium             Aluminium, steel, wood and plastic
year                                             2003
acquisitionYear                                  2008
width                                            8920
height                                          14850
units                                              mm
width_clean                                      8920
height_clean                                    14850
area                                      1.32462e+08
Name: 98367, dtype: object
237.0
9453
artist                                  Mesens, E.L.T.
title                                  Thème de Ballet
medium             Paper and knitting needles on paper
year                                              1960
acquisitionYear                                   1969
width                                                3
height              