**How to Query The Metropolitan Museum of Art's Public Domain Art Works
(BigQuery Dataset)**

In [1]:
import bq_helper
from bq_helper import BigQueryHelper
# https://www.kaggle.com/sohier/introduction-to-the-bq-helper-package
met = bq_helper.BigQueryHelper(active_project="bigquery-public-data",
                                   dataset_name="the_met")

In [2]:
bq_assistant = BigQueryHelper("bigquery-public-data", "the_met")
bq_assistant.list_tables()

['images', 'objects', 'vision_api_data']

In [3]:
bq_assistant.head("images", num_rows=15)

Unnamed: 0,object_id,public_caption,title,original_image_url,caption,is_oasc,gcs_url
0,435868,"Fig. 8. X-radiograph of The Met, 61.101.1",,http://images.metmuseum.org/CRDImages/ep/origi...,,False,gs://gcs-public-data--met/435868/8.jpg
1,634108,"Fig. 5. Ferdinand Hodler, sketch for ""The Drea...",,http://images.metmuseum.org/CRDImages/ep/origi...,,False,gs://gcs-public-data--met/634108/5.jpg
2,634108,"Fig. 2. Ferdinand Hodler, sketch for ""The Drea...",,http://images.metmuseum.org/CRDImages/ep/origi...,,False,gs://gcs-public-data--met/634108/2.jpg
3,634108,"Fig. 7. Ferdinand Hodler, sketch for ""The Drea...",,http://images.metmuseum.org/CRDImages/ep/origi...,,False,gs://gcs-public-data--met/634108/7.jpg
4,634108,"Fig. 6. Ferdinand Hodler, sketch for ""The Drea...",,http://images.metmuseum.org/CRDImages/ep/origi...,,False,gs://gcs-public-data--met/634108/6.jpg
5,634108,"Fig. 3. Ferdinand Hodler, sketch for ""The Drea...",,http://images.metmuseum.org/CRDImages/ep/origi...,,False,gs://gcs-public-data--met/634108/3.jpg
6,634108,"Fig. 4. Ferdinand Hodler, sketch for ""The Drea...",,http://images.metmuseum.org/CRDImages/ep/origi...,,False,gs://gcs-public-data--met/634108/4.jpg
7,435868,"Fig. 1. Paul Cézanne, ""The Card Players,"" ca. ...",,http://images.metmuseum.org/CRDImages/ep/origi...,,False,gs://gcs-public-data--met/435868/1.jpg
8,435868,"Fig. 7. Antoine Le Nain, ""The Little Card Play...",,http://images.metmuseum.org/CRDImages/ep/origi...,,False,gs://gcs-public-data--met/435868/7.jpg
9,435868,"Fig. 3. Paul Cézanne, ""The Card Players,"" ca. ...",,http://images.metmuseum.org/CRDImages/ep/origi...,,False,gs://gcs-public-data--met/435868/3.jpg


In [4]:
bq_assistant.table_schema("images")

[SchemaField('object_id', 'INTEGER', 'NULLABLE', None, ()),
 SchemaField('public_caption', 'STRING', 'NULLABLE', None, ()),
 SchemaField('title', 'STRING', 'NULLABLE', None, ()),
 SchemaField('original_image_url', 'STRING', 'NULLABLE', None, ()),
 SchemaField('caption', 'STRING', 'NULLABLE', None, ()),
 SchemaField('is_oasc', 'BOOLEAN', 'NULLABLE', None, ()),
 SchemaField('gcs_url', 'STRING', 'NULLABLE', None, ())]

What are the types of art by department?


In [5]:
query1 = """
SELECT department, COUNT(*) c 
FROM `bigquery-public-data.the_met.objects`
GROUP BY 1
ORDER BY c DESC;
        """
response1 = met.query_to_pandas_safe(query1)
response1.head(10)

Unnamed: 0,department,c
0,Drawings and Prints,43488
1,European Sculpture and Decorative Arts,30611
2,Asian Art,29844
3,Greek and Roman Art,12518
4,Egyptian Art,12278
5,Islamic Art,10435
6,American Decorative Arts,8942
7,Costume Institute,7824
8,Medieval Art,6838
9,Photographs,6583


![](https://cloud.google.com/blog/big-data/2017/08/images/150177792553261/met03.png)
https://cloud.google.com/blog/big-data/2017/08/images/150177792553261/met03.png

What are the top types of media used for the items in this collection?

In [6]:
query2 = """SELECT 
      LOWER(label) as medium, 
      COUNT(*) c 
FROM `bigquery-public-data.the_met.objects`, 
UNNEST(SPLIT(medium, ',')) label
GROUP BY 1
ORDER BY c DESC;
        """
response2 = met.query_to_pandas_safe(query2)
response2.head(10)

Unnamed: 0,medium,c
0,silk,10130
1,bronze,6023
2,glass,5764
3,etching,5539
4,terracotta,4809
5,silver,4705
6,gold,4617
7,engraving,3973
8,albumen photograph,3273
9,woodcut,3268


![](https://cloud.google.com/blog/big-data/2017/08/images/150177792553261/met04.png)
https://cloud.google.com/blog/big-data/2017/08/images/150177792553261/met04.png

What types of art were most popular during various different time periods?

In [7]:
query3 = """SELECT period, description, c FROM (
  SELECT 
a.period, 
b.description, 
count(*) c, 
row_number() over (partition by period order by count(*) desc) seqnum 
  FROM `bigquery-public-data.the_met.objects` a
  JOIN (
    SELECT 
        label.description as description, 
        object_id 
    FROM `bigquery-public-data.the_met.vision_api_data`, UNNEST(labelAnnotations) label
  ) b
  ON a.object_id = b.object_id
  WHERE a.period is not null
  group by 1,2
)
WHERE seqnum <= 3
AND c >= 10 # only include labels that have 10 or more pieces associated with it
AND description != "art"
ORDER BY period, c desc;
        """
response3 = met.query_to_pandas_safe(query3, max_gb_scanned=10)
response3.head(30)

Unnamed: 0,period,description,c
0,Achaemenid,material,48
1,Achaemenid,drawing,42
2,Akkadian,relief,36
3,Akkadian,stone carving,28
4,Akkadian,black and white,23
5,Angkor period,sculpture,46
6,Angkor period,statue,38
7,Archaic,sculpture,488
8,Archaic,ceramic,486
9,Archaic or Classical,ceramic,26


Where are the images in this dataset currently hosted?

In [8]:
query4 = """SELECT REGEXP_EXTRACT(page.url, '//([^/]*)/?') domain, COUNT(*) c
FROM `bigquery-public-data.the_met.vision_api_data`, 
UNNEST(webDetection.pagesWithMatchingImages) as page
GROUP BY 1
ORDER BY c DESC;
        """
response4 = met.query_to_pandas_safe(query4, max_gb_scanned=10)
response4.head(10)

Unnamed: 0,domain,c
0,www.pinterest.com,119430
1,www.metmuseum.org,70717
2,www.pinterest.se,18421
3,metmuseum.org,10773
4,archive.org,7585
5,www.alamy.com,5104
6,ukiyo-e.org,5003
7,commons.wikimedia.org,3967
8,holstshop.ru,3500
9,it.pinterest.com,3339


Can you sort images by color?

In [9]:
query5 = """SELECT 
color.color.red as r, 
color.color.green as g, 
color.color.blue as b,
concat("https://storage.cloud.google.com/gcs-public-data--met/", 
cast(object_id as string), "/0.jpg") as img_url 
FROM `bigquery-public-data.the_met.vision_api_data`, 
UNNEST(imagePropertiesAnnotation.dominantColors.colors) color
WHERE color.color.red < 0x64
AND (color.color.green > 0x96 or color.color.green < 0xC8)
AND color.color.blue > 0xC8;
        """
response5 = met.query_to_pandas_safe(query5, max_gb_scanned=10)
response5.head(10)

Unnamed: 0,r,g,b,img_url
0,26,163,202,https://storage.cloud.google.com/gcs-public-da...
1,22,123,207,https://storage.cloud.google.com/gcs-public-da...
2,53,160,214,https://storage.cloud.google.com/gcs-public-da...
3,15,112,207,https://storage.cloud.google.com/gcs-public-da...
4,73,172,227,https://storage.cloud.google.com/gcs-public-da...
5,86,85,205,https://storage.cloud.google.com/gcs-public-da...
6,57,161,205,https://storage.cloud.google.com/gcs-public-da...
7,75,163,201,https://storage.cloud.google.com/gcs-public-da...
8,74,140,234,https://storage.cloud.google.com/gcs-public-da...
9,81,125,213,https://storage.cloud.google.com/gcs-public-da...


Which images contain famous landmarks?


In [10]:
query6 = """SELECT b.object_id, b.original_image_url, landmark.description, landmark.mid, landmark.score 
FROM `bigquery-public-data.the_met.vision_api_data` a, 
UNNEST(landmarkAnnotations) landmark
JOIN (
  SELECT object_id, original_image_url, gcs_url 
  FROM `bigquery-public-data.the_met.images` 
) b
ON a.object_id = b.object_id
AND ends_with(lower(b.gcs_url), '/0.jpg')
ORDER BY score DESC;
        """
response6 = met.query_to_pandas_safe(query6, max_gb_scanned=10)
response6.head(20)

Unnamed: 0,object_id,original_image_url,description,mid,score
0,285715,http://images.metmuseum.org/CRDImages/ph/origi...,Yosemite National Park,/m/0c_y8,0.980163
1,436528,http://images.metmuseum.org/CRDImages/ep/origi...,Met Art,/g/12629mj33,0.965158
2,436530,http://images.metmuseum.org/CRDImages/ep/origi...,Met Art,/g/12629mj33,0.962197
3,199313,http://images.metmuseum.org/CRDImages/es/origi...,Château de Malmaison,/m/0jx8l,0.958862
4,437788,http://images.metmuseum.org/CRDImages/ep/origi...,Metropolitan Museum of Art,/m/09c7b,0.95775
5,438817,http://images.metmuseum.org/CRDImages/ep/origi...,New York City,/m/02_286,0.954218
6,436528,http://images.metmuseum.org/CRDImages/ep/origi...,Metropolitan Museum of Art,/m/09c7b,0.954066
7,436529,http://images.metmuseum.org/CRDImages/ep/origi...,Met Art,/g/12629mj33,0.942639
8,436532,http://images.metmuseum.org/CRDImages/ep/origi...,Metropolitan Museum of Art,/m/09c7b,0.939375
9,340029,http://images.metmuseum.org/CRDImages/dp/origi...,Munch Museum,/m/017n7v,0.935057
