# Get Data into a CSV File

If you are using your own dataset, you can edit this notebook so that it produces a .csv file with the same structure. In this notebook, we use data from The MET API, which is documented at https://metmuseum.github.io/.

In [None]:
import numpy as np
import cv2
import requests
import csv
import os

## Use The MET API to get the object IDs

Each object ID corresponds to one of the art pieces in the dataset.

In [None]:
# Ask The MET API for the list of object IDs. The response JSON carries the
# count under 'total' and the IDs themselves under 'objectIDs'.
r = requests.get("https://collectionapi.metmuseum.org/public/collection/v1/objects")
# Fail fast with a clear HTTP error rather than a confusing JSON/KeyError below.
r.raise_for_status()
r_json = r.json()
total = r_json['total']
print("There are {} valid objects in this dataset".format(total))
objectIDs = r_json['objectIDs']

In [None]:
# Base endpoint for single-object lookups; an object ID is appended to it.
prefix = "https://collectionapi.metmuseum.org/public/collection/v1/objects/"

# The JSON keys of the first object are used as the CSV column headers.
url = "{}{}".format(prefix, objectIDs[0])
r = requests.get(url)
col_headers = r.json().keys()

# Location of the CSV file that the later cells write to.
filename = '../main/data/metdata.csv'

# Drop any file left over from a previous run. A missing file (or any other
# OSError raised by the removal) is deliberately ignored.
try:
    os.unlink(filename)
except OSError:
    pass

header_message = "The column headers are the following: {}"
print(header_message.format(col_headers))

In [None]:
# Make the data folder if needed.
data_folder = os.path.dirname(filename)
# dirname is '' when filename has no directory component; os.makedirs('')
# would raise, so only create a folder when there is one to create.
# exist_ok=True avoids the check-then-create race of a separate exists() test.
if data_folder:
    os.makedirs(data_folder, exist_ok=True)

## Go through the objects and write lines in the CSV file

The columns of the CSV are labeled by the headers above.

In [None]:
# Limit on the number of rows to write to the CSV file (roughly one row per
# object fetched from the API). The write loop below stops once this limit
# is reached. This value can be changed.
NUMBER_OF_OBJECTIDS_TO_MAKE = 500

In [None]:
# Iterate through the object IDs and write one CSV row per object.
#
# - newline='' is what the csv module expects for files it writes to;
#   without it, extra blank lines can appear on platforms that translate
#   line endings.
# - An explicit UTF-8 encoding keeps any non-ASCII text independent of the
#   platform's default encoding.
# - Each row is built by looking up every header in the object's JSON, so the
#   values always line up with the header row even if an object is missing a
#   key or returns its keys in a different order (missing keys become empty
#   cells).
header_list = list(col_headers)
rows_written = 0
with open(filename, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(header_list)

    for ID in objectIDs:
        # stop once exactly NUMBER_OF_OBJECTIDS_TO_MAKE rows have been written
        if rows_written >= NUMBER_OF_OBJECTIDS_TO_MAKE:
            break
        url = prefix + str(ID)
        r = requests.get(url)
        # an error response does not describe an object; writing it would
        # produce a malformed row, so skip it instead
        if not r.ok:
            print("Skipping object {} (HTTP {})".format(ID, r.status_code))
            continue
        record = r.json()
        csv_writer.writerow([record.get(header) for header in header_list])
        # print out each of the rows by their index
        print(rows_written)
        rows_written += 1