In [None]:
# Standard library imports
import json
import logging
import os
import time
from io import BytesIO
from urllib.parse import urljoin

# Third-party imports
import numpy as np
import pandas as pd
import requests
from dotenv import load_dotenv
from PIL import Image
from ratelimit import limits, sleep_and_retry
from tqdm import tqdm

### Accessing the Art Institute of Chicago's data dump
The Art Institute of Chicago provides a downloadable data dump that you can search through to find what you want to access
before making any API calls. I looked through it to find artworks I could use for classification.

In [None]:
# Path to the JSONL data dump (one JSON object per line)
file_path = 'allArtworks.jsonl'

# Read the JSONL file. The encoding is given explicitly (matching the utf-8
# used for the per-artwork JSON files) so the result does not depend on the
# platform default. Blank lines are skipped because json.loads raises on an
# empty string.
with open(file_path, 'r', encoding='utf-8') as file:
    data = [json.loads(line) for line in file if line.strip()]

# Create a pandas DataFrame with one row per parsed JSON object
datadump_df = pd.DataFrame(data)

# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would only add a stray "None" line to the output.
datadump_df.info()

datadump_df.head()

In [None]:
# Count how many artworks fall under each department title
department_counts = datadump_df["department_title"].value_counts()
department_counts

Unfortunately, the main dataframe does not contain styles; however, the data dump also comes with
a JSON file for every piece of artwork. I load them all into a dataframe and filter the artworks this way. I can use 'artwork_type_title'
to keep only paintings or prints, then 'style_titles' and 'style_title' for the style of each artwork.

In [None]:
# Directory containing one JSON file per artwork. The AIC_JSON_DIR environment
# variable, when set, overrides the default so the notebook is not tied to a
# single machine's folder layout; the original path remains the fallback.
json_dir = os.environ.get(
    "AIC_JSON_DIR",
    r"C:\Users\16148\Downloads\artic-api-data\json\artworks",
)

# Collect the parsed content of each JSON file here
data_list = []

# os.listdir returns names in arbitrary order; sorting makes the resulting
# row order reproducible from run to run. Filtering first also lets the
# progress bar count only the files that are actually loaded.
json_filenames = sorted(
    name for name in os.listdir(json_dir) if name.endswith(".json")
)

for filename in tqdm(json_filenames, desc="Loading files"):
    file_path = os.path.join(json_dir, filename)
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            data_list.append(json.load(file))
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Report a file that cannot be decoded and keep going, rather than
        # aborting the whole load because of one bad file.
        print(f"Error decoding JSON in file: {filename}")

# Create a DataFrame from the list of JSON data
all_df = pd.DataFrame(data_list)

# DataFrame.info() prints its summary itself and returns None, so it is not
# wrapped in print().
all_df.info()
all_df.head()

In [None]:
# Write the combined artwork DataFrame to a CSV file, leaving out the index column
output_csv = "all_artworks_aic.csv"
all_df.to_csv(output_csv, index=False)