# Collect artworks and metadata for all artists

In [None]:
import time

In [None]:
from artscraper import GoogleArtScraper, FindArtworks, random_wait_time, retry

In [None]:
# Maximum number of attempts to perform a task.
# Handed to every retry(...) call in the scraping cells below.
max_retries = 3

In [None]:
# Minimum wait time handed to both scrapers (FindArtworks, GoogleArtScraper)
# and to every retry(...) call below.
# NOTE(review): presumably in seconds -- confirm against the artscraper docs.
min_wait_time = 10

In [None]:
# Google Arts & Culture entity pages of the artists to process.
# Currently a single artist: Clementine Hunter, 27 artworks.
artist_urls = ['https://artsandculture.google.com/entity/clementine-hunter/m0d1k7n']

In [None]:
# Directory in which the data is to be stored.
# The scraping cell below reads each artist's works.txt from a
# sub-directory of this path.
output_dir = './data'

In [None]:
%%time 

# For each artist: save the artist-level information, then scrape every
# artwork listed in that artist's works.txt.
for artist_url in artist_urls:
    with FindArtworks(artist_link=artist_url, output_dir=output_dir,
                      min_wait_time=min_wait_time) as scraper:
        # Save list of works, description, and metadata for an artist
        retry(scraper.save_artist_information, max_retries, min_wait_time)
        # Path of this artist's directory. Only the path string is built
        # here; works.txt is read from this directory right below, so it is
        # expected to exist once the save step above has finished.
        artist_dir = f'{output_dir}/{scraper.get_wikipedia_article_title()}'
        # Get list of links to this artist's works (one per line). Blank or
        # whitespace-only lines are skipped so that no empty URL is handed
        # to the artwork scraper.
        with open(f'{artist_dir}/works.txt', 'r') as file:
            artwork_links = [link for link in map(str.rstrip, file) if link]
    # Scrape artworks into the 'works' sub-directory of this artist
    with GoogleArtScraper(f'{artist_dir}/works', min_wait=min_wait_time) as subscraper:
        # Go through each artwork link
        for url in artwork_links:
            print(f'artwork URL: {url}')
            retry(subscraper.save_artwork_information, max_retries, min_wait_time, url)

In [None]:
# Number of artwork links read from works.txt for the last artist processed
# by the loop above (artwork_links is reassigned on every iteration).
len(artwork_links)

# Display results

## Display contents of data directory

In [None]:
!ls data

## Display contents of directory for one artist

In [None]:
!ls ./data/Clementine_Hunter

## Description of artist

In [None]:
!cat ./data/Clementine_Hunter/description.txt

## Metadata of artist

In [None]:
!cat ./data/Clementine_Hunter/metadata.json

## Directory containing works of this artist

In [None]:
!ls ./data/Clementine_Hunter/works

## Directory containing one artwork by this artist

In [None]:
!ls ./data/Clementine_Hunter/works/flowers-clementine-hunter_zQERekxk8d_F8g

## Display metadata for this artwork

In [None]:
!cat ./data/Clementine_Hunter/works/flowers-clementine-hunter_zQERekxk8d_F8g/metadata.json

## Checking that too-long file/directory names now work

In [None]:
# Single artwork whose URL contains a very long, percent-encoded slug; used
# below to check that over-long file/directory names are handled.
artwork_links = ['https://artsandculture.google.com/asset/%E5%86%A8%E5%B6%BD%E4%B8%89%E5%8D%81%E5%85%AD%E6%99%AF%E3%80%80%E7%94%B2%E5%B7%9E%E7%9F%B3%E7%8F%AD%E6%B2%A2-kajikazawa-in-kai-province-k%C5%8Dsh%C5%AB-kajikazawa-from-the-series-thirty-six-views-of-mount-fuji-fugaku-sanj%C5%ABrokkei-katsushika-hokusai/hgHQaDeXBcllwg']

In [None]:
# Scrape the long-named artwork(s) into <output_dir>/Hokusai/works, retrying
# each link up to max_retries times.
hokusai_works_dir = f'{output_dir}/Hokusai/works'
with GoogleArtScraper(hokusai_works_dir, min_wait=min_wait_time) as subscraper:
    # Visit every artwork link in turn
    for url in artwork_links:
        print(f'artwork URL: {url}')
        retry(subscraper.save_artwork_information, max_retries, min_wait_time, url)

## Directory containing works of this artist

In [None]:
!ls ./data/Hokusai/works

## Directory containing one artwork by this artist

In [None]:
!ls ./data/Hokusai/works/%E5%86%A8%E5%B6%BD%E4%B8%89%E5%8D%81%E5%85%AD%E6%99%AF%E3%80%80%E7%94%B2%E5%B7%9E%E7%9F%B3%E7%8F%AD%E6%B2%A2-kajikazawa-in-kai-province-k%C5%8Dsh%C5%AB-kajikazawa-from-the-series-thirty-six-views-of-mount-fuji-fugaku-sanj%C5%ABrokkei-katsushika-hokusai_hg

## Display metadata for this artwork

In [None]:
!cat ./data/Hokusai/works/%E5%86%A8%E5%B6%BD%E4%B8%89%E5%8D%81%E5%85%AD%E6%99%AF%E3%80%80%E7%94%B2%E5%B7%9E%E7%9F%B3%E7%8F%AD%E6%B2%A2-kajikazawa-in-kai-province-k%C5%8Dsh%C5%AB-kajikazawa-from-the-series-thirty-six-views-of-mount-fuji-fugaku-sanj%C5%ABrokkei-katsushika-hokusai_hg/metadata.json