# Find links to Google Arts & Culture webpages of all artists

In [None]:
from artscraper import get_artist_links

In [None]:
# Get links for all artists, as a list
#artist_urls = get_artist_links(executable_path='geckodriver', min_wait_time=1, output_file='artist_links.txt')

# Collect artworks and metadata for all artists

In [1]:
import time

In [2]:
from artscraper import GoogleArtScraper, FindArtworks, random_wait_time

In [3]:
# Lower bound on the randomised pause between retries; passed to
# random_wait_time(min_wait=...), whose result is handed to time.sleep
# (presumably seconds -- confirm against the artscraper documentation)
min_wait_time = 10

In [4]:
# Illustrative subset of artist pages (Google Arts & Culture entity URLs)
artist_urls = [
    'https://artsandculture.google.com/entity/vincent-van-gogh/m07_m2',
    'https://artsandculture.google.com/entity/claude-monet/m01xnj',
    'https://artsandculture.google.com/entity/banksy/m023b7b',
    'https://artsandculture.google.com/entity/rembrandt/m0bskv2',
    'https://artsandculture.google.com/entity/raphael/m0c43g',
]

In [5]:
# Directory in which the data is to be stored: passed to FindArtworks as
# output_dir, and each artist's own directory path is built beneath it
output_dir = './data'

In [6]:
# Maximum number of attempts for each task (saving one artist's information,
# or saving one artwork's metadata and image) before giving up on it
max_retries = 10

In [None]:
%%time 

# For each artist: save the artist's information (list of works, description,
# and metadata), then save the metadata and image of every listed artwork.
# Each task is attempted at most `max_retries` times, with a randomised pause
# (at least `min_wait_time`) after every failed attempt.
for artist_url in artist_urls:

    # Stays None if the artist information could not be saved in any attempt
    artwork_links = None

    with FindArtworks(artist_link=artist_url, output_dir=output_dir,
                      min_wait_time=min_wait_time) as scraper:
        print(f'artist URL: {artist_url}')
        for num_attempts in range(max_retries):
            try:
                # Save list of works, description, and metadata for an artist
                scraper.save_artist_information()
            except Exception as error:
                print(f'Error at attempt {num_attempts}: {error}')
                # Pause before retrying, as the artwork loop below does
                time.sleep(random_wait_time(min_wait=min_wait_time))
            else:
                # Path of the directory holding this artist's data
                artist_dir = output_dir + '/' + scraper.get_wikipedia_article_title()
                # Get list of links to this artist's works, skipping blank
                # lines so that no empty URL is loaded (and retried) later
                with open(artist_dir + '/' + 'works.txt', 'r') as file:
                    artwork_links = [line.rstrip() for line in file if line.strip()]
                break

    if artwork_links is None:
        print(f'Could not save artist information for artist link {artist_url}')
        continue

    # Scrape artworks
    with GoogleArtScraper(artist_dir + '/' + 'works', min_wait=min_wait_time) as subscraper:
        # Go through each artwork link
        for url in artwork_links:
            print(f'artwork URL: {url}')
            for num_attempts in range(max_retries):
                try:
                    subscraper.load_link(url)
                    subscraper.save_metadata()
                    subscraper.save_image()
                except Exception as error:
                    print(f'Error at attempt {num_attempts}: {error}')
                    time.sleep(random_wait_time(min_wait=min_wait_time))
                else:
                    break
            else:
                # Loop ended without a successful attempt: move on to the next artwork
                print(f'Could not save artwork information for artwork link {url}')
                    
                    

artist URL: https://artsandculture.google.com/entity/vincent-van-gogh/m07_m2
artwork URL: https://artsandculture.google.com/asset/undergrowth-with-two-figures-vincent-van-gogh-dutch-b-1853-d-1890/PgGaehoXTiERQQ
artwork URL: https://artsandculture.google.com/asset/head-of-a-skeleton-with-a-burning-cigarette-vincent-van-gogh/hQGZa2I9Xi6lpA
artwork URL: https://artsandculture.google.com/asset/the-starry-night-vincent-van-gogh/bgEuwDxel93-Pg
artwork URL: https://artsandculture.google.com/asset/self-portrait/9gFw_1Vou2CkwQ
artwork URL: https://artsandculture.google.com/asset/country-road-in-provence-by-night-vincent-van-gogh/4wEXP9j2v6hpYw
artwork URL: https://artsandculture.google.com/asset/almond-blossom-vincent-van-gogh/dAFXSL9sZ1ulDw
artwork URL: https://artsandculture.google.com/asset/wheatfield-with-crows-vincent-van-gogh/dwFdD5AMQfpSew
artwork URL: https://artsandculture.google.com/asset/roses-vincent-van-gogh/UQGFh2ps12F5hw
artwork URL: https://artsandculture.google.com/asset/the-ye

artwork URL: https://artsandculture.google.com/asset/olive-trees/rwGVxwlpIhHn6g
artwork URL: https://artsandculture.google.com/asset/imperial-fritillaries-in-a-copper-vase/wwFUs2icMjlcUA
artwork URL: https://artsandculture.google.com/asset/wheat-fields-with-reaper-auvers-vincent-van-gogh/7QFlVButcidDvw
artwork URL: https://artsandculture.google.com/asset/les-vessenots-in-auvers-vincent-van-gogh/XAHqHrqg761Jrw
artwork URL: https://artsandculture.google.com/asset/portrait-of-joseph-roulin-vincent-van-gogh/2QEeaf-2G95y9Q
artwork URL: https://artsandculture.google.com/asset/irises-vincent-van-gogh/SAEPDJ4Qw6cEag
artwork URL: https://artsandculture.google.com/asset/apples-vincent-van-gogh/4AGLJL7XOS7I2w
artwork URL: https://artsandculture.google.com/asset/thatched-cottages-at-cordeville/pwEO5v3f4pZn3Q
artwork URL: https://artsandculture.google.com/asset/field-with-flowers-near-arles-vincent-van-gogh/lQFzzpeEHU-XxA
artwork URL: https://artsandculture.google.com/asset/il-giardiniere-vincent-v

artwork URL: https://artsandculture.google.com/asset/restaurant-rispal-at-asni%C3%A8res-le-restaurant-rispal-%C3%A0-asni%C3%A8res-vincent-van-gogh/JAENkgCmt29yPg
artwork URL: https://artsandculture.google.com/asset/the-potato-eaters-vincent-van-gogh/rQE6qmf9oVuKPA
artwork URL: https://artsandculture.google.com/asset/road-in-etten-vincent-van-gogh/-AGdhBVEWzlDiw
artwork URL: https://artsandculture.google.com/asset/cypresses-with-two-figures-vincent-van-gogh/QAHDdKGn28GS_Q
artwork URL: https://artsandculture.google.com/asset/the-pink-orchard-vincent-van-gogh/vgEUSZYwZoEWyQ
artwork URL: https://artsandculture.google.com/asset/the-langlois-bridge-vincent-van-gogh/nQFqhidYNTvrYA
artwork URL: https://artsandculture.google.com/asset/dr-gachet-vincent-van-gogh/VAFopd9tf5LE5Q
artwork URL: https://artsandculture.google.com/asset/woman-sewing-vincent-van-gogh/dgEEQO49zXq4wQ
artwork URL: https://artsandculture.google.com/asset/the-stone-bench-in-the-asylum-at-saint-remy/TQEfc6JyEYCISw
artwork URL:

artwork URL: https://artsandculture.google.com/asset/still-life-with-plaster-statuette-vincent-van-gogh/RQGbavb9NR1BSA
artwork URL: https://artsandculture.google.com/asset/peach-trees-in-blossom-vincent-van-gogh/kAGpFWfutEP8KQ
artwork URL: https://artsandculture.google.com/asset/prado-con-vacas-vincent-van-gogh/RAG7UgArhK5v_A
artwork URL: https://artsandculture.google.com/asset/sheaves-of-wheat-vincent-van-gogh/ggHH98S4IkpXVQ
artwork URL: https://artsandculture.google.com/asset/the-cottage-vincent-van-gogh/OAHj50EhIZy5XA
artwork URL: https://artsandculture.google.com/asset/the-ravine-les-peiroulets-vincent-van-gogh/JQHlLPIiPeBpRw
artwork URL: https://artsandculture.google.com/asset/by-the-seine-vincent-van-gogh/nAFt4-B1LUosxQ
artwork URL: https://artsandculture.google.com/asset/head-of-a-woman-vincent-van-gogh/ggHHJgZl2orSjQ
artwork URL: https://artsandculture.google.com/asset/street-in-saintes-maries-de-la-mer-vincent-van-gogh/SAFascJIx1gFfQ
artwork URL: https://artsandculture.google.

artwork URL: https://artsandculture.google.com/asset/irises-vincent-van-gogh/jgHaSIW_d-AStg
artwork URL: https://artsandculture.google.com/asset/portrait-of-a-young-woman-vincent-van-gogh/eQFf8GeRV7RY0w
artwork URL: https://artsandculture.google.com/asset/still-life-vincent-van-gogh/iwHq4hil_bce5A
artwork URL: https://artsandculture.google.com/asset/pine-trees-in-the-garden-of-the-asylum-vincent-van-gogh/sAHmJBp5iWmTtQ
artwork URL: https://artsandculture.google.com/asset/moulin-de-la-galette-vincent-van-gogh/CwG7Q4UIhLZU2Q
artwork URL: https://artsandculture.google.com/asset/still-life-with-jars-vincent-van-gogh/DwFqgfVp-TIKAw
artwork URL: https://artsandculture.google.com/asset/the-hill-of-montmartre-vincent-van-gogh/dwHEYYmdE7idwA
artwork URL: https://artsandculture.google.com/asset/schelpenvisser-gogh-vincent-van/UwHrL7EV3_lyqg
artwork URL: https://artsandculture.google.com/asset/farmhouse-in-nuenen-vincent-van-gogh/FQFfS3CmcNmS0A
artwork URL: https://artsandculture.google.com/asset

Error at attempt 9: Message: Unable to locate element: //*[@id="metadata-RAG2ghqQDCGUQA"]
Stacktrace:
RemoteError@chrome://remote/content/shared/RemoteError.sys.mjs:8:8
WebDriverError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:187:5
NoSuchElementError@chrome://remote/content/shared/webdriver/Errors.sys.mjs:505:5
element.find/</<@chrome://remote/content/marionette/element.sys.mjs:135:16

Could not save artwork information for artwork link https://artsandculture.google.com/asset/basket-of-apples-vincent-van-gogh/RAG2ghqQDCGUQA
artwork URL: https://artsandculture.google.com/asset/man-at-table-vincent-van-gogh/kAHDry9U8xVnnw
artwork URL: https://artsandculture.google.com/asset/bleaching-ground-at-scheveningen/UwFU5PhONOa0Gg
artwork URL: https://artsandculture.google.com/asset/bloemkwekerij-van-pierre-van-de-putte-aan-de-schenkweg-te-den-haag-gogh-vincent-van/GwGSqrY53dEfUw
artwork URL: https://artsandculture.google.com/asset/woman-at-table-vincent-van-gogh/3QGP6IGEgGUWBQ
artwo

artwork URL: https://artsandculture.google.com/asset/the-water-lilies-green-reflections/-wEwoHEvFukepQ
artwork URL: https://artsandculture.google.com/asset/villas-at-bordighera/pQF_4jmpmbxRqw
artwork URL: https://artsandculture.google.com/asset/spring-by-the-seine-claude-monet/5gEwCIKVPWkiLg
artwork URL: https://artsandculture.google.com/asset/waterloo-bridge-in-london/owF6Xdg0d9ObLg
artwork URL: https://artsandculture.google.com/asset/lavacourt-under-snow-claude-monet/CwFEQm13CsYOZg
artwork URL: https://artsandculture.google.com/asset/the-portal-of-rouen-cathedral-in-morning-light-claude-monet/QgHppUFTxKB-Lg
artwork URL: https://artsandculture.google.com/asset/villas-at-bordighera-claude-monet/bgEexoTw0FEy5g
artwork URL: https://artsandculture.google.com/asset/la-corniche-near-monaco-monet-claude/lQH-zRLDuzhsFQ
artwork URL: https://artsandculture.google.com/asset/rouen-cathedral-west-fa%C3%A7ade-sunlight-claude-monet/wwEuC67zYZZZBA
artwork URL: https://artsandculture.google.com/asset/

artwork URL: https://artsandculture.google.com/asset/wind-effect-series-of-the-poplars/tgHEOLlXi2OOlg
artwork URL: https://artsandculture.google.com/asset/winter-sun-at-lavacourt-claude-monet/bwF29Ko37P1UDQ
artwork URL: https://artsandculture.google.com/asset/grainstack-sun-in-the-mist/cQHBeghhc30ymw
artwork URL: https://artsandculture.google.com/asset/nympheas-claude-monet/sQHNcLzqIRib3Q
artwork URL: https://artsandculture.google.com/asset/pond-with-water-lilies/7wFvMeHQCRDVEw
artwork URL: https://artsandculture.google.com/asset/rough-weather-at-%C3%89tretat-claude-monet/ygFw-IRhzTkvDQ
artwork URL: https://artsandculture.google.com/asset/poplars-at-giverny-claude-monet/wAH1TcAoOb2YOw
artwork URL: https://artsandculture.google.com/asset/the-seine-at-giverny-morning-mists-claude-monet/XAFcUMmxNnUVoQ
artwork URL: https://artsandculture.google.com/asset/argenteuil-claude-monet/5QHvbC0Tm8UWoA
artwork URL: https://artsandculture.google.com/asset/waterloo-bridge-claude-monet/pAGQ8rtR9PbpJw
a

artwork URL: https://artsandculture.google.com/asset/road-of-la-roche-guyon/3wF6-c30wxbO3A
artwork URL: https://artsandculture.google.com/asset/the-water-lilies-the-two-willows/3AEQUJnQ8YnB_A
artwork URL: https://artsandculture.google.com/asset/bazille-and-camille-study-for-d%C3%A9jeuner-sur-l-herbe-claude-monet/WgFYt5XfeWCLug
artwork URL: https://artsandculture.google.com/asset/waterloo-bridge-london-at-sunset-claude-monet/ZwFzqUlCQlokNA
artwork URL: https://artsandculture.google.com/asset/snow-effect-at-limetz-claude-monet/0gFCxHZrfYH7sQ
artwork URL: https://artsandculture.google.com/asset/morning-on-the-seine/bQEXOM0GSEYXsA
artwork URL: https://artsandculture.google.com/asset/houses-of-parliament-in-the-fog-claude-monet/owEEr48qDGwjqQ
artwork URL: https://artsandculture.google.com/asset/the-ball-shaped-tree-argenteuil-claude-monet/DAFG-C0d8IDmpQ
artwork URL: https://artsandculture.google.com/asset/floes-at-bennecourt-claude-monet/rAEMFBMZLyiHHA
artwork URL: https://artsandculture.go

artwork URL: https://artsandculture.google.com/asset/le-portail-brouillard-matinal-the-portal-morning-fog-claude-monet/kAGIGTIteSwhkg
artwork URL: https://artsandculture.google.com/asset/grainstacks-white-frost-effect-claude-monet/SwGHr_b-BPYtNw
artwork URL: https://artsandculture.google.com/asset/at-val-saint-nicolas-near-dieppe-in-the-morning-claude-monet/NwHRwehkH8g2kA
artwork URL: https://artsandculture.google.com/asset/eug%C3%A9nie-graff-madame-paul/TAFTzOHg6O647Q
artwork URL: https://artsandculture.google.com/asset/les-falaises-de-varengeville-claude-monet/JwGjyye7WzUWlQ
artwork URL: https://artsandculture.google.com/asset/the-seine-near-rouen-claude-monet/tgHJ8xhaGJMVYg
artwork URL: https://artsandculture.google.com/asset/nymphe-as-reflets-de-saule-monet-claude/DgHhtqc5bCGisA
artist URL: https://artsandculture.google.com/entity/banksy/m023b7b
Error at attempt 0: Expecting value: line 1 column 1 (char 0)
Error at attempt 1: Expecting value: line 1 column 1 (char 0)
Error at attem

artwork URL: https://artsandculture.google.com/asset/faust-rembrandt-van-rijn/7gHZ4Nqb63rCLA
artwork URL: https://artsandculture.google.com/asset/self-portrait-drawing-at-a-window-rembrandt-van-rijn/bwH0UWXIa8-PRw
artwork URL: https://artsandculture.google.com/asset/faust-rembrandt-van-rijn/KwEjzzHxPwTGig
artwork URL: https://artsandculture.google.com/asset/de-drie-kruisen-rijn-rembrandt-van/cAEh-CLszbjkgw
artwork URL: https://artsandculture.google.com/asset/the-shipbuilder-and-his-wife-jan-rijcksen-1560-2-1637-and-his-wife-griet-jans-rembrandt-van-rijn/VQEy8-tpcGYeBg
