In [None]:
# Selenium is used to open and interact with a webpage.
# BeautifulSoup is used to parse the webpage's HTML and extract data from it.
# Pandas is used to store the extracted data in a DataFrame.

In [58]:
import time
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait



In [63]:
# Browser session shared by the rest of the notebook.
# Chrome is started with the "new" headless mode, so no window is opened.
options = Options()
options.add_argument('--headless=new')
driver = webdriver.Chrome(options=options)

In [62]:
# Shut down the browser session created in the setup cell above.
# Run this cell only once scraping is finished: scrape_all_pages() drives the
# module-level `driver`, so it cannot be used again after quit() until the setup
# cell has been re-run.
driver.quit()

In [54]:
# Function to scrape all auction results for an artist
def scrape_all_pages(artist_name, initial_delay=5, page_delay=8, wait_timeout=10):
    """Collect the auction-result link tags from every results page of an artist on Artsy.

    Drives the module-level Selenium ``driver``: opens the artist's
    auction-results page, parses each page with BeautifulSoup, and keeps
    clicking the "Next" pagination link until it no longer becomes visible.

    Args:
        artist_name: Artist name as a person would type it, e.g. "Titian".
            It is lower-cased and runs of whitespace become single hyphens
            to build the URL slug.
        initial_delay: Seconds to sleep after requesting the first page.
        page_delay: Seconds to sleep after each click on "Next".
        wait_timeout: Seconds each explicit ``WebDriverWait`` may block.

    Returns:
        list: The ``<a>`` tags (BeautifulSoup elements) matched on all visited
        pages, in page order. Pagination problems are printed and end the loop
        early; everything collected up to that point is still returned.
    """
    # split()/join() also drops leading/trailing blanks and collapses doubled
    # spaces, which a plain replace(" ", "-") would turn into stray hyphens.
    formatted_name = "-".join(artist_name.lower().split())
    url = f"https://www.artsy.net/artist/{formatted_name}/auction-results"
    driver.get(url)

    time.sleep(initial_delay)  # Give the page time to load

    next_locator = (By.XPATH, "//a[@data-testid='next']")
    entry_locator = (By.XPATH, "//a[contains(@class, 'RouterLink__RouterAwareLink')]")
    # NOTE(review): this looks like a build-generated class name, so it may stop
    # matching when the site is redeployed -- confirm before relying on it.
    entry_class = "RouterLink__RouterAwareLink-sc-c712443b-0 laGLjt"

    all_work = []
    page_count = 1

    while True:
        # Parse current page with BeautifulSoup
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # Find all auction entries on this page
        auction_entries = soup.find_all("a", class_=entry_class)
        all_work.extend(auction_entries)

        print(f"Page {page_count}: Collected {len(auction_entries)} entries.")

        # Step 1: wait for a visible "Next" link. Visibility implies presence,
        # so a single wait covers both checks.
        try:
            WebDriverWait(driver, wait_timeout).until(
                EC.visibility_of_element_located(next_locator)
            )
        except TimeoutException:
            # Normal termination: the last page shows no "Next" link.
            print(f"No 'Next' button found after Page {page_count}. Scraping complete.")
            break
        except Exception as e:
            # Anything else (e.g. a dead browser session) is a real failure and
            # must not be reported as a clean finish. KeyboardInterrupt is not
            # an Exception subclass, so interrupting the kernel still works.
            print(f"Error while waiting for 'Next' button on Page {page_count}: {e}")
            break

        # Step 2: click "Next" and move on to the following page
        try:
            print(f"Clicking 'Next' button to go to page {page_count + 1}...")

            # Look the link up again right before clicking so a re-render
            # between the wait and the click cannot leave a stale reference.
            next_button = driver.find_element(*next_locator)

            # JavaScript click to bypass any overlay covering the link
            driver.execute_script("arguments[0].click();", next_button)
            time.sleep(page_delay)  # Allow the new page to fully load

            # NOTE(review): links from the previous page already match this
            # locator, so this wait presumably returns immediately and the
            # fixed sleep above is what actually gives the new page time to
            # render -- confirm before shortening page_delay.
            WebDriverWait(driver, wait_timeout).until(
                EC.presence_of_element_located(entry_locator)
            )

            page_count += 1
        except Exception as e:
            print(f"Error clicking 'Next' button on Page {page_count}: {e}")
            break  # Exit loop if the Next button cannot be clicked

    return all_work  # Return all collected auction entries

# Example usage: scrape every auction-results page for one artist and report
# how many entries were collected in total.
artist_name = "Titian"
all_entries = scrape_all_pages(artist_name)
print(f"Total auction results collected: {len(all_entries)}")

Page 1: Collected 50 entries.
Clicking 'Next' button to go to page 2...
Page 2: Collected 50 entries.
No 'Next' button found after Page 2. Scraping complete.
Total auction results collected: 100
