In [None]:
# Selenium used to open and interact with a webpage
# BeautifulSoup used to parse the HTML of the webpage (extracts data from the HTML)
# Pandas used to store the data in a DataFrame

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time
import pandas as pd


In [55]:
options = webdriver.ChromeOptions()
options.add_argument('--headless=new')
driver = webdriver.Chrome(options=options)

In [54]:
driver.quit()

In [None]:
def scrape_all_pages(artist_name):
    """Scrapes all auction entries for an artist, handling pagination dynamically."""
    
    formatted_name = artist_name.lower().replace(" ", "-")
    url = f"https://www.artsy.net/artist/{formatted_name}/auction-results"

    driver.get(url)
    time.sleep(5)  # Wait for the first page to load

    all_work = []  # Store all auction entries
    page_count = 1  # Track the number of pages scraped

    while True:
        # Parse page with BeautifulSoup
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # Find all auction entries on the current page
        auction_entries = soup.find_all("a", class_="RouterLink__RouterAwareLink-sc-c712443b-0 laGLjt")
        all_work.extend(auction_entries)

        print(f"Page {page_count}: Collected {len(auction_entries)} entries.")

        # "Next" button


    return all_work  # Return all collected auction entries

# Example: Scrape all auction results for an artist
artist_name = "Titian"
all_entries = scrape_all_pages(artist_name)

Page 1: Collected 50 entries.
Scraping complete. Total pages scraped: 1


In [47]:
all_entries[:5]

[<a class="RouterLink__RouterAwareLink-sc-c712443b-0 laGLjt" display="block" href="/auction-result/7058655" text-decoration="none"><div class="Box-sc-15se88d-0 CSSGrid-sc-1q8w5xn-0 GridColumns-sc-1g9p6xx-0 gRoBRz fwdhTL"><div class="Box-sc-15se88d-0 GridColumns__Cell-sc-1g9p6xx-1 fHmcuw"><div class="Box-sc-15se88d-0 ilfZqS" overflow="hidden" style="aspect-ratio: 1 / 1; max-width: 130px;" width="100%"><div class="Box-sc-15se88d-0 fnOOKv" height="100%" width="100%"><img alt="" class="Box-sc-15se88d-0 guRykI" display="block" height="100%" loading="lazy" src="https://d7hftxdivxxvm.cloudfront.net?height=130&amp;quality=80&amp;resize_to=fill&amp;src=https%3A%2F%2Fd32dm0rphc51dk.cloudfront.net%2F2PZPW73N0I0_hsrFivbT2Q%2Fthumbnail.jpg&amp;width=130" srcset="https://d7hftxdivxxvm.cloudfront.net?height=130&amp;quality=80&amp;resize_to=fill&amp;src=https%3A%2F%2Fd32dm0rphc51dk.cloudfront.net%2F2PZPW73N0I0_hsrFivbT2Q%2Fthumbnail.jpg&amp;width=130 1x, https://d7hftxdivxxvm.cloudfront.net?height=260

In [None]:
def scrape_auction_results(artist_name, num_entries=10):  # Set a default number of entries
    # Construct the auction results page URL
    formatted_name = artist_name.lower().replace(" ", "-")
    url = f"https://www.artsy.net/artist/{formatted_name}/auction-results"

    # Open the webpage
    driver.get(url)
    time.sleep(5) 

    # Store auction results
    all_work = []
    all_work.append(soup.find_all("a"))

    for i in range(1, num_entries):  # Loop through entries
        try:
            # Generate full XPath dynamically
            auction_xpath = f'/html/body/div[2]/div/div/div[2]/main/div/div[14]/div[2]/div[3]/div/div/div/a[{i}]'

            # Extract title
            title_xpath = f'{auction_xpath}/div/div[3]/div[1]'
            title_element = driver.find_element(By.XPATH, title_xpath)
            title = title_element.text.strip() if title_element else "Unknown"

            # Extract medium
            medium_xpath = f'{auction_xpath}/div/div[3]/div[2]'
            medium_element = driver.find_element(By.XPATH, medium_xpath)
            medium = medium_element.text.strip() if medium_element else "Unknown"

            # Extract dimensions
            dimensions_xpath = f'{auction_xpath}/div/div[3]/div[3]'
            dimensions_element = driver.find_element(By.XPATH, dimensions_xpath)
            dimensions = dimensions_element.text.strip() if dimensions_element else "Unknown"

            # Extract year sold
            year_xpath = f'{auction_xpath}/div/div[4]/div[1]'
            year_element = driver.find_element(By.XPATH, year_xpath)
            year_sold = year_element.text.strip() if year_element else "Unknown"

            # Extract auction house
            auction_house_xpath = f'{auction_xpath}/div/div[4]/div[2]'
            auction_house_element = driver.find_element(By.XPATH, auction_house_xpath)
            auction_house = auction_house_element.text.strip() if auction_house_element else "Unknown"

            # Extract sales name
            sales_name_xpath = f'{auction_xpath}/div/div[4]/div[3]'
            sales_name_element = driver.find_element(By.XPATH, sales_name_xpath)
            sales_name = sales_name_element.text.strip() if sales_name_element else "Unknown"

            # Extract sale price
            price_xpath = f'{auction_xpath}/div/div[5]/div[1]'
            price_element = driver.find_element(By.XPATH, price_xpath)
            price = price_element.text.strip() if price_element else "Price not listed"

            # Extract estimated sale price
            estimated_price_xpath = f'{auction_xpath}/div/div[5]/div[2]'
            estimated_price_element = driver.find_element(By.XPATH, estimated_price_xpath)
            estimated_price = estimated_price_element.text.strip() if estimated_price_element else "Not Available"

            # Store the extracted data
            auction_results.append({
                "Artist": artist_name,
                "Title": title,
                "Medium": medium,
                "Dimensions": dimensions,
                "Year Sold": year_sold,
                "Auction House": auction_house,
                "Sales Name": sales_name,
                "Sale Price": price,
                "Estimated Sale Price": estimated_price,
            })

        except Exception as e:
            print(f"Skipping entry {i}: {e}")
            continue  # Skip if any required data is missing

    return auction_results

# Example: Scraping auction results for Titian (scrape first 10 entries)
artist_name = "Titian"
results = scrape_auction_results(artist_name, num_entries=3)

# Convert to DataFrame for easy viewing
df = pd.DataFrame(results)
print(df)

Skipping entry 1: Message: no such element: Unable to locate element: {"method":"xpath","selector":"/html/body/div[2]/div/div/div[2]/main/div/div[14]/div[2]/div[3]/div/div/div/a[1]/div/div[5]/div[1]"}
  (Session info: chrome=134.0.6998.89); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001078c6808 chromedriver + 6105096
1   chromedriver                        0x00000001078be40a chromedriver + 6071306
2   chromedriver                        0x0000000107359600 chromedriver + 415232
3   chromedriver                        0x00000001073ab2c0 chromedriver + 750272
4   chromedriver                        0x00000001073ab511 chromedriver + 750865
5   chromedriver                        0x00000001073fb9c4 chromedriver + 1079748
6   chromedriver                        0x00000001073d163d chromedriver + 906813
7   chromedriver                

In [28]:
# Function to Extract Auction Results per Artist
def scrape_auction_results(artist_name):
    formatted_name = artist_name.lower().replace(" ", "-")
    url = f"https://www.artsy.net/artist/{formatted_name}/auction-results"
    driver.get(url) # Open the URL in the browser
    time.sleep(5)

    # Parse the HTML of Page
    soup = BeautifulSoup(driver.page_source, "html.parser")

    # Find all auction entries (using the <a> tag)
    auction_items = soup.find_all("a")
    print(auction_items)

    # Store auction results
    auction_results = []

    for item in auction_items:
        try: 
# Extract title
            title_element = item.find_element(By.XPATH, '/div/div[3]/div[1]')
            title = title_element.text.strip() if title_element else "Unknown"

            # Extract medium
            medium_element = item.find_element(By.XPATH, '/div/div[3]/div[2]')
            medium = medium_element.text.strip() if medium_element else "Unknown"

            # Extract dimensions
            dimensions_element = item.find_element(By.XPATH, '/div/div[3]/div[3]')
            dimensions = dimensions_element.text.strip() if dimensions_element else "Unknown"

            # Extract year sold
            year_element = item.find_element(By.XPATH, 'div/div[4]/div[1]')
            year_sold = year_element.text.strip() if year_element else "Unknown"

            # Extract auction house
            auction_house_element = item.find_element(By.XPATH, '/div/div[4]/div[2]')
            auction_house = auction_house_element.text.strip() if auction_house_element else "Unknown"

            # Extract sales name
            sales_name_element = item.find_element(By.XPATH, '/div/div[4]/div[3]')
            sales_name = sales_name_element.text.strip() if sales_name_element else "Unknown"

            # Extract sale price
            price_element = item.find_element(By.XPATH, '/div/div[5]/div/div[2]')
            price = price_element.text.strip() if price_element else "Price not listed"

            # Extract estimated sale price
            estimated_price_element = item.find_element(By.XPATH, '/div/div[5]/div/div[2]')
            estimated_price = estimated_price_element.text.strip() if estimated_price_element else "Not Available"

            # Store the extracted data
            auction_results.append({
                "Artist": artist_name,
                "Title": title,
                "Medium": medium,
                "Dimensions": dimensions,
                "Year Sold": year_sold,
                "Auction House": auction_house,
                "Sales Name": sales_name,
                "Sale Price": price,
                "Estimated Sale Price": estimated_price,
                "Auction Link": auction_link
            })

        except Exception as e:
            print(f"Error extracting data: {e}")
            continue  # Skip if any required data is missing

    return auction_results

# Example Artist: Titian
artist_name = "Titian"
results = scrape_auction_results(artist_name)

# Convert to DataFrame
df = pd.DataFrame(results)
print(df)


[<a class="NavBarSkipLink__Container-sc-4b4150a-0 iEJjBS" href="#main"><div class="Box-sc-15se88d-0 Text-sc-18gcpao-0 gpKROX" font-family="sans">Skip to Main Content</div></a>, <a aria-label="Artsy" class="RouterLink__RouterAwareLink-sc-c712443b-0 bmQqzD NavBarPrimaryLogo__HitArea-sc-6b6fd54d-0 gJomZO" href="/"><div class="sc-gtLWhw jxiOWE" height="40" width="40"><svg fill="currentColor" style="position: absolute; inset: 0px; width: 100%; height: 100%;" viewbox="0 0 40 40"><path clip-rule="evenodd" d="M40 39.9962H0V0H40V39.9962ZM31.0691 37.1188H37.1226V2.87615H2.87865V37.1188H28.1892V29.2345H31.0691V37.1188ZM12.955 7.01093H16.1298L22.0088 22.1446H18.9671L17.4022 17.9006H11.7039L10.1165 22.1446H7.07723L12.955 7.01093ZM14.5424 10.0715L12.5911 15.3382H16.4711L14.5424 10.0715Z" fill-rule="evenodd"></path></svg></div></a>, <a class="RouterLink__RouterUnawareLink-sc-c712443b-1 NavBarItem__NavBarItemLink-sc-2c55f466-0 kLEUqi iMmuRo" data-label="Buy" display="flex" href="/collect" text-decorat

In [25]:

# Close the browser
driver.quit()


In [None]:
# Search for an Artist
def scrape_artsy(artist_name):
    formatted_name = artist_name.lower().replace(" ", "-")
    url = "https://www.artsy.net/artist/{formatted_name}/auction-results"
    driver.get(url)
    time.sleep(5)

In [None]:
# Extracting Auction Data
soup = BeautifulSoup(driver.page_source, "html.parser")
auction_items = soup.find_all("a", class_="RouterLink__RouterAwareLink-sc-c712443b-0 laGLjt")
auction_results = []

for item in auction_items:
    try:
        # Extract artwork title
        title_element = item.find("div", class_="Text-sc-18gcpao-0 bxWaGD")
        title = title_element.text.strip() if title_element else "Unknown"

        # Extract medium of artwork
        medium_element = item.find("div", class_="Text-sc-18gcpao-0 irDwAE bbAxnM")
        medium = medium_element.text.strip() if medium_element else "Unknown"

        # Extract dimensions of artwork
        dimensions_element = item.find("div", class_="Text-sc-18gcpao-0 irDwAE bbAxnM")
        dimensions = dimensions_element.text.strip() if dimensions_element else "Unknown"

        # Extract year it was sold
        year_element = item.find("div", class_="Text-sc-18gcpao-0 kXJpsV")
        year_sold = year_element.text.strip() if year_element else "Unknown"

        # Extract auction house
        auction_house_element = item.find("div", class_="Text-sc-18gcpao-0 irDwAE bbAxnM")
        auction_house = auction_house_element.text.strip() if auction_house_element else "Unknown"

        # Extract sales name
        sales_name_element = item.find("div", class_="Text-sc-18gcpao-0 irDwAE bbAxnM")
        sales_name = sales_name_element.text.strip() if sales_name_element else "Unknown"

        # Extract sales price
        price_element = item.find("div", class_="Text-sc-18gcpao-0 cMfkJA")
        price = price_element.text.strip() if price_element else "Price not listed"

        # Extract estimated sales price
        estimated_price_element = item.find("div", class_="Text-sc-18gcpao-0 jEONpp bbAxnM")
        estimated_price = estimated_price_element.text.strip() if estimated_price_element else "Est. price not listed"

        # Store the extracted data
        auction_results.append({
            "Artist": artist_name,
            "Title": title,
            "Medium": medium,
            "Dimensions": dimensions,
            "Year Sold": year_sold,
            "Auction House": auction_house,
            "Sales Name": sales_name,
            "Sale Price": price,
            "Estimated Sale Price": estimated_price,
            "Auction Link": auction_link
        })

    except AttributeError:
        continue  # Skip if any required data is missing


# Example: Artist "Titan"
artist_name = "Titian"
results = scrape_auction_results(artist_name)

# Store the data in a DataFrame
df = pd.DataFrame(results)
print(df)

# Close the browser
driver.quit()



