In [None]:
# Selenium is used to open and interact with the webpage.
# BeautifulSoup is used to parse the page's HTML and extract data from it.
# Pandas is used to store the extracted data in a DataFrame.

In [3]:
# Import Necessary Libraries
import os
import time
from getpass import getpass

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


In [13]:
# Setup Chrome options
# Default install location of Chrome on macOS. It is only applied when the file
# actually exists, so the notebook also runs on machines where Chrome lives
# elsewhere (Selenium then locates the browser on its own).
CHROME_BINARY = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"

chrome_options = webdriver.ChromeOptions()
if os.path.isfile(CHROME_BINARY):
    chrome_options.binary_location = CHROME_BINARY
# Uncomment to run without a visible browser window:
# chrome_options.add_argument('--headless=new')

# Initialize Chrome driver
driver = webdriver.Chrome(options=chrome_options)
driver.maximize_window()

In [12]:
# Manual cleanup: closes the browser session held by the module-level `driver`.
# NOTE(review): in a top-to-bottom run this executes right after the driver is
# created, so later cells that use `driver` would find the session already
# closed — confirm this cell is meant to be run by hand only.
driver.quit()

In [14]:
# Login to Artsy
# Login Function
def artsy_login(driver, email, password):
    """Log in through the page header's "Log In" button.

    Looks for exactly one <button> inside the <header> element whose text is
    'Log In'. When found, it clicks it and walks through the two-step form
    (email -> submit -> password -> submit), pausing between steps. When zero
    or several matching buttons are found, it only prints an error message.

    Args:
        driver: Selenium WebDriver already showing the page to log in on.
        email: Value typed into the email field.
        password: Value typed into the password field.

    Returns:
        None in every case; progress is reported via print().
    """
    page_header = driver.find_element(By.TAG_NAME, "header")
    matches = []
    for button in page_header.find_elements(By.TAG_NAME, "button"):
        if button.text == 'Log In':
            matches.append(button)

    # Anything other than a single unambiguous button is treated as a failure.
    if len(matches) != 1:
        print("Error: Not able to locate log in button")
        return

    matches[0].click()
    time.sleep(2)

    # One wait object is reused; each until() call starts its own 10 s timeout.
    wait = WebDriverWait(driver, 10)
    submit_locator = (By.XPATH, '//button[@type="submit"]')

    # Step 1: email address, then continue.
    email_field = wait.until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, '[placeholder="Enter your email address"]')
        )
    )
    email_field.send_keys(email)
    time.sleep(2)

    wait.until(EC.element_to_be_clickable(submit_locator)).click()
    time.sleep(2)

    # Step 2: password, then submit.
    password_field = wait.until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, '[placeholder="Enter your password"]')
        )
    )
    password_field.send_keys(password)
    time.sleep(2)

    wait.until(EC.element_to_be_clickable(submit_locator)).click()
    print("Logged in")
    time.sleep(5)


In [15]:
# Scrape Artist Description
def close_popup_if_present(driver):
    """Dismiss a popup via its `button[aria-label="Close"]`, if one shows up.

    Waits up to 5 seconds for the close button to become clickable and clicks
    it through JavaScript. This is best-effort: Selenium errors are reported
    with print() and never propagated, so callers can continue either way.
    Non-Selenium errors (KeyboardInterrupt, programming mistakes) are no
    longer swallowed.

    Args:
        driver: Selenium WebDriver showing the page that may have a popup.
    """
    try:
        popup_close_button = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'button[aria-label="Close"]'))
        )
        # A JavaScript click is used instead of element.click().
        driver.execute_script("arguments[0].click();", popup_close_button)
        print("Popup closed successfully.")
    except TimeoutException:
        # No clickable close button appeared within the timeout.
        print("No popup found or popup already closed.")
    except WebDriverException as exc:
        # Any other browser-side failure: stay best-effort, but say what happened.
        print(f"Popup could not be closed: {exc}")


def get_artist_description(driver):
    """Expand the artist header and return its name, subtitle and biography.

    Steps, in order: locate the `[data-test="artistHeader"]` element, dismiss
    any popup, click the header's "Read more" button through JavaScript, wait
    for an `[aria-expanded="true"]` element inside the header, then re-read
    the header's innerHTML and parse it with BeautifulSoup.

    Args:
        driver: Selenium WebDriver currently on an artist page.

    Returns:
        Tuple of three stripped strings: text of the first <h1>, text of the
        first <h2>, and text of the <div> carrying aria-expanded="true".
    """
    header_locator = (By.CSS_SELECTOR, '[data-test="artistHeader"]')
    header = driver.find_element(*header_locator)

    # A popup may be present; dismiss it before clicking "Read more".
    close_popup_if_present(driver)

    # The wait is scoped to the header element, so the XPath is relative (".//").
    read_more_button = WebDriverWait(header, 10).until(
        EC.element_to_be_clickable((By.XPATH, './/button[contains(., "Read more")]'))
    )
    driver.execute_script("arguments[0].click();", read_more_button)

    WebDriverWait(header, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, '[aria-expanded="true"]'))
    )

    # Look the header up again and parse its markup after the expansion.
    expanded_header = driver.find_element(*header_locator)
    soup = BeautifulSoup(expanded_header.get_attribute('innerHTML'), 'html.parser')

    name_text = soup.find('h1').text.strip()
    country_year_text = soup.find('h2').text.strip()
    biography_text = soup.find('div', attrs={'aria-expanded': 'true'}).text.strip()

    return name_text, country_year_text, biography_text



In [16]:
# Scrape all Auction Entries for an Artist
def scrape_all_pages(artist_name, email, password):
    """Collect every auction-result link for an artist across all result pages.

    Uses the module-level `driver`: opens the artist's auction-results page,
    logs in, scrapes the artist header, then pages through the results by
    clicking the "next" link until it can no longer be clicked. The driver is
    always quit before returning or raising, so no browser process is left
    behind when a step fails part-way.

    Args:
        artist_name: Display name; lower-cased with spaces replaced by hyphens
            to build the URL slug.
        email: Login email, forwarded to artsy_login.
        password: Login password, forwarded to artsy_login.

    Returns:
        Tuple (artist name, country/year line, description, entries), where
        the first three come from the page header and `entries` is the list of
        BeautifulSoup <a> tags collected from every visited page.
    """
    # NOTE(review): this looks like a generated CSS class name and may change
    # when the site is redeployed — confirm if no entries are collected.
    entry_link_class = "RouterLink__RouterAwareLink-sc-c712443b-0 laGLjt"

    formatted_name = artist_name.lower().replace(" ", "-")
    url = f"https://www.artsy.net/artist/{formatted_name}/auction-results"

    try:
        driver.get(url)
        time.sleep(5)

        # Login procedure
        artsy_login(driver, email, password)

        # Scrape Artist Description (kept in separate names so the
        # `artist_name` argument is not overwritten)
        scraped_name, artist_country_year, artist_description_paragraph = get_artist_description(driver)
        print(f"Artist Name: {scraped_name}")
        print(f"Artist Country and Year: {artist_country_year}")
        print(f"Artist Description: {artist_description_paragraph}\n")

        all_work = []
        page_count = 1

        while True:
            soup = BeautifulSoup(driver.page_source, "html.parser")
            auction_entries = soup.find_all("a", class_=entry_link_class)
            all_work.extend(auction_entries)
            print(f"Page {page_count}: Collected {len(auction_entries)} entries.")

            try:
                # Only a "next" link styled with full opacity is accepted.
                next_button = WebDriverWait(driver, 15).until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, "a[data-testid='next'][style*='opacity: 1']"))
                )
                driver.execute_script("arguments[0].click();", next_button)
                time.sleep(5)
                page_count += 1

            except Exception as e:
                # A timeout here is the normal way the pagination loop ends.
                print(f"No 'Next' button found or error on Page {page_count}: {e}")
                break
    finally:
        # Release the browser even when login or scraping raised.
        driver.quit()

    return scraped_name, artist_country_year, artist_description_paragraph, all_work



In [17]:
# Example
if __name__ == "__main__":
    artist_name = "Titian"
    # Credentials must never be stored in the notebook: read them from the
    # environment, and fall back to an interactive prompt when they are unset.
    email = os.environ.get("ARTSY_EMAIL") or input("Artsy email: ")
    password = os.environ.get("ARTSY_PASSWORD") or getpass("Artsy password: ")

    artist_name, artist_country_year, artist_description_paragraph, all_entries = scrape_all_pages(artist_name, email, password)

    print(f"\nTotal auction results collected: {len(all_entries)}\n")
    for idx, entry in enumerate(all_entries, 1):
        print(f"{idx}: {entry.get('href')}")

Logged in
Popup closed successfully.
Artist Name: Titian
Artist Country and Year: Italian, 1488–1576
Artist Description: Known for his expressive brushwork, brilliant color, and hazy, atmospheric effects, Renaissance painter Tiziano Vecelli, called Titian, along with Giorgione, was the founder of arte moderna, a nuanced style characterized by dynamic asymmetry and non-hierarchical compositions that broke from the work of his master, Giovanni Bellini. After Giorgione and Bellini died, Titian introduced many innovations and was the considered the master of Venetian painting. Titian's 1548 painting Equestrian Portrait of Charles V established equestrian portraiture as a new genre that referenced both the Roman tradition of equestrian sculpture and the medieval representations of Christian knights. One of Titian’s most famous paintings is The Rape of Europa (1562), which introduced a powerful diagonal structure—almost baroque in its blurred lines, swirling color, and vibrant brushwork. Aft

In [18]:
# Inspect the raw HTML of a single collected entry (index 1, i.e. the second link).
all_entries[1]

<a class="RouterLink__RouterAwareLink-sc-c712443b-0 laGLjt" display="block" href="/auction-result/7049729" text-decoration="none"><div class="Box-sc-15se88d-0 CSSGrid-sc-1q8w5xn-0 GridColumns-sc-1g9p6xx-0 gRoBRz fwdhTL"><div class="Box-sc-15se88d-0 GridColumns__Cell-sc-1g9p6xx-1 fHmcuw"><div class="Box-sc-15se88d-0 ilfZqS" overflow="hidden" style="aspect-ratio: 1 / 1; max-width: 130px;" width="100%"><div class="Box-sc-15se88d-0 fnOOKv" height="100%" width="100%"><img alt="" class="Box-sc-15se88d-0 guRykI" display="block" height="100%" loading="lazy" src="https://d7hftxdivxxvm.cloudfront.net?height=130&amp;quality=80&amp;resize_to=fill&amp;src=https%3A%2F%2Fd32dm0rphc51dk.cloudfront.net%2F4DDBNlFufqY4pofHirWBag%2Fthumbnail.jpg&amp;width=130" srcset="https://d7hftxdivxxvm.cloudfront.net?height=130&amp;quality=80&amp;resize_to=fill&amp;src=https%3A%2F%2Fd32dm0rphc51dk.cloudfront.net%2F4DDBNlFufqY4pofHirWBag%2Fthumbnail.jpg&amp;width=130 1x, https://d7hftxdivxxvm.cloudfront.net?height=260&

In [28]:
def parse_auction_entries(all_entries):
    """Turn collected auction-result entries into a DataFrame, one row each.

    Each entry is re-parsed from its string form and fields are read by CSS
    class and position. Any field whose element is missing becomes 'N/A'.
    NOTE(review): the class names look generated and the positional indexes
    depend on the current page layout — confirm if the output turns to 'N/A'.

    Args:
        all_entries: Iterable of entries (BeautifulSoup tags or HTML strings).
            A single tag or string is treated as one entry rather than being
            iterated child by child. None is treated as an empty input.

    Returns:
        pandas.DataFrame with the columns Title, Medium, Dimensions, Sale Date,
        Auction House, Sale Location, Sale Name, Lot Number, Price Sold and
        Price Estimated. The columns are present even when there are no rows.
    """
    columns = [
        'Title', 'Medium', 'Dimensions', 'Sale Date', 'Auction House',
        'Sale Location', 'Sale Name', 'Lot Number', 'Price Sold', 'Price Estimated',
    ]

    def text_at(nodes, index):
        """Return the stripped text of nodes[index], or 'N/A' when out of range."""
        return nodes[index].text.strip() if len(nodes) > index else 'N/A'

    if all_entries is None:
        all_entries = []
    elif isinstance(all_entries, str) or hasattr(all_entries, 'select_one'):
        # Iterating a lone tag would walk its children (and a string its
        # characters), so wrap it to get exactly one row.
        all_entries = [all_entries]

    auction_data = []

    for entry in all_entries:
        soup = BeautifulSoup(str(entry), "html.parser")

        # Run each selector once per entry instead of once per field.
        detail_nodes = soup.select('.irDwAE')
        location_nodes = soup.select('.bbAxnM')
        title_node = soup.select_one('.bxWaGD')
        sold_node = soup.select_one('.gJsJRn')
        estimate_node = soup.select_one('.jEONpp')

        title = title_node.text.strip() if title_node is not None else 'N/A'
        medium = text_at(detail_nodes, 0)
        dimensions = text_at(detail_nodes, 1)

        # The third detail line is split on the first bullet into date and house.
        sale_date_house = text_at(detail_nodes, 2)
        if '•' in sale_date_house:
            sale_date, auction_house = [x.strip() for x in sale_date_house.split('•', 1)]
        else:
            sale_date, auction_house = sale_date_house, 'N/A'

        sale_name = text_at(detail_nodes, 6)
        lot_number = text_at(detail_nodes, 7)

        # The location is the part after the first bullet of the third '.bbAxnM' node.
        sale_location_full = text_at(location_nodes, 2)
        if '•' in sale_location_full:
            sale_location = sale_location_full.split('•')[1].strip()
        else:
            sale_location = 'N/A'

        price_sold = sold_node.text.strip() if sold_node is not None else 'N/A'
        if estimate_node is not None:
            # Strip again after removing the marker so no trailing space is left.
            price_estimated = estimate_node.text.replace("(est)", "").strip()
        else:
            price_estimated = 'N/A'

        auction_data.append({
            'Title': title,
            'Medium': medium,
            'Dimensions': dimensions,
            'Sale Date': sale_date,
            'Auction House': auction_house,
            'Sale Location': sale_location,
            'Sale Name': sale_name,
            'Lot Number': lot_number,
            'Price Sold': price_sold,
            'Price Estimated': price_estimated,
        })

    # Passing `columns` keeps the schema stable even for an empty result.
    return pd.DataFrame(auction_data, columns=columns)

# Example usage: parse one collected entry as a quick sanity check.
# The function expects a list of entries, so the single tag is wrapped in a list.
df_auctions = parse_auction_entries([all_entries[1]])
df_auctions.head()



              Title         Medium    Dimensions   Sale Date Auction House  \
0  Venus and Adonis  oil on canvas  177 x 199 cm  7 Dec 2022     Sotheby's   

  Sale Location                    Sale Name Lot Number            Price Sold  \
0        London  Old Masters Evening Auction      Lot 6  Sign up to see price   

  Price Estimated  
0             N/A  
