In [1]:
## Can also get the price chart by pokemon number, or url for past prices. Look intro trading strategies; also, work on real-time offering feed vs history. 

In [7]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
import re

# Customize number of pages to scrape
num_pages = 1
base_url = 'https://www.tcgplayer.com/search/pokemon/product?productLineName=pokemon&view=grid&Condition=Near+Mint|Lightly+Played&page={}&Language=English'

# Setup headless Chrome options
options = Options()
options.add_argument("--headless=new")
prefs = {"profile.default_content_setting_values": {"images": 2, "stylesheets": 2}}
options.add_experimental_option("prefs", prefs)

def scrape_page(page_num):
    driver = webdriver.Chrome(options=options)
    wait = WebDriverWait(driver, 10)
    url = base_url.format(page_num)
    driver.get(url)

    try:
        wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "product-card__product")))
    except:
        driver.quit()
        return []

    products = driver.find_elements(By.CLASS_NAME, "product-card__product")
    results = []

    for product in products:
        try:
            # Grab the link directly from the <a> tag
            link_elem = product.find_element(By.XPATH, "./ancestor::a")
            product_link = link_elem.get_attribute("href")
        except Exception:
            product_link = None

        try:
            img_elem = product.find_element(By.TAG_NAME, "img")
            name = img_elem.get_attribute("alt").strip()
        except Exception:
            name = None

        try:
            set_name = product.find_element(By.CLASS_NAME, "product-card__set-name__variant").text.strip()
        except Exception:
            set_name = None

        try:
            mktprice_text = product.find_element(By.CLASS_NAME, "product-card__market-price--value").text.strip()
            mktprice_match = re.search(r"\$([\d,.]+)", mktprice_text)
            mktprice = float(mktprice_match.group(1).replace(",", "")) if mktprice_match else None
        except Exception:
            mktprice = None

        try:
            listings_span = product.find_element(By.CLASS_NAME, "inventory__listing-count").text.strip()
            listings_match = re.search(r"(\d+)\s+listings", listings_span)
            listings = int(listings_match.group(1)) if listings_match else None
        except Exception:
            listings = None

        results.append({
            "name": name,
            "link": product_link,
            "set": set_name,
            "mktprice": mktprice,
            "listings": listings
        })

    driver.quit()
    return results

# Run across multiple pages
all_results = []
with ThreadPoolExecutor(max_workers=6) as executor:
    futures = [executor.submit(scrape_page, p) for p in range(1, num_pages + 1)]
    for future in as_completed(futures):
        all_results.extend(future.result())

# Convert to DataFrame
pricedf = pd.DataFrame(all_results)
print(pricedf)


                                             name  \
0        Code Card - Destined Rivals Booster Pack   
1   Code Card - Prismatic Evolutions Booster Pack   
2            Code Card - White Flare Booster Pack   
3             Code Card - Black Bolt Booster Pack   
4                                           Hilda   
5                                     Air Balloon   
6                                         Pikachu   
7                                 Arven - 166/198   
8                                   Victini - 208   
9                                    Prism Energy   
10                                 Iono - 080/091   
11                                Night Stretcher   
12                                   Brave Bangle   
13                                Luminous Energy   
14                        Black Bolt Booster Pack   
15                             Buddy-Buddy Poffin   
16                                         Kyogre   
17                       White Flare Booster P

In [3]:
pricedf

Unnamed: 0,name,link,set,mktprice,listings
0,Code Card - Destined Rivals Booster Pack,,SV10: Destined Rivals,0.05,223.0
1,Code Card - Prismatic Evolutions Booster Pack,,SV: Prismatic Evolutions,0.04,222.0
2,Code Card - White Flare Booster Pack,,SV: White Flare,0.08,95.0
3,Code Card - Black Bolt Booster Pack,,SV: Black Bolt,0.05,97.0
4,Hilda,,SV: White Flare,3.86,163.0
5,Air Balloon,,SV: Black Bolt,1.58,263.0
6,Pikachu,,Celebrations,7.25,258.0
7,Arven - 166/198,,SV01: Scarlet & Violet Base Set,2.5,545.0
8,Victini - 208,,SV: Scarlet & Violet Promo Cards,7.35,186.0
9,Prism Energy,,SV: Black Bolt,0.36,586.0


In [4]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException, NoSuchElementException, StaleElementReferenceException
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
import shutil
import tempfile
import os


def create_driver():
    options = webdriver.ChromeOptions()

    # Make the driver headless with the new recommended flag
    options.add_argument("--headless=new")

    # Use a temporary copy of Chrome user profile for signed-in session
    orig_profile = "/Users/nshaffer/Library/Application Support/Google/Chrome/Profile 5"
    temp_profile_dir = tempfile.mkdtemp()

    # Copy profile contents to temp location
    shutil.copytree(orig_profile, os.path.join(temp_profile_dir, "Profile 5"))

    options.add_argument(r"--user-data-dir=" + temp_profile_dir)
    options.add_argument(r"--profile-directory=Profile 5")

    # Disable loading images, stylesheets, and fonts to speed up scraping
    prefs = {
        "profile.default_content_setting_values": {
            "images": 2,
            "stylesheets": 2,
            "fonts": 2
        }
    }
    options.add_experimental_option("prefs", prefs)

    driver = webdriver.Chrome(options=options)
    return driver


def scrape_sales_history(product_link):
    driver = create_driver()
    wait = WebDriverWait(driver, 15)
    driver.get(product_link)

    def extract_table_rows():
        rows = driver.find_elements(By.CSS_SELECTOR,
            "section.latest-sales table.latest-sales-table tbody tr")
        data = []
        for row in rows:
            try:
                date = row.find_element(By.CLASS_NAME, "latest-sales-table__tbody__date").text.strip()
                cond_elem = row.find_element(By.CLASS_NAME, "latest-sales-table__tbody__condition")
                condition = cond_elem.text.strip()
                qty = row.find_element(By.CLASS_NAME, "latest-sales-table__tbody_quantity").text.strip()
                price = row.find_element(By.CLASS_NAME, "latest-sales-table__tbody__price").text.strip()
                data.append({
                    "date": date,
                    "condition": condition,
                    "qty": int(qty),
                    "price": price
                })
            except NoSuchElementException:
                continue
        return data

    sales_data = []

    try:
        view_more_btn = wait.until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR, "div.modal__activator[role='button']")
        ))
        view_more_btn.click()

        wait.until(lambda d: len(d.find_elements(By.CSS_SELECTOR,
                  "section.latest-sales table.latest-sales-table tbody tr")) > 0)

        while True:
            try:
                load_more_btn = wait.until(EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, "button.sales-history-snapshot__load-more__button")
                ))
                driver.execute_script("arguments[0].click();", load_more_btn)

                prev_count = len(driver.find_elements(By.CSS_SELECTOR,
                    "section.latest-sales table.latest-sales-table tbody tr"))
                wait.until(lambda d: len(d.find_elements(By.CSS_SELECTOR,
                    "section.latest-sales table.latest-sales-table tbody tr")) > prev_count)
            except TimeoutException:
                break
            except StaleElementReferenceException:
                continue

        sales_data = extract_table_rows()

    except TimeoutException:
        try:
            wait.until(EC.presence_of_all_elements_located(
                (By.CSS_SELECTOR, "section.latest-sales table.latest-sales-table tbody tr")
            ))
            sales_data = extract_table_rows()
        except TimeoutException:
            print(f"No sales data available for {product_link}")

    driver.quit()
    return pd.DataFrame(sales_data)


all_sales = {}
with ThreadPoolExecutor(max_workers=6) as executor:
    futures = {executor.submit(scrape_sales_history, link): link
               for link in pricedf['link'].dropna().head(5)}
    for future in as_completed(futures):
        link = futures[future]
        try:
            df = future.result()
            all_sales[link] = df
            print(f"Completed scraping for {link}")
        except Exception as e:
            print(f"Error scraping {link}: {e}")

all_sales_df = pd.concat([df.assign(link=link) for link, df in all_sales.items()], ignore_index=True)
print(all_sales_df.head())


ValueError: No objects to concatenate

In [None]:
print(all_sales)


{'https://www.tcgplayer.com/product/633169/pokemon-sv10-destined-rivals-code-card-destined-rivals-booster-pack?Condition=Near+Mint|Lightly+Played&Language=English&page=1':       date condition  qty  price
0  8/16/25        NM    6  $0.04
1  8/16/25        NM    7  $0.04
2  8/16/25        NM    2  $0.03
3  8/16/25        NM  100  $0.06
4  8/15/25        NM   99  $0.05}


In [None]:
/Users/nshaffer/Library/Application Support/Google/Chrome/Profile 5