In [1]:
## Can also get the price chart by Pokemon number, or by URL for past prices. Look into trading strategies; also, work on a real-time offering feed vs. history.

In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
import re

# How many search-result pages to walk through
num_pages = 1
# Search URL template; the `{}` placeholder receives the page number
base_url = 'https://www.tcgplayer.com/search/pokemon/product?productLineName=pokemon&view=grid&Condition=Near+Mint|Lightly+Played&page={}&Language=English'

# Content-setting preferences for images and stylesheets
# (value 2 is presumably Chrome's "block" setting -- TODO confirm)
prefs = {
    "profile.default_content_setting_values": {
        "images": 2,
        "stylesheets": 2,
    }
}

# Headless Chrome configured with the preferences above
options = Options()
options.add_argument("--headless=new")
options.add_experimental_option("prefs", prefs)

def scrape_page(page_num):
    """Scrape one TCGplayer search-results page into a list of product records.

    A fresh Chrome driver is started for the call and is always shut down
    before the function exits, including when an exception propagates.

    Args:
        page_num: Page number substituted into ``base_url``.

    Returns:
        A list with one dict per product card found on the page, with keys
        ``name`` (the card image's alt text), ``link`` (href of the enclosing
        ``<a>``), ``set``, ``mktprice`` (float) and ``listings`` (int). A
        field that cannot be read or parsed is ``None``. Returns ``[]`` when
        no product card shows up within 10 seconds.
    """
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(base_url.format(page_num))

        try:
            WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "product-card__product"))
            )
        except Exception:
            # Best effort: a page that never renders product cards yields no
            # rows. `Exception` (not a bare except) lets Ctrl-C still interrupt.
            return []

        results = []
        for product in driver.find_elements(By.CLASS_NAME, "product-card__product"):
            try:
                # The card sits inside the <a> that links to the product page
                link_elem = product.find_element(By.XPATH, "./ancestor::a")
                product_link = link_elem.get_attribute("href")
            except Exception:
                product_link = None

            try:
                # The product name is taken from the image's alt text
                img_elem = product.find_element(By.TAG_NAME, "img")
                name = img_elem.get_attribute("alt").strip()
            except Exception:
                name = None

            try:
                set_name = product.find_element(
                    By.CLASS_NAME, "product-card__set-name__variant"
                ).text.strip()
            except Exception:
                set_name = None

            try:
                mktprice_text = product.find_element(
                    By.CLASS_NAME, "product-card__market-price--value"
                ).text.strip()
                mktprice_match = re.search(r"\$([\d,.]+)", mktprice_text)
                mktprice = (
                    float(mktprice_match.group(1).replace(",", ""))
                    if mktprice_match
                    else None
                )
            except Exception:
                mktprice = None

            try:
                listings_text = product.find_element(
                    By.CLASS_NAME, "inventory__listing-count"
                ).text.strip()
                # Accept thousands separators ("1,234 listings") and the
                # singular form ("1 listing"); a plain \d+ would read the
                # former as 234 and miss the latter entirely.
                listings_match = re.search(r"(\d[\d,]*)\s+listings?", listings_text)
                listings = (
                    int(listings_match.group(1).replace(",", ""))
                    if listings_match
                    else None
                )
            except Exception:
                listings = None

            results.append({
                "name": name,
                "link": product_link,
                "set": set_name,
                "mktprice": mktprice,
                "listings": listings
            })

        return results
    finally:
        # Always release the browser process, even on unexpected errors
        driver.quit()

# Fan the page numbers out to a small thread pool; every call to
# scrape_page starts its own browser instance.
all_results = []
page_numbers = range(1, num_pages + 1)
with ThreadPoolExecutor(max_workers=6) as pool:
    pending = [pool.submit(scrape_page, page) for page in page_numbers]
    # Rows are gathered in completion order, which need not be page order
    for finished in as_completed(pending):
        all_results += finished.result()

# One row per scraped product card
pricedf = pd.DataFrame(all_results)
print(pricedf)


                                             name  \
0        Code Card - Destined Rivals Booster Pack   
1   Code Card - Prismatic Evolutions Booster Pack   
2                                   Victini - 208   
3            Code Card - White Flare Booster Pack   
4             Code Card - Black Bolt Booster Pack   
5                                         Pikachu   
6                                           Hilda   
7                                 Arven - 166/198   
8                                     Air Balloon   
9                                  Iono - 080/091   
10                                Night Stretcher   
11                        Black Bolt Booster Pack   
12                                   Prism Energy   
13                             Buddy-Buddy Poffin   
14                                   Brave Bangle   
15                       White Flare Booster Pack   
16                                 Iono - 185/193   
17                                Luminous Ene

In [3]:
# Show the scraped listings table as this cell's output
pricedf

Unnamed: 0,name,link,set,mktprice,listings
0,Code Card - Destined Rivals Booster Pack,https://www.tcgplayer.com/product/633169/pokem...,SV10: Destined Rivals,0.05,227.0
1,Code Card - Prismatic Evolutions Booster Pack,https://www.tcgplayer.com/product/614046/pokem...,SV: Prismatic Evolutions,0.04,230.0
2,Victini - 208,https://www.tcgplayer.com/product/646169/pokem...,SV: Scarlet & Violet Promo Cards,7.25,164.0
3,Code Card - White Flare Booster Pack,https://www.tcgplayer.com/product/646130/pokem...,SV: White Flare,0.08,94.0
4,Code Card - Black Bolt Booster Pack,https://www.tcgplayer.com/product/646128/pokem...,SV: Black Bolt,0.06,97.0
5,Pikachu,https://www.tcgplayer.com/product/250303/pokem...,Celebrations,6.94,246.0
6,Hilda,https://www.tcgplayer.com/product/642200/pokem...,SV: White Flare,4.01,107.0
7,Arven - 166/198,https://www.tcgplayer.com/product/488071/pokem...,SV01: Scarlet & Violet Base Set,2.36,549.0
8,Air Balloon,https://www.tcgplayer.com/product/642531/pokem...,SV: Black Bolt,1.59,245.0
9,Iono - 080/091,https://www.tcgplayer.com/product/534442/pokem...,SV: Paldean Fates,0.3,911.0


In [7]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import pandas as pd

# Content-setting preferences for images and stylesheets
# (value 2 is presumably Chrome's "block" setting -- TODO confirm)
prefs = {
    "profile.default_content_setting_values": {
        "images": 2,
        "stylesheets": 2,
    }
}

# No headless flag is added here, so the browser window stays visible;
# the remaining flags trim resource usage.
options = Options()
for flag in (
    "--disable-gpu",
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--window-size=1280,800",
):
    options.add_argument(flag)
options.add_experimental_option("prefs", prefs)

def scrape_product_page(url):
    """Load a product page and return the outer HTML of its price table.

    A fresh Chrome driver is started for the call and is always shut down
    before the function exits, including when an exception propagates.

    Args:
        url: Address of the product page to open.

    Returns:
        The ``outerHTML`` string of the first element matching
        ``table[role='region'][aria-live='polite']``, or ``None`` (after
        printing a message) if the table does not appear within 30 seconds
        or its HTML cannot be read.
    """
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        try:
            # Allow up to 30 seconds for the table to be present in the DOM
            table = WebDriverWait(driver, 30).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, "table[role='region'][aria-live='polite']")
                )
            )
            return table.get_attribute("outerHTML")
        except Exception as e:
            print("Table not found or failed to load:", e)
            return None
    finally:
        # Always release the browser process, even if navigation fails
        driver.quit()

# Scrape the price table for the first listing, provided the listings
# frame exists, has rows, and its first row carries a link.
if not ('pricedf' in globals() and not pricedf.empty and pd.notna(pricedf.loc[0, "link"])):
    print("No valid link found in pricedf.")
else:
    first_link = pricedf.loc[0, "link"]
    html = scrape_product_page(first_link)
    print(html if html else "Failed to scrape table HTML content.")


<table data-v-8daf4e1f="" role="region" aria-live="polite"><thead data-v-8daf4e1f=""><tr data-v-8daf4e1f=""><th data-v-8daf4e1f="">Date</th><th data-v-8daf4e1f="">Normal</th></tr></thead><tbody data-v-8daf4e1f=""><tr data-v-8daf4e1f=""><td data-v-8daf4e1f="">5/21 to 5/23</td><td data-v-8daf4e1f="">$0.00</td><td data-v-8daf4e1f="">$0.00</td></tr><tr data-v-8daf4e1f=""><td data-v-8daf4e1f="">5/24 to 5/26</td><td data-v-8daf4e1f="">$0.00</td><td data-v-8daf4e1f="">$67.00</td></tr><tr data-v-8daf4e1f=""><td data-v-8daf4e1f="">5/27 to 5/29</td><td data-v-8daf4e1f="">$0.10</td><td data-v-8daf4e1f="">$7,564.00</td></tr><tr data-v-8daf4e1f=""><td data-v-8daf4e1f="">5/30 to 6/1</td><td data-v-8daf4e1f="">$0.10</td><td data-v-8daf4e1f="">$3,532.00</td></tr><tr data-v-8daf4e1f=""><td data-v-8daf4e1f="">6/2 to 6/4</td><td data-v-8daf4e1f="">$0.09</td><td data-v-8daf4e1f="">$2,881.00</td></tr><tr data-v-8daf4e1f=""><td data-v-8daf4e1f="">6/5 to 6/7</td><td data-v-8daf4e1f="">$0.09</td><td data-v-8d

In [8]:
from bs4 import BeautifulSoup
# Build a parse tree from the scraped table markup
soup = BeautifulSoup(html, "html.parser")

# The price table is identified by its role / aria-live attributes
price_table = soup.find("table", {"role": "region", "aria-live": "polite"})

# Every <tr> after the first one is treated as a data row
rows = price_table.find_all("tr")[1:]

# Turn each row with at least three cells into a record
price_history = []
for row in rows:
    cells = row.find_all("td")
    if len(cells) < 3:
        continue
    date_range, normal_price, alt_price = (
        cell.get_text(strip=True) for cell in cells[:3]
    )
    price_history.append(
        dict(zip(("Date Range", "Normal Price", "Volume"),
                 (date_range, normal_price, alt_price)))
    )

# Tabulate the records and print them
import pandas as pd
history_df = pd.DataFrame(price_history)
print(history_df)


      Date Range Normal Price     Volume
0   5/21 to 5/23        $0.00      $0.00
1   5/24 to 5/26        $0.00     $67.00
2   5/27 to 5/29        $0.10  $7,564.00
3    5/30 to 6/1        $0.10  $3,532.00
4     6/2 to 6/4        $0.09  $2,881.00
5     6/5 to 6/7        $0.09  $1,108.00
6    6/8 to 6/10        $0.07    $946.00
7   6/11 to 6/13        $0.07  $1,034.00
8   6/14 to 6/16        $0.07  $2,896.00
9   6/17 to 6/19        $0.06  $2,095.00
10  6/20 to 6/22        $0.05  $1,428.00
11  6/23 to 6/25        $0.04  $1,299.00
12  6/26 to 6/28        $0.06  $1,455.00
13   6/29 to 7/1        $0.06    $756.00
14    7/2 to 7/4        $0.07  $1,344.00
15    7/5 to 7/7        $0.07  $1,558.00
16   7/8 to 7/10        $0.07  $1,156.00
17  7/11 to 7/13        $0.06    $773.00
18  7/14 to 7/16        $0.05  $1,209.00
19  7/17 to 7/19        $0.06  $1,444.00
20  7/20 to 7/22        $0.05  $1,013.00
21  7/23 to 7/25        $0.04  $1,395.00
22  7/26 to 7/28        $0.05    $624.00
23  7/29 to 7/31