In [1]:
## Can also get the price chart by Pokemon number, or by URL for past prices. Look into trading strategies; also, work on a real-time offering feed vs. price history.

In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
import re

# Scrape settings: how many search-result pages to fetch, and the URL
# template whose `{}` placeholder receives the page number.
num_pages = 1
base_url = (
    'https://www.tcgplayer.com/search/pokemon/product'
    '?productLineName=pokemon&view=grid'
    '&Condition=Near+Mint|Lightly+Played'
    '&page={}&Language=English'
)

# Chrome profile preferences for images and stylesheets.
# NOTE(review): 2 is presumably Chrome's "block" content-setting value - confirm.
prefs = {
    "profile.default_content_setting_values": {
        "images": 2,
        "stylesheets": 2,
    }
}

# Headless Chrome options, read as a global by scrape_page when it creates a driver.
options = Options()
options.add_experimental_option("prefs", prefs)
options.add_argument("--headless=new")

def _parse_market_price(text):
    """Return the first dollar amount in `text` (e.g. "$1,234.56") as a float, else None."""
    match = re.search(r"\$([\d,.]+)", text)
    if not match:
        return None
    try:
        return float(match.group(1).replace(",", ""))
    except ValueError:
        # e.g. "$1.2.3" matches the pattern but is not a valid number
        return None


def _parse_listing_count(text):
    """Return the count from text like "1,234 listings" or "1 listing" as an int, else None."""
    # Accept thousands separators and the singular form; matching only
    # r"(\d+)\s+listings" would read "1,234 listings" as 234 and miss "1 listing".
    match = re.search(r"(\d[\d,]*)\s+listings?", text)
    if not match:
        return None
    try:
        return int(match.group(1).replace(",", ""))
    except ValueError:
        return None


def _child_text(card, class_name):
    """Return the stripped text of the first child of `card` with `class_name`, or None."""
    try:
        return card.find_element(By.CLASS_NAME, class_name).text.strip()
    except Exception:
        return None


def _parse_product_card(card):
    """Turn one product-card element into a result dict; fields that cannot be read are None."""
    try:
        # The card sits inside an <a> tag; take the link from that ancestor.
        link = card.find_element(By.XPATH, "./ancestor::a").get_attribute("href")
    except Exception:
        link = None

    try:
        # The product name is read from the image's alt text.
        name = card.find_element(By.TAG_NAME, "img").get_attribute("alt").strip()
    except Exception:
        name = None

    price_text = _child_text(card, "product-card__market-price--value")
    listing_text = _child_text(card, "inventory__listing-count")

    return {
        "name": name,
        "link": link,
        "set": _child_text(card, "product-card__set-name__variant"),
        "mktprice": _parse_market_price(price_text) if price_text is not None else None,
        "listings": _parse_listing_count(listing_text) if listing_text is not None else None,
    }


def scrape_page(page_num):
    """Scrape one search-results page into a list of product dicts.

    Opens a new Chrome driver using the module-level `options`, loads
    `base_url` formatted with `page_num`, and waits up to 10 seconds for
    product cards to appear.

    Args:
        page_num: Page number substituted into `base_url`.

    Returns:
        A list with one dict per product card, with keys "name", "link",
        "set", "mktprice" (float or None) and "listings" (int or None).
        Returns an empty list if no product card appears before the wait
        gives up.

    The driver is always quit, including when navigation or element lookup
    raises; such exceptions still propagate to the caller.
    """
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(base_url.format(page_num))

        try:
            WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "product-card__product"))
            )
        except Exception:
            # Best effort: a page with no cards (or a timeout) yields no rows.
            return []

        cards = driver.find_elements(By.CLASS_NAME, "product-card__product")
        return [_parse_product_card(card) for card in cards]
    finally:
        # Guarantees the browser process is released on every exit path.
        driver.quit()

# Scrape every requested page concurrently; each scrape_page call opens its
# own Chrome driver. Futures are consumed in submission order so the rows are
# ordered by page number on every run, and a page that raises is reported and
# skipped instead of discarding the pages that succeeded.
all_results = []
pages = range(1, num_pages + 1)
with ThreadPoolExecutor(max_workers=6) as executor:
    futures = [executor.submit(scrape_page, p) for p in pages]
    for page_num, future in zip(pages, futures):
        try:
            all_results.extend(future.result())
        except Exception as exc:
            print(f"Page {page_num} failed: {exc}")

# Convert to DataFrame; the explicit column list keeps the schema stable even
# when nothing was scraped.
pricedf = pd.DataFrame(all_results, columns=["name", "link", "set", "mktprice", "listings"])
print(pricedf)


                                             name  \
0        Code Card - Destined Rivals Booster Pack   
1            Code Card - White Flare Booster Pack   
2             Code Card - Black Bolt Booster Pack   
3   Code Card - Prismatic Evolutions Booster Pack   
4                                           Hilda   
5                                   Victini - 208   
6                                         Pikachu   
7                                 Arven - 166/198   
8                                     Air Balloon   
9                                  Iono - 080/091   
10                                   Prism Energy   
11                                Night Stretcher   
12                                Luminous Energy   
13                                   Brave Bangle   
14                        Black Bolt Booster Pack   
15                             Buddy-Buddy Poffin   
16                                         Kyogre   
17                       White Flare Booster P

In [3]:
# Display the scraped listings table; `pricedf` is built by the page-scraping cell above.
pricedf

Unnamed: 0,name,link,set,mktprice,listings
0,Code Card - Destined Rivals Booster Pack,https://www.tcgplayer.com/product/633169/pokem...,SV10: Destined Rivals,0.05,223.0
1,Code Card - White Flare Booster Pack,https://www.tcgplayer.com/product/646130/pokem...,SV: White Flare,0.08,97.0
2,Code Card - Black Bolt Booster Pack,https://www.tcgplayer.com/product/646128/pokem...,SV: Black Bolt,0.05,99.0
3,Code Card - Prismatic Evolutions Booster Pack,https://www.tcgplayer.com/product/614046/pokem...,SV: Prismatic Evolutions,0.04,227.0
4,Hilda,https://www.tcgplayer.com/product/642200/pokem...,SV: White Flare,3.5,156.0
5,Victini - 208,https://www.tcgplayer.com/product/646169/pokem...,SV: Scarlet & Violet Promo Cards,6.99,168.0
6,Pikachu,https://www.tcgplayer.com/product/250303/pokem...,Celebrations,7.24,265.0
7,Arven - 166/198,https://www.tcgplayer.com/product/488071/pokem...,SV01: Scarlet & Violet Base Set,2.37,549.0
8,Air Balloon,https://www.tcgplayer.com/product/642531/pokem...,SV: Black Bolt,1.52,257.0
9,Iono - 080/091,https://www.tcgplayer.com/product/534442/pokem...,SV: Paldean Fates,0.35,912.0


In [4]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import pandas as pd  # Ensure pandas is imported if you're using pricedf


# Chrome options for the product-page scraper in this cell. The headless flag
# is not added here, so the browser window is visible while it runs.
options = Options()

# `prefs` is defined in this cell rather than inherited from an earlier one,
# so the cell does not depend on leftover kernel state.
prefs = {"profile.default_content_setting_values": {"images": 2, "stylesheets": 2}}
options.add_experimental_option("prefs", prefs)


def scrape_product_page(url):
    """Load a product page and return the inner HTML of its ``#app`` container.

    Opens a new Chrome driver using the module-level `options`, loads `url`,
    waits (up to 15 seconds per wait) for the page to render, refreshes it,
    tries to click a chart button, and finally reads ``#app``'s innerHTML.

    Args:
        url: Address of the product page to load.

    Returns:
        The innerHTML string of ``#app``, or None if ``#app`` never appeared
        or its HTML could not be read. A failure to click the chart button is
        printed but does not prevent the HTML from being returned.

    The driver is always quit, including when `driver.get` or
    `driver.refresh` raises; such exceptions still propagate to the caller.
    """
    driver = webdriver.Chrome(options=options)
    try:
        wait = WebDriverWait(driver, 15)
        driver.get(url)

        try:
            # Preferred signal: a product title rendered inside #app.
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#app .product-title")))
        except Exception:
            try:
                # Fallback: settle for the #app container itself.
                wait.until(EC.presence_of_element_located((By.ID, "app")))
            except Exception:
                print("Failed to load #app container.")
                return None

        # Refresh page to ensure content updates
        driver.refresh()

        try:
            # NOTE(review): the selector targets whichever chart button is
            # currently active; confirm that this is the 3M button.
            button_3m = wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, "button.active.charts-item"))
            )
            button_3m.click()
            # Wait for table rows to be present after the click.
            wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "table[role='region'] tbody tr"))
            )
        except Exception as e:
            print("Failed to click 3M button or wait for update:", e)

        try:
            return driver.find_element(By.ID, "app").get_attribute("innerHTML")
        except Exception:
            print("Failed to extract innerHTML from #app.")
            return None
    finally:
        # Guarantees the browser process is released on every exit path.
        driver.quit()


# Demo: scrape the product page linked from the first row of pricedf and
# print the HTML that comes back. Nothing is scraped unless pricedf exists,
# has rows, and its first link is not missing.
if 'pricedf' not in globals() or pricedf.empty or pd.isna(pricedf.loc[0, "link"]):
    print("No valid link found in pricedf.")
else:
    first_link = pricedf.loc[0, "link"]
    html = scrape_product_page(first_link)
    print(html if html else "Failed to scrape HTML content.")


<div class=""><div class="marketplace"><span data-v-37071977=""></span><!----><div class="site-alerts"><!----></div><!----><header data-v-94cc0f67="" class="mp-header mp-header mp-header-reduced-padding" logo-url="/"><div data-v-94cc0f67="" class="mp-header__content"><div data-v-53b7ac64="" data-v-94cc0f67="" class="marketplace-nav-mobile dropdown"><div data-v-53b7ac64="" class="dropdown-trigger"><button data-v-53b7ac64="" class="tcg-button tcg-button--md tcg-standard-button tcg-standard-button--flat button" aria-haspopup="true" aria-controls="mp-nav-mobile-drop" aria-label="open mobile navigation menu"><!----><span class="tcg-standard-button__content"><span data-v-53b7ac64="" class="tcg-icon" aria-hidden="true" style="font-size: inherit;"><svg aria-hidden="true" focusable="false" data-prefix="far" data-icon="bars" class="svg-inline--fa fa-bars" role="img" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><path fill="currentColor" d="M0 88C0 74.75 10.75 64 24 64H424C437.3 64 448

In [5]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import pandas as pd

# Chrome profile preferences for images and stylesheets.
# NOTE(review): 2 is presumably Chrome's "block" content-setting value - confirm.
prefs = {
    "profile.default_content_setting_values": {
        "images": 2,
        "stylesheets": 2,
    }
}

# No headless flag is added, so the browser window stays visible; the
# remaining flags and the window size are passed straight to Chrome.
options = Options()
options.add_experimental_option("prefs", prefs)
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--window-size=1280,800")

def scrape_product_page(url):
    """Load a product page and return the outer HTML of its price-history table.

    Opens a new Chrome driver using the module-level `options`, loads `url`,
    and waits up to 30 seconds for a ``<table role="region"
    aria-live="polite">`` element to be present.

    Args:
        url: Address of the product page to load.

    Returns:
        The table's outerHTML string, or None if the table did not appear or
        could not be read (the error is printed).

    The driver is always quit, including when `driver.get` raises; that
    exception still propagates to the caller.
    """
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        try:
            # Upper bound of 30 seconds; the wait returns as soon as the table is present.
            table = WebDriverWait(driver, 30).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, "table[role='region'][aria-live='polite']")
                )
            )
            return table.get_attribute("outerHTML")
        except Exception as e:
            print("Table not found or failed to load:", e)
            return None
    finally:
        # Guarantees the browser process is released on every exit path.
        driver.quit()

# Demo: fetch the price-history table for the product linked from the first
# row of pricedf and print its HTML. Nothing is scraped unless pricedf
# exists, has rows, and its first link is not missing.
if 'pricedf' not in globals() or pricedf.empty or pd.isna(pricedf.loc[0, "link"]):
    print("No valid link found in pricedf.")
else:
    first_link = pricedf.loc[0, "link"]
    html = scrape_product_page(first_link)
    print(html if html else "Failed to scrape table HTML content.")


<table data-v-8daf4e1f="" role="region" aria-live="polite"><thead data-v-8daf4e1f=""><tr data-v-8daf4e1f=""><th data-v-8daf4e1f="">Date</th><th data-v-8daf4e1f="">Normal</th></tr></thead><tbody data-v-8daf4e1f=""><tr data-v-8daf4e1f=""><td data-v-8daf4e1f="">5/17 to 5/19</td><td data-v-8daf4e1f="">$0.00</td><td data-v-8daf4e1f="">$0.00</td></tr><tr data-v-8daf4e1f=""><td data-v-8daf4e1f="">5/20 to 5/22</td><td data-v-8daf4e1f="">$0.00</td><td data-v-8daf4e1f="">$0.00</td></tr><tr data-v-8daf4e1f=""><td data-v-8daf4e1f="">5/23 to 5/25</td><td data-v-8daf4e1f="">$0.00</td><td data-v-8daf4e1f="">$67.00</td></tr><tr data-v-8daf4e1f=""><td data-v-8daf4e1f="">5/26 to 5/28</td><td data-v-8daf4e1f="">$0.10</td><td data-v-8daf4e1f="">$5,246.00</td></tr><tr data-v-8daf4e1f=""><td data-v-8daf4e1f="">5/29 to 5/31</td><td data-v-8daf4e1f="">$0.10</td><td data-v-8daf4e1f="">$4,695.00</td></tr><tr data-v-8daf4e1f=""><td data-v-8daf4e1f="">6/1 to 6/3</td><td data-v-8daf4e1f="">$0.10</td><td data-v-8da

In [15]:
from bs4 import BeautifulSoup
import pandas as pd

# `html` comes from the product-page scraping cell, which leaves it as None
# when the scrape fails; stop with a clear message instead of an opaque
# TypeError from the parser.
if not html:
    raise ValueError("No HTML to parse: the product-page scrape returned nothing.")

# Parse the HTML content with BeautifulSoup
soup = BeautifulSoup(html, "html.parser")

# Locate the table containing historical price data
price_table = soup.find("table", {"role": "region", "aria-live": "polite"})
if price_table is None:
    raise ValueError("Price-history table not found in the scraped HTML.")

# Extract rows from the table
rows = price_table.find_all("tr")[1:]  # Skip header row

# Parse each row into structured data; rows with fewer than three cells are skipped.
price_history = []
for row in rows:
    cells = row.find_all("td")
    if len(cells) >= 3:
        date_range = cells[0].get_text(strip=True)
        normal_price = cells[1].get_text(strip=True)
        # NOTE(review): the third cell has no matching header in the scraped
        # table and its values are dollar-formatted; "Volume" is this
        # notebook's label for it - confirm what it actually measures.
        alt_price = cells[2].get_text(strip=True)
        price_history.append({
            "Date Range": date_range,
            "Normal Price": normal_price,
            "Volume": alt_price
        })

# Convert to DataFrame for analysis or export; the explicit column list keeps
# the schema stable even when no row qualified.
history_df = pd.DataFrame(price_history, columns=["Date Range", "Normal Price", "Volume"])

# Display the result
print(history_df)


      Date Range Normal Price     Volume
0   5/17 to 5/19        $0.00      $0.00
1   5/20 to 5/22        $0.00      $0.00
2   5/23 to 5/25        $0.00     $67.00
3   5/26 to 5/28        $0.10  $5,246.00
4   5/29 to 5/31        $0.10  $4,695.00
5     6/1 to 6/3        $0.10  $3,681.00
6     6/4 to 6/6        $0.09  $1,361.00
7     6/7 to 6/9        $0.08    $813.00
8   6/10 to 6/12        $0.07    $750.00
9   6/13 to 6/15        $0.07  $3,208.00
10  6/16 to 6/18        $0.06  $1,818.00
11  6/19 to 6/21        $0.06  $1,209.00
12  6/22 to 6/24        $0.05  $1,459.00
13  6/25 to 6/27        $0.05  $1,471.00
14  6/28 to 6/30        $0.06  $1,149.00
15    7/1 to 7/3        $0.07  $1,018.00
16    7/4 to 7/6        $0.07    $835.00
17    7/7 to 7/9        $0.07  $1,714.00
18  7/10 to 7/12        $0.07    $879.00
19  7/13 to 7/15        $0.05  $1,323.00
20  7/16 to 7/18        $0.06    $923.00
21  7/19 to 7/21        $0.05  $1,776.00
22  7/22 to 7/24        $0.04  $1,363.00
23  7/25 to 7/27