In [1]:
## TODO: the price chart can also be fetched by Pokemon number, or by URL for past prices. Look into trading strategies; also work on a real-time offering feed vs. history.

In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
import re

# Scrape configuration: how many search-result pages to fetch, and the URL
# template whose `{}` placeholder is filled with the page number.
num_pages = 1
base_url = 'https://www.tcgplayer.com/search/pokemon/product?productLineName=pokemon&view=grid&Condition=Near+Mint|Lightly+Played&page={}&Language=English'

# Chrome preferences attached to every driver built from `options`.
# NOTE(review): the value 2 presumably means "block" for images/stylesheets -- confirm.
prefs = {
    "profile.default_content_setting_values": {
        "images": 2,
        "stylesheets": 2,
    },
}

# Headless Chrome options shared by all scraper calls.
options = Options()
options.add_argument("--headless=new")
options.add_experimental_option("prefs", prefs)

def _parse_market_price(text):
    """Return the first ``$``-prefixed amount in ``text`` as a float, or None if absent."""
    match = re.search(r"\$([\d,.]+)", text)
    if match is None:
        return None
    return float(match.group(1).replace(",", ""))


def _parse_listing_count(text):
    """Return the integer that precedes "listing"/"listings" in ``text``, or None.

    Thousands separators are accepted ("1,234 listings" -> 1234) and the
    singular form is recognised ("1 listing" -> 1).
    """
    match = re.search(r"(\d[\d,]*)\s+listings?\b", text)
    if match is None:
        return None
    return int(match.group(1).replace(",", ""))


def _best_effort(getter):
    """Call ``getter()`` and return its result, or None if it raises any Exception."""
    try:
        return getter()
    except Exception:
        return None


def _extract_product(product):
    """Build the result dict for one product-card element; missing fields become None."""

    def link():
        # The card is wrapped by an <a>; its href is the product page URL.
        return product.find_element(By.XPATH, "./ancestor::a").get_attribute("href")

    def name():
        # The product name is read from the image's alt text.
        return product.find_element(By.TAG_NAME, "img").get_attribute("alt").strip()

    def set_name():
        return product.find_element(By.CLASS_NAME, "product-card__set-name__variant").text.strip()

    def market_price():
        raw = product.find_element(By.CLASS_NAME, "product-card__market-price--value").text.strip()
        return _parse_market_price(raw)

    def listing_count():
        raw = product.find_element(By.CLASS_NAME, "inventory__listing-count").text.strip()
        return _parse_listing_count(raw)

    return {
        "name": _best_effort(name),
        "link": _best_effort(link),
        "set": _best_effort(set_name),
        "mktprice": _best_effort(market_price),
        "listings": _best_effort(listing_count),
    }


def scrape_page(page_num):
    """Scrape one search-result page and return a list of product dicts.

    Args:
        page_num: Page number substituted into ``base_url``.

    Returns:
        A list of dicts with keys ``name``, ``link``, ``set``, ``mktprice``
        and ``listings``. Any field that cannot be read is None. An empty
        list is returned when no product cards appear within 10 seconds.

    The Chrome driver started here is always quit, including when page
    loading or element lookup raises.
    """
    driver = webdriver.Chrome(options=options)
    try:
        wait = WebDriverWait(driver, 10)
        driver.get(base_url.format(page_num))

        try:
            wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "product-card__product")))
        except Exception:
            # Best effort: a page without product cards contributes no rows.
            # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
            return []

        products = driver.find_elements(By.CLASS_NAME, "product-card__product")
        return [_extract_product(product) for product in products]
    finally:
        driver.quit()

# Scrape all pages concurrently, with at most 6 scrape_page calls in flight.
# executor.map yields results in submission order, so rows land in page order
# no matter which page finishes first (as_completed made the row order vary
# between runs). An exception raised while scraping a page is re-raised here
# when that page's result is consumed.
all_results = []
with ThreadPoolExecutor(max_workers=6) as executor:
    for page_results in executor.map(scrape_page, range(1, num_pages + 1)):
        all_results.extend(page_results)

# One row per scraped product dict.
pricedf = pd.DataFrame(all_results)
print(pricedf)


                                             name  \
0        Code Card - Destined Rivals Booster Pack   
1            Code Card - White Flare Booster Pack   
2   Code Card - Prismatic Evolutions Booster Pack   
3             Code Card - Black Bolt Booster Pack   
4                                           Hilda   
5                                 Arven - 166/198   
6                                     Air Balloon   
7                                         Pikachu   
8                                   Victini - 208   
9                                  Iono - 080/091   
10                                   Prism Energy   
11                                Night Stretcher   
12                                   Brave Bangle   
13                                Luminous Energy   
14                        Black Bolt Booster Pack   
15                             Buddy-Buddy Poffin   
16                                         Kyogre   
17                       White Flare Booster P

In [3]:
# Display the scraped product table (`pricedf` is assigned by the scraping cell above).
pricedf

Unnamed: 0,name,link,set,mktprice,listings
0,Code Card - Destined Rivals Booster Pack,https://www.tcgplayer.com/product/633169/pokem...,SV10: Destined Rivals,0.05,222.0
1,Code Card - White Flare Booster Pack,https://www.tcgplayer.com/product/646130/pokem...,SV: White Flare,0.08,95.0
2,Code Card - Prismatic Evolutions Booster Pack,https://www.tcgplayer.com/product/614046/pokem...,SV: Prismatic Evolutions,0.04,225.0
3,Code Card - Black Bolt Booster Pack,https://www.tcgplayer.com/product/646128/pokem...,SV: Black Bolt,0.05,99.0
4,Hilda,https://www.tcgplayer.com/product/642200/pokem...,SV: White Flare,3.77,155.0
5,Arven - 166/198,https://www.tcgplayer.com/product/488071/pokem...,SV01: Scarlet & Violet Base Set,2.35,542.0
6,Air Balloon,https://www.tcgplayer.com/product/642531/pokem...,SV: Black Bolt,1.45,253.0
7,Pikachu,https://www.tcgplayer.com/product/250303/pokem...,Celebrations,7.24,262.0
8,Victini - 208,https://www.tcgplayer.com/product/646169/pokem...,SV: Scarlet & Violet Promo Cards,6.8,165.0
9,Iono - 080/091,https://www.tcgplayer.com/product/534442/pokem...,SV: Paldean Fates,0.34,908.0


In [12]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import pandas as pd

from selenium.common.exceptions import WebDriverException

# 0-based <td> positions of the price and quantity cells within a history row.
# NOTE(review): 11/12 are carried over unchanged -- confirm against the live table layout.
PRICE_COL = 11
QUANTITY_COL = 12
HISTORY_TBODY_XPATH = "//div[contains(@class, 'martech-charts-history')]//table/tbody"


def _find_history_tbody(drv):
    """Return the price-history <tbody> from the main document or any iframe, else False.

    Used as a WebDriverWait condition, so it is re-evaluated on every poll.
    When the table is found inside an iframe the driver is left switched into
    that frame so the returned element stays usable; otherwise the driver is
    returned to the top-level document.
    """
    drv.switch_to.default_content()
    hits = drv.find_elements(By.XPATH, HISTORY_TBODY_XPATH)
    if hits:
        return hits[0]
    for frame in drv.find_elements(By.TAG_NAME, "iframe"):
        drv.switch_to.default_content()
        try:
            drv.switch_to.frame(frame)
            hits = drv.find_elements(By.XPATH, HISTORY_TBODY_XPATH)
        except WebDriverException:
            # The frame went away or could not be entered; try the next one.
            continue
        if hits:
            return hits[0]
    drv.switch_to.default_content()
    return False


# Resolve the target URL before starting Chrome so a failed lookup cannot leak a browser.
first_link = pricedf['link'].iloc[0]

driver = webdriver.Chrome()
driver.set_page_load_timeout(60)  # wait up to 60 seconds for full page load

dates, prices, quantities = [], [], []

try:
    try:
        driver.get(first_link)
    except TimeoutException:
        print("Page load timed out but continuing...")

    # Wait for full page ready state
    WebDriverWait(driver, 30).until(lambda d: d.execute_script('return document.readyState') == 'complete')

    # Search the main document first and only then the iframes. Switching into
    # whichever iframe shows up first would scope every later lookup to that
    # frame and hide a table that lives in the main document.
    table_body = WebDriverWait(driver, 30).until(_find_history_tbody)

    rows = table_body.find_elements(By.TAG_NAME, "tr")

    for row in rows:
        cols = row.find_elements(By.TAG_NAME, "td")
        # Skip rows too short to hold both indexed cells (the old `>= 3` guard
        # still allowed an IndexError on cols[11] / cols[12]).
        if len(cols) <= max(PRICE_COL, QUANTITY_COL):
            continue
        date_text = cols[0].text.strip()
        price_text = cols[PRICE_COL].text.strip().replace('$', '').replace(',', '')
        quantity_text = cols[QUANTITY_COL].text.strip().replace('$', '').replace(',', '')
        if price_text == '' or quantity_text == '':
            continue
        # round() rather than int(): float("0.29") * 100 is 28.999999999999996,
        # which int() would truncate to 28 cents.
        price_int = round(float(price_text) * 100)
        quantity_int = int(float(quantity_text))
        dates.append(date_text)
        prices.append(price_int)
        quantities.append(quantity_int)
finally:
    # Always close the browser, including when a wait above times out.
    driver.quit()

# The scalar `first_link` is broadcast to every row.
subdataframe = pd.DataFrame({
    'Date': dates,
    'PriceCents': prices,
    'Quantity': quantities,
    'Link': first_link
})

print(subdataframe)


TimeoutException: Message: 
Stacktrace:
0   chromedriver                        0x0000000109f96ef8 chromedriver + 5918456
1   chromedriver                        0x0000000109f8e48a chromedriver + 5883018
2   chromedriver                        0x0000000109a5ae20 chromedriver + 429600
3   chromedriver                        0x0000000109aaced4 chromedriver + 765652
4   chromedriver                        0x0000000109aad0f1 chromedriver + 766193
5   chromedriver                        0x0000000109afcce4 chromedriver + 1092836
6   chromedriver                        0x0000000109ad2f3d chromedriver + 921405
7   chromedriver                        0x0000000109afa024 chromedriver + 1081380
8   chromedriver                        0x0000000109ad2ce3 chromedriver + 920803
9   chromedriver                        0x0000000109a9f29b chromedriver + 709275
10  chromedriver                        0x0000000109a9ff81 chromedriver + 712577
11  chromedriver                        0x0000000109f53c00 chromedriver + 5643264
12  chromedriver                        0x0000000109f57ab4 chromedriver + 5659316
13  chromedriver                        0x0000000109f2f472 chromedriver + 5493874
14  chromedriver                        0x0000000109f5855f chromedriver + 5662047
15  chromedriver                        0x0000000109f1e414 chromedriver + 5424148
16  chromedriver                        0x0000000109f7b778 chromedriver + 5805944
17  chromedriver                        0x0000000109f7b940 chromedriver + 5806400
18  chromedriver                        0x0000000109f8e061 chromedriver + 5881953
19  libsystem_pthread.dylib             0x00007ff80a45a18b _pthread_start + 99
20  libsystem_pthread.dylib             0x00007ff80a455ae3 thread_start + 15


In [5]:
"""from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd

# Setup Chrome options for speed optimizations
options = webdriver.ChromeOptions()
options.add_argument("--headless")           # Run in headless mode
options.add_argument("--disable-gpu")        # Disable GPU hardware acceleration
options.add_argument("--no-sandbox")         # Bypass OS security model, needed in some environments
options.add_argument("--disable-extensions") # Disable extensions for faster load
options.add_argument("--disable-animations") # Disable animations if supported
options.add_argument("--disable-infobars")   # Disable info bars

# Initialize driver with options and WebDriver Manager
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

url = pricedf["link"].iloc[0]  # First link
driver.get(url)

wait = WebDriverWait(driver, 10)
# Use precise CSS selector for the table inside the dynamically loaded div
table = wait.until(EC.presence_of_element_located(
    (By.CSS_SELECTOR, "div.martek-charts-chart table")
))

rows = table.find_elements(By.CSS_SELECTOR, "tbody tr")  # Cache rows

data = []
for row in rows:
    cols = row.find_elements(By.TAG_NAME, "td")
    if len(cols) >= 3:
        date = cols.text.strip()
        price = cols[10].text.strip()
        quantity = cols[11].text.strip()
        data.append([date, price, quantity])

driver.quit()

df_history = pd.DataFrame(data, columns=["Date", "Price", "Quantity"])
print(df_history.head())
"""

'from selenium import webdriver\nfrom selenium.webdriver.chrome.service import Service\nfrom selenium.webdriver.common.by import By\nfrom selenium.webdriver.support.ui import WebDriverWait\nfrom selenium.webdriver.support import expected_conditions as EC\nfrom webdriver_manager.chrome import ChromeDriverManager\nimport pandas as pd\n\n# Setup Chrome options for speed optimizations\noptions = webdriver.ChromeOptions()\noptions.add_argument("--headless")           # Run in headless mode\noptions.add_argument("--disable-gpu")        # Disable GPU hardware acceleration\noptions.add_argument("--no-sandbox")         # Bypass OS security model, needed in some environments\noptions.add_argument("--disable-extensions") # Disable extensions for faster load\noptions.add_argument("--disable-animations") # Disable animations if supported\noptions.add_argument("--disable-infobars")   # Disable info bars\n\n# Initialize driver with options and WebDriver Manager\nservice = Service(ChromeDriverManager(

In [6]:
# NOTE(review): `df_history` is only assigned inside the string-quoted (disabled)
# cell above, so this line raises NameError on a fresh kernel. Either re-enable
# that cell or remove this one.
print(df_history)
# try scraping single head

NameError: name 'df_history' is not defined

In [None]:
# Note (not runnable code): machine-specific local Chrome profile directory:
# /Users/nshaffer/Library/Application Support/Google/Chrome/Profile 5