In [None]:
import time
import random
import json
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

In [None]:
# Game slug: interpolated into the data directory path and the Cardmarket URLs.
tcg = 'Digimon'

In [None]:
# Both JSON files live under <current working directory>/data/<tcg>/.
_tcg_data_dir = f"{os.getcwd()}/data/{tcg}"
cardlist_file_path = f"{_tcg_data_dir}/cardlist.json"
finished_expansions_file_path = f"{_tcg_data_dir}/cardlist-finished-expansions.json"

In [None]:
# Load the previously saved card list, or start from an empty one.
try:
    with open(cardlist_file_path, "r") as file:
        data = json.load(file)
except FileNotFoundError:
    # Nothing has been saved for this game yet.
    data = {'Singles': {}}
except Exception as e:
    # Keep the best-effort fallback, but make it visible: continuing with an
    # empty card list means a later save_data() replaces the unreadable file.
    print(f"Could not load the saved card list: {e}. Starting with an empty card list.")
    data = {'Singles': {}}
else:
    # Later cells index data['Singles'] directly, so make sure the key exists.
    if isinstance(data, dict):
        data.setdefault('Singles', {})

In [None]:
def save_data(data):
    """Write ``data`` to ``cardlist_file_path`` as indented JSON.

    The three outer levels (category -> expansion -> card) are rebuilt in
    sorted key order; each card's value is written unchanged. The target
    directory is created when it does not exist yet.

    Args:
        data (dict): Nested mapping ``{category: {expansion: {card: value}}}``.
    """
    sorted_data = {}
    for category, category_data in sorted(data.items()):
        sorted_category = {}
        for exp_name, exp_data in sorted(category_data.items()):
            sorted_category[exp_name] = dict(sorted(exp_data.items()))
        sorted_data[category] = sorted_category

    target_dir = os.path.dirname(cardlist_file_path)
    os.makedirs(target_dir, exist_ok=True)
    with open(cardlist_file_path, "w") as out_file:
        json.dump(sorted_data, out_file, indent=4)

In [None]:
# Cardmarket (English site) pages for the selected game.
_cardmarket_root = f"https://www.cardmarket.com/en/{tcg}"
URL = f"{_cardmarket_root}/Products"
URL_expansions = f"{_cardmarket_root}/Expansions"

# Desktop Chrome user-agent header.
# NOTE(review): not referenced by the cells visible here - confirm it is still needed.
headers = dict([
    ('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'),
])

In [None]:
def random_delay(min_delay=3, max_delay=10):
    """Sleep for a random number of seconds drawn uniformly between the bounds.

    Args:
        min_delay (float): One bound of the pause, in seconds.
        max_delay (float): The other bound of the pause, in seconds.
    """
    pause_seconds = random.uniform(min_delay, max_delay)
    time.sleep(pause_seconds)

In [None]:
def setup_driver():
    """Start a Chrome session, open ``URL`` and try to accept the cookie banner.

    Accepting the banner is best effort: when the button cannot be found or
    clicked, a message is printed and the session is returned anyway.

    Returns:
        tuple: ``(driver, actions)`` - the Chrome WebDriver and an
        ``ActionChains`` instance bound to it.

    Raises:
        Exception: Whatever Selenium raises while sizing the window or loading
            ``URL``. The browser is closed before the error propagates so no
            orphaned Chrome process is left behind.
    """
    options = Options()
    # options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-blink-features=AutomationControlled')
    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
    try:
        driver.set_window_size(random.randint(800, 1200), random.randint(600, 800))
        actions = ActionChains(driver)
        print("Driver set")
        driver.get(URL)
    except Exception:
        # The caller never receives the driver in this case, so close it here.
        driver.quit()
        raise

    # Accepting Cookies
    cookie_button = """//*[@id="CookiesConsent"]/div/div/form/div/button"""
    # Wait first, then look the button up: the lookup is part of the
    # best-effort block so an absent banner no longer aborts the setup.
    random_delay(4, 6)
    try:
        cookie_btn = driver.find_element(By.XPATH, cookie_button)
        cookie_btn.click()
        print("Cookie Accepted")
        random_delay(2, 3)
    except Exception as e:
        print(f'Could not click on cookie button {URL}: {e}')
    return driver, actions

In [None]:
# Open the browser session that the scraping functions read as a global.
driver, actions = setup_driver()

In [None]:
# Number of year groups get_expansions() reads from the Expansions page before stopping.
last_n_years = 5

In [None]:
def get_expansions(expansions):
    """Collect expansion slugs from the first ``last_n_years`` year groups.

    Loads ``URL_expansions`` with the global ``driver`` and walks the
    ``expansion-group`` elements in page order. For every ``expansion-row``
    the last path segment of its ``data-url`` attribute is taken as the slug.

    Args:
        expansions (list): Mutated in place; slugs not already present are
            appended.

    Returns:
        None
    """
    driver.get(URL_expansions)
    random_delay(2, 3)
    years = driver.find_element(By.ID, "ExpansionList").find_elements(By.CLASS_NAME, "expansion-group")
    for counter, year in enumerate(years):
        if counter == last_n_years:
            break
        # The heading is only used for the progress message, so it is printed
        # as text instead of being forced through int(), which would abort the
        # whole scrape on a non-numeric heading.
        heading = year.find_element(By.TAG_NAME, 'h2').text.split('\n')[0].strip()
        print(f"Getting expansions for year {heading}")
        random_delay(2, 6)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        for div in year.find_elements(By.CLASS_NAME, "expansion-row"):
            data_url = div.get_attribute("data-url")
            if not data_url:
                # get_attribute returns None when the attribute is missing.
                continue
            expansion = data_url.split('/')[-1]
            if expansion not in expansions:
                expansions.append(expansion)

In [None]:
# Start from an empty list; get_expansions() fills it in place.
expansions = []
get_expansions(expansions)

In [None]:
# Quick check: how many expansion slugs were collected.
len(expansions)

In [None]:
# Add an empty entry for every expansion the card list does not know yet,
# printing each newly added name.
for expansion in expansions:
    if expansion in data['Singles']:
        continue
    data['Singles'][expansion] = {}
    print(expansion)

In [None]:
# Persist the card list, including any expansions added in the cell above.
save_data(data)

In [None]:
def _parse_price(text):
    """Convert the text of a price cell to a float.

    Only the part before the first space is parsed. A lone comma is treated
    as the decimal separator. When both ``.`` and ``,`` occur, the one that
    appears last is the decimal separator and the other is dropped as a
    thousands separator, so ``"1.234,56 €"`` yields ``1234.56``.

    Raises:
        ValueError: If the remaining text is not a number.
    """
    number = text.split(' ')[0]
    last_comma = number.rfind(',')
    last_dot = number.rfind('.')
    if last_comma != -1 and last_dot != -1:
        if last_comma > last_dot:
            number = number.replace('.', '').replace(',', '.')
        else:
            number = number.replace(',', '')
    else:
        number = number.replace(',', '.')
    return float(number)


def get_cards(expansion, cards):
    """Scrape all listing pages of one expansion and append its cards.

    Uses the global ``driver``. The page count is read from the element with
    id ``pagination``; every page is then loaded in turn, and card names
    (last path segment of each row link) are paired by position with the
    values found in the ``col-price`` cells.

    Args:
        expansion (str): Expansion slug used in the Singles URL.
        cards (list): Mutated in place; one ``{card_name: price}`` dict is
            appended per listed card. A price that is missing or cannot be
            parsed is stored as ``0.0``.

    Returns:
        ``0`` when the page shows a non-empty ``noResults`` element,
        otherwise ``None``.
    """
    url = f"{URL}/Singles/{expansion}"
    driver.get(url)
    random_delay(2, 3)
    # find_elements returns an empty list when nothing matches, so no
    # exception has to be swallowed for the normal "results exist" case.
    no_results = driver.find_elements(By.CLASS_NAME, "noResults")
    if no_results and no_results[0].text != '':
        print(f"Could not get {expansion}. No Results")
        return 0
    pages = int(driver.find_element(By.ID, "pagination").text.split(' ')[-1].strip('+'))
    for page in range(1, pages+1):
        page_url = f"{URL}/Singles/{expansion}?idRarity=0&site={page}"
        random_delay(2, 6)
        driver.get(page_url)
        random_delay(1, 3)
        # actions.move_by_offset(random.randint(0, 100), random.randint(0, 100)).perform()
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        random_delay(2, 6)

        table_body = driver.find_element(By.CLASS_NAME, "table-body")
        card_names = [
            col.find_element(By.TAG_NAME, 'a').get_attribute("href").split('/')[-1]
            for col in table_body.find_elements(By.CLASS_NAME, "col-10")
        ]

        card_prices = []
        for index, col_price in enumerate(table_body.find_elements(By.CLASS_NAME, "col-price")):
            try:
                card_price = _parse_price(col_price.text)
            except Exception as e:
                # Guard the lookup: there can be more price cells than names,
                # and the error handler itself must not raise.
                label = card_names[index] if index < len(card_names) else f"row {index}"
                print(f"Could not get price for {label}. error: {e}")
                card_price = 0.0
            card_prices.append(card_price)

        for index, card_name in enumerate(card_names):
            price = card_prices[index] if index < len(card_prices) else 0.0
            cards.append({card_name: price})

In [None]:
# Load the names of expansions that were already scraped, or start empty.
try:
    with open(finished_expansions_file_path, "r") as file:
        finished_expansions = json.load(file)
except FileNotFoundError:
    # No progress file yet: nothing has been finished.
    finished_expansions = []
except Exception as e:
    # Keep the best-effort fallback, but make it visible: with an empty list
    # every expansion is scraped again and the progress file is rewritten.
    print(f"Could not load the finished expansions list: {e}. Starting with an empty list.")
    finished_expansions = []

In [None]:
def save_finished_expansions(finished_expansions):
    """Write the finished expansion names, sorted, to the progress file.

    The output goes to ``finished_expansions_file_path`` as indented JSON;
    the target directory is created when it does not exist yet.

    Args:
        finished_expansions (list): Names to persist; the list itself is not
            modified.
    """
    target_dir = os.path.dirname(finished_expansions_file_path)
    os.makedirs(target_dir, exist_ok=True)
    with open(finished_expansions_file_path, "w") as out_file:
        ordered_names = sorted(finished_expansions)
        json.dump(ordered_names, out_file, indent=4)

In [None]:
cards = []
# Expansions in the card list that are not marked as finished yet.
selected_expansions = [
    name for name in data['Singles'].keys() if name not in finished_expansions
]
print(f'Left {len(selected_expansions)} expansions')
for expansion in selected_expansions:
    print(f"Getting cards for {expansion}")
    get_cards(expansion, cards)
    for card in cards:
        for card_name, card_price in card.items():
            if card_name in data['Singles'][expansion]:
                # Cards already present keep their stored values.
                continue
            data['Singles'][expansion][card_name] = {
                "quantity": 0,
                "price_from": card_price,
                "price_trend": 0.0,
                "price_30_day_avg": 0.0,
            }
    # Checkpoint after every expansion so an interrupted run can resume.
    finished_expansions.append(expansion)
    save_finished_expansions(finished_expansions)
    save_data(data)
    cards = []
    # Replace the browser session before the next expansion.
    driver.quit()
    driver, actions = setup_driver()

In [None]:
# Close the browser session left open by the scraping loop.
driver.quit()