In [1]:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.ui import WebDriverWait

url = "https://www.tradeupspy.com/skins"
options = Options()
options.add_argument('--headless')

# Start from a known value: if the wait below times out, the cell that parses
# div_html must not hit a NameError or silently reuse a value left in the
# kernel by an earlier run. An empty document simply yields no collections.
div_html = ""

# Configure Selenium to use Firefox
browser = webdriver.Firefox(options=options)
try:
    browser.get(url)
    # We use the XPath to find the desired element
    div_xpath = "/html/body/app-root/html/body/app-skins/div[4]/div[2]/div[7]"
    # Wait up to 10 seconds for the element to be present in the DOM
    target_div = WebDriverWait(browser, 10).until(
        ec.presence_of_element_located((By.XPATH, div_xpath))
    )

    div_html = target_div.get_attribute('outerHTML')
except TimeoutException:
    print("I give up...")
finally:
    # Always release the headless browser, whether or not the wait succeeded
    browser.quit()


In [2]:
# Scheme and host that parse_collection prepends to the site-relative
# collection url it receives.
website_prefix = "https://www.tradeupspy.com"
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec

def parse_collection(collection_name, collection_url):
    """
    Scrape one collection page and pass every non-knife skin on it to parse_skin.

    collection_name -> example: "2018 Inferno"
    collection_url -> site-relative url of the collection page; it is appended
                      to website_prefix

    If the skin-card container or the StatTrak label does not show up within
    10 seconds, "I give up..." is printed and no skin is processed.
    """
    # Imported here because the notebook only imports bs4 in a later cell;
    # without this the function depends on that cell having been run first.
    from bs4 import BeautifulSoup

    collection_page_url = website_prefix + collection_url
    print(f"Processing: {collection_page_url}")

    skin_cards_xpath = "/html/body/app-root/html/body/app-skins/div[7]/div[2]"
    stattrak_xpath = "/html/body/app-root/html/body/app-skins/div[6]/div[2]/div[2]/div/p"

    browser = webdriver.Firefox(options=options)
    try:
        browser.get(collection_page_url)

        # Wait for the container that holds the skin cards. The cards inside
        # are assumed to have rendered by then -- this is not verified.
        WebDriverWait(browser, 10).until(
            ec.presence_of_element_located((By.XPATH, skin_cards_xpath))
        )

        # Wait for the 'stattrak available' element to load
        stattrak_label = WebDriverWait(browser, 10).until(
            ec.presence_of_element_located((By.XPATH, stattrak_xpath))
        )

        # Expected texts are "StatTrak™ unavailable" or "StatTrak™ available";
        # anything other than the latter is treated as unavailable.
        stattrak_available = stattrak_label.text == "StatTrak™ available"
        if stattrak_available:
            print("StatTrak is available.")

        # Look the container up again after both waits and snapshot its HTML,
        # so the parsing below no longer needs a live browser.
        skin_cards_div = browser.find_element(By.XPATH, skin_cards_xpath)
        skin_cards_html = skin_cards_div.get_attribute('outerHTML')
    except TimeoutException:
        print("I give up...")
        return
    finally:
        # Close this browser before parse_skin starts one browser per skin,
        # instead of keeping it open for the whole collection.
        browser.quit()

    cards_soup = BeautifulSoup(skin_cards_html, 'html.parser')

    # iterate over all skin cards
    for skin_card_div in cards_soup.find_all('div', class_="skin_card_collection"):
        skin_card_wrapper = skin_card_div.find('div', class_="skin_card_wrapper")

        # get skin quality. For example: classified_bg, industrial_bg, etc...
        # It is read from the second CSS class of the wrapper.
        skin_quality = skin_card_wrapper['class'][1]
        if skin_quality == 'knife_bg':
            # current skin is a knife, so we skip (because knives cant be in tradeups)
            continue

        # get weapon and skin name
        name_container = skin_card_wrapper.find('div', class_="skin_card_name_container")
        name_link = name_container.find('a', class_="skin_card_collection_name")
        # The link holds "<weapon><br><skin>": take the text node before the
        # <br> tag and the node right after it.
        weapon_name = name_link.find(string=True, recursive=False)
        skin_name = name_link.br.next_sibling

        # get url to skin web page
        image_container = skin_card_wrapper.find('div', class_="skin_card_image_container")
        image_link = image_container.find('a', class_="skin_card_collection_image_url")
        skin_url = image_link['href']

        # Combine the parts with a separator.
        full_skin_name = f"{weapon_name.strip()} | {skin_name.strip()}"
        parse_skin(collection_name, full_skin_name, skin_quality, skin_url, stattrak_available)



In [3]:
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
from entities import Skin
from db import create_skin

# Scheme and host that parse_skin prepends to the site-relative skin url it
# receives. Assigned with the same value as in the earlier cell.
website_prefix = "https://www.tradeupspy.com"

def wait_for_defined_text(driver, xpath, timeout=10):
    """
    Poll the element at xpath until its stripped text is non-empty and is not
    'undefined' (compared case-insensitively), then return that text.

    driver -> Selenium WebDriver to query
    xpath -> XPath of the element whose text is awaited
    timeout -> seconds handed to WebDriverWait before it gives up

    A missing or stale element is ignored while polling, so the lookup is
    simply retried on the next poll.
    """
    def _text_once_defined(current_driver):
        stripped = current_driver.find_element(By.XPATH, xpath).text.strip()
        if stripped.lower() == 'undefined':
            return False
        # An empty string is falsy, so the wait keeps polling in that case too.
        return stripped

    waiter = WebDriverWait(
        driver,
        timeout,
        ignored_exceptions=(NoSuchElementException, StaleElementReferenceException),
    )
    return waiter.until(_text_once_defined)

def parse_skin(collection_name, skin_name, quality, url, stattrak_available):
    """
    Open a skin page, read its min/max float and pass the skin to create_skin.

    collection_name -> example: "2018 Inferno"
    skin_name -> example : "M4A4 | Converter"
    quality -> example : "classified_bg"
    url -> internal url of the skin (appended to website_prefix)
    stattrak_available -> when truthy, a second Skin with stattrak=True is
                          created and passed to create_skin as well

    If either float value does not become available in time, "I give up..."
    is printed and nothing is created.
    """
    skin_page_url = website_prefix + url
    print(f"Processing skin: {skin_page_url}")

    # Both values live under the same container; only the <p> index differs.
    float_container_xpath = "/html/body/app-root/html/body/app-skins/div[6]/div[2]/div[2]/div[1]/div/div"
    min_float_xpath = float_container_xpath + "/p[1]/b"
    max_float_xpath = float_container_xpath + "/p[2]/b"

    browser = webdriver.Firefox(options=options)
    try:
        browser.get(skin_page_url)

        # Block until each element shows something other than 'undefined'
        min_float_text = wait_for_defined_text(browser, min_float_xpath)
        max_float_text = wait_for_defined_text(browser, max_float_xpath)

        # Both texts are available now, so turn them into numbers
        min_float = float(min_float_text)
        max_float = float(max_float_text)

        print(f"{collection_name}, {skin_name}, {quality}, {min_float}, {max_float}")

        # The non-StatTrak variant is always created; the StatTrak one only
        # when the collection offers it.
        stattrak_variants = (False, True) if stattrak_available else (False,)
        for is_stattrak in stattrak_variants:
            skin = Skin(skin_name, min_float, max_float, stattrak=is_stattrak, collection_name=collection_name, quality=quality)
            create_skin(skin)
    except TimeoutException:
        print("I give up...")
    finally:
        browser.quit()

In [None]:
from bs4 import BeautifulSoup

# Parse the HTML captured from the skins overview page (div_html is produced
# by the first cell).
divSoup = BeautifulSoup(div_html, 'html.parser')

# Collections to skip -- presumably the ones already stored by earlier runs.
stored_collections = [
    '2018 Inferno',
    '2018 Nuke',
    '2021 Dust 2',
    '2021 Mirage',
    '2021 Train',
    '2021 Vertigo',
    'Alpha',
]

# Visit every <a class="a_subcategory"> found anywhere inside the div
for a_tag in divSoup.find_all('a', class_='a_subcategory'):
    # The collection name sits in a <p> tag inside the current <a> tag
    p_tag = a_tag.find('p', class_='p_subcategory_weapon')
    if not p_tag:
        continue

    collection = p_tag.text
    if collection in stored_collections:
        continue

    # The link's href is the site-relative url of the collection page
    collection_url = a_tag['href']
    parse_collection(collection, collection_url)
