In [8]:
#Import packages
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd
from bs4 import BeautifulSoup
from time import sleep
from random import randint
import os
from selenium.common.exceptions import TimeoutException

In [9]:
#Reference the current working directory
cwd = os.getcwd()

## Create functions

In [10]:
def page_scroller(browser_driver, num_scrolls, wait_time):
    for i in range(num_scrolls):
        browser_driver.execute_script("window.scrollBy(0, 2000);")
        sleep(wait_time)

In [11]:
def get_product_details(browser_driver, product_url_list):
    #Create a list for dictionaries
    prods = []
    sales = []
    error_prods = []
    
    print(str(len(product_url_list))+' products')
    for url in product_url_list:
        #Random sleep time
        sleep(randint(3,6))
        
        #Create an empty dictionary of results
        prod_details = {}
        
        
        #Add the url to the dictionary
        prod_details['url'] = url
        
        #Go to the item page
        print(url)
        
        try:
            driver.get(url = url)
        except TimeoutException:
            prod_details['status'] = 'Could not retrieve product information'
            error_prods.append(url)
        else:
            sleep(3)
            #Get the html from the link
            soup = BeautifulSoup(driver.page_source, 'html.parser')


            #Get product details and add to dictionary
            #Get the name
            product_name = soup.find('h1', class_ = 'name').text
            prod_details['name'] = product_name
            #print(product_name)

            #Get the other details
            #Some products have a product details column, others have a product details row
            if soup.find('div', class_ = 'product-details detail-column') != None:
                prod_info_section = soup.find('div', class_ = 'product-details detail-column').find_all('div', class_ = 'detail')
            elif soup.find('div', class_ = 'product-details detail-row') != None:    
                prod_info_section = soup.find('div', class_ = 'product-details detail-row').find_all('div', class_ = 'detail')
            else:
                prod_details['status'] = 'Could not retrieve product information'

            #Loop thorugh all of the product details in that section of the page
            for info_item in prod_info_section:
                info_label = info_item.find('span', class_ = 'title').text
                info_value = info_item.find('span', class_ = False).text

                #Add to dictionary
                prod_details[info_label] = info_value

            prods.append(prod_details)





            ######## GET HISTORIC SALES #######
            #Find the view all sales button using the link text
            driver.find_element_by_link_text("View All Sales").click()
            soup = BeautifulSoup(driver.page_source, 'html.parser')

            historic_sales_head = soup.find('div', class_ = "latest-sales-container").thead
            historic_sales_body = soup.find('div', class_ = "latest-sales-container").tbody

            #Get the headings for the sales section
            if historic_sales_body is not None:
                all_header = historic_sales_head.find_all('th')
                all_sales = historic_sales_body.find_all('tr')

                #Make sure atleast one sale has occured

                #Do this for every sale in the sales
                for sale in all_sales:
                    #Get a sales record
                    prod_sales = {}
                    prod_sales['url'] = item_link
                    all_variables = sale.find_all('td')

                    for header, value in zip(all_header, all_variables):
                        prod_sales[header.text] = value.text
                    #Append to list    
                    sales.append(prod_sales)    


    
    #Return the list of results
    return prods, sales, error_prods    

## Login
Logging in is required to get unrestricted access to historic sales

In [12]:
#Instantiate a firefix session - Requires geckodriver in the current working directory
driver = webdriver.Firefox(executable_path=cwd+'/geckodriver')
sleep(2)

In [13]:
#Load credentials from .py file
from credentials import username, password

In [14]:
#Navigate to login page
driver.get(url = "https://stockx.com/login")

In [15]:
#Login
driver.find_element_by_name("email").send_keys(username)
driver.find_element_by_name("password").send_keys(password)
sleep(1)
#Multiple class names for the login button so css selector is used instead
driver.find_element_by_css_selector("#bottom-bar-root > div > div > button.button.right-button.button-green").click()
sleep(5)

## Get products from a search
This section can be bypassed if you have list of product urls you want to retrieve information from


In [16]:
#Replace search_term with whatever you're looking for
#Spaces need to be replaced with %20
search_term = "Supreme Lacoste"
search_term_converted = search_term.replace(' ', '%20')

In [17]:
#Search
driver.get(url = "https://stockx.com/search?s="+search_term_converted)

In [18]:
#Scroll down to load all items - Use a large number if you expect many results
page_scroller(driver, 10, 3)

In [19]:
#Get the html from the page
soup = BeautifulSoup(driver.page_source, 'html.parser')

In [20]:
#Get the results grid
search_result_grid=  soup.find('div', class_ = "search-results-grid")

In [21]:
#Get the results tiles
search_result_tiles = search_result_grid.find_all('div', class_ = "result-tile")

In [22]:
#Get urls for each tile
search_urls = []

for tile in search_result_tiles:
    link = tile.find('a', class_ = 'tile-link')
    item_link_href = link['href']
    item_link = 'https://stockx.com/'+ item_link_href
    
    search_urls.append(item_link)


## Get the details from the products

If you already have a list of urls you want to get information for, just create a list and use it in the function below

In [24]:
prod_info_list, sales_info_list, not_found = get_product_details(driver, search_urls)

119 products
https://stockx.com/supreme-lacoste-shoulder-bag-black
https://stockx.com/supreme-lacoste-hooded-sweatshirt-black
https://stockx.com/supreme-lacoste-crewneck-black
https://stockx.com/supreme-lacoste-waist-bag-red
https://stockx.com/supreme-lacoste-velour-crusher-bucket-black
https://stockx.com/supreme-lacoste-velour-crusher-bucket-teal
https://stockx.com/supreme-lacoste-waist-bag-black
https://stockx.com/supreme-lacoste-shoulder-bag-red
https://stockx.com/supreme-lacoste-twill-6-panel-black
https://stockx.com/supreme-lacoste-wool-varsity-jacket-black
https://stockx.com/supreme-lacoste-crewneck-navy
https://stockx.com/supreme-lacoste-reflective-grid-nylon-camp-cap-black
https://stockx.com/supreme-lacoste-waist-bag-green
https://stockx.com/supreme-lacoste-crewneck-red
https://stockx.com/supreme-lacoste-reflective-grid-nylon-anorak-green
https://stockx.com/supreme-lacoste-hooded-sweatshirt-light-brown
https://stockx.com/supreme-lacoste-reflective-grid-nylon-anorak-peach
https:

In [25]:
#Create dataframes
prods_df =  pd.DataFrame.from_records(prod_info_list)
sales_df =  pd.DataFrame.from_records(sales_info_list)

In [26]:
prods_df.head()

Unnamed: 0,Color,Release Date,Retail,Season,Size,name,url
0,Black,2018-04-19,£110,SS18,One Size,Supreme LACOSTE Shoulder Bag Black,https://stockx.com/supreme-lacoste-shoulder-ba...
1,Black,2018-04-19,£148,SS18,,Supreme LACOSTE Hooded Sweatshirt Black,https://stockx.com/supreme-lacoste-hooded-swea...
2,Black,2018-04-19,£148,SS18,,Supreme LACOSTE Crewneck Black,https://stockx.com/supreme-lacoste-crewneck-black
3,Red,2018-04-19,£110,SS18,One Size,Supreme LACOSTE Waist Bag Red,https://stockx.com/supreme-lacoste-waist-bag-red
4,Black,2018-04-19,,SS18,,Supreme LACOSTE Velour Crusher Black,https://stockx.com/supreme-lacoste-velour-crus...


In [27]:
sales_df.head()

Unnamed: 0,Date,Sale Price,Size,Time,url
0,"Saturday, January 5, 2019",£240,,4:20 pm EST,https://stockx.com/supreme-lacoste-track-jacke...
1,"Friday, January 4, 2019",£260,,6:20 pm EST,https://stockx.com/supreme-lacoste-track-jacke...
2,"Friday, January 4, 2019",£258,,12:03 am EST,https://stockx.com/supreme-lacoste-track-jacke...
3,"Thursday, January 3, 2019",£238,,2:15 pm EST,https://stockx.com/supreme-lacoste-track-jacke...
4,"Wednesday, January 2, 2019",£251,,9:25 pm EST,https://stockx.com/supreme-lacoste-track-jacke...
