In [135]:
import subprocess
import sys

def parse_freeze_names(freeze_output):
    """Extract distribution names from the raw bytes printed by `pip freeze`.

    The output is parsed line by line (not token by token) so that lines
    containing spaces do not produce junk entries:

    * ``name==1.2.3``            -> ``name``
    * ``name @ file:///path``    -> ``name``
    * ``-e <url>#egg=name``      -> ``name``
    * blank lines / ``#`` comments are ignored

    Parameters
    ----------
    freeze_output : bytes
        Raw stdout of ``pip freeze``.

    Returns
    -------
    list of str
        Distribution names in the order they were listed.
    """
    names = []
    for raw_line in freeze_output.decode().splitlines():
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue
        if line.startswith('-e '):
            # Editable installs carry the name in the URL's "#egg=" fragment.
            egg = line.partition('#egg=')[2]
            if egg:
                names.append(egg.split('&')[0])
            continue
        # Drop a direct-reference suffix first, then the version pin.
        names.append(line.split(' @ ')[0].split('==')[0].strip())
    return names


# Ask the pip that belongs to the running interpreter what is installed.
reqs = subprocess.check_output([sys.executable, '-m', 'pip', 'freeze'])
installed_packages = parse_freeze_names(reqs)

In [139]:
if 'selenium' not in installed_packages:
    !pip install -U selenium
else: print('Selenium already installed.')
    
if 'pandas' not in installed_packages:
    !pip install -U pandas
else: print('Pandas already installed.')

if 'bs4' not in installed_packages:
    !pip install -U bs4
else: print('BeautifulSoup4 already installed.')

Selenium already installed.
Pandas already installed.
BeautifulSoup4 already installed.


In [117]:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException

import time

import re

import pandas as pd

from bs4 import BeautifulSoup

In [118]:
# Command-line switches passed to Chrome, registered in this order.
chrome_flags = (
    '--ignore-certificate-errors',
    '--incognito',
    '--headless',
)

options = webdriver.ChromeOptions()
for chrome_flag in chrome_flags:
    options.add_argument(chrome_flag)

# Start the Chrome session that the following cells drive.
driver = webdriver.Chrome(options=options)

In [119]:
# Category listing requested with pageSize=infinite.
CATEGORY_URL = 'https://www.renodepot.com/webapp/wcs/stores/servlet/CategorySearchDisplay?pageSize=infinite&catalogId=10551&top=Y&urlLangId=-1&categoryId=32504&langId=-1&storeId=10701&productCategory=14745&page=1'

driver.get(CATEGORY_URL)
# The page is reloaded once immediately after the first load.
# NOTE(review): the reason for the reload is not shown here - confirm it is still needed.
driver.refresh()

In [120]:
# Seconds to wait for the "department" element before giving up.
DEPARTMENT_WAIT_SECONDS = 10

# `find_element_by_id` was removed in Selenium 4.3; the By-based locator works
# on both Selenium 3 and 4. Waiting until the element is clickable replaces
# clicking blindly right after the page load. A TimeoutException is raised if
# the element never becomes clickable.
department = WebDriverWait(driver, DEPARTMENT_WAIT_SECONDS).until(
    EC.element_to_be_clickable((By.ID, "department"))
)
department.click()
# NOTE(review): the original author suspected a pause is also needed *after*
# this click; what to wait for is not visible from this notebook - confirm.

In [121]:
SCROLL_PAUSE_TIME = 2
HEIGHT_QUERY = "return document.body.scrollHeight"

# Height of the document before any scrolling.
last_height = driver.execute_script(HEIGHT_QUERY)

# Keep jumping to the bottom until a jump no longer makes the document taller.
at_bottom = False
while not at_bottom:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # Pause so the page has time to react to the scroll.
    time.sleep(SCROLL_PAUSE_TIME)

    new_height = driver.execute_script(HEIGHT_QUERY)
    at_bottom = new_height == last_height
    last_height = new_height

In [122]:
# Capture the HTML of the page as currently rendered by the browser session.
page_source = driver.page_source

In [123]:
# Parse the captured HTML with the 'lxml' parser (the lxml package must be installed).
soup = BeautifulSoup(page_source, 'lxml')

In [124]:
# Every product tile on the page is an <a class="product_box"> element.
products = soup.find_all('a', class_ = 'product_box')

# Parallel lists: index i in each list describes the same product.
make = []
makeTool = []
deltaPrice = []
normPrice = []
salePrice = []
productURL = []

for product in products:
    # Required fields. A missing node raises AttributeError here, before
    # anything is appended, so the lists can never get out of step.
    manufacturer = product.find('div', class_ = 'product_manufacturer').get_text().strip()
    tool_name = product.find('div', class_ = 'product_name').get_text().strip()
    sale_text = product.find('span', class_ = 'integer').get_text().strip()

    # Optional fields, present only when the product is discounted.
    rebate_node = product.find('div', class_ = 'rebate')
    regular_node = product.find('span', class_ = 'linethrough')

    if rebate_node is not None and regular_node is not None:
        rebate_text = rebate_node.get_text().strip()
        regular_text = regular_node.get_text().strip()
    else:
        # No complete discount information: treat the product as not on
        # sale, i.e. zero rebate and a regular price equal to the shown price.
        rebate_text = '0'
        regular_text = sale_text

    # Append exactly one value to every list per product.
    make.append(manufacturer)
    makeTool.append(tool_name)
    salePrice.append(sale_text)
    productURL.append(product.get('href'))
    deltaPrice.append(rebate_text)
    normPrice.append(regular_text)
        
     

In [125]:
# Column name -> scraped values, in the order the columns appear in the frame.
column_values = [
    ('Make', make),
    ('Tool_Type', makeTool),
    ('Regular_Price', normPrice),
    ('Sale_Price', salePrice),
    ('Rebate', deltaPrice),
    ('Product_URL', productURL),
]

# Start from an empty frame that already has the columns, then fill each one.
product_df = pd.DataFrame(columns=[column_name for column_name, _ in column_values])
for column_name, values in column_values:
    product_df[column_name] = values

In [128]:
# Strip currency decoration so the price columns can be parsed as numbers.
# The patterns are plain text, so they are written literally and regex=False
# is passed explicitly. pandas >= 2.0 defaults to regex=False, under which the
# previous escaped pattern r'\$' no longer matched and '$' was left in place.
product_df['Regular_Price'] = (
    product_df['Regular_Price']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
)
product_df['Rebate'] = (
    product_df['Rebate']
    .str.replace('Save $', '', regex=False)
    .str.replace(',', '', regex=False)
)
product_df['Sale_Price'] = product_df['Sale_Price'].str.replace(',', '', regex=False)

In [129]:
# Convert the cleaned text columns to numeric dtypes, one column at a time.
for price_column in ('Rebate', 'Regular_Price', 'Sale_Price'):
    product_df[price_column] = pd.to_numeric(product_df[price_column])

In [130]:
# Rebate as a percentage of the regular price, rounded to 0 decimals.
rebate_share = product_df['Rebate'].div(product_df['Regular_Price'])
product_df['pctRebate'] = rebate_share.mul(100)
product_df = product_df.round({'pctRebate': 0})

In [131]:
# Largest percentage rebate first.
product_df = product_df.sort_values(by='pctRebate', ascending=False)

In [133]:
# Display the sorted product table as the cell's output.
product_df

Unnamed: 0,Make,Tool_Type,Regular_Price,Sale_Price,Rebate,Product_URL,pctRebate
0,METABO HPT,"Pneumatic Stripe Brad Nailer - 5/8""-2""",179.0,89,90.0,https://www.renodepot.com/en/hitachi-pneumatic...,50.0
1,MAKITA,Set of 2 Cordless Tools 18V,319.0,189,130.0,https://www.renodepot.com/en/set-of-2-cordless...,41.0
98,CRAFTSMAN,Oil-Free Compressor - 6 Gallons - 150 PSI - Re...,249.0,149,100.0,https://www.renodepot.com/en/oil-free-compress...,40.0
99,DEWALT,Jigsaw - 20 V,229.0,139,90.0,https://www.renodepot.com/en/20-v-jigsaw-00275732,39.0
2,MAKITA,Set of 2 Cordless Tools - 18 V Lithium-ion,399.0,249,150.0,https://www.renodepot.com/en/set-of-2-cordless...,38.0
...,...,...,...,...,...,...,...
431,JOHNSON,CONTRACTOR PENCILS,3.0,3,0.0,https://www.renodepot.com/en/contractor-pencil...,0.0
430,SHOP-VAC,Vacuum Filter,9.0,9,0.0,https://www.renodepot.com/en/vacuum-filter-059...,0.0
429,FORESTO,Finishing Nails - 1 3/4'' - 18-Gauge Steel - B...,13.0,13,0.0,https://www.renodepot.com/en/foresto-finishing...,0.0
428,JOHNSON,Measuring tape,20.0,20,0.0,https://www.renodepot.com/en/measuring-tape-07...,0.0
