# ShoppingSpreeScript (s3)
Tested and working as of 25/07/2021. All rights reserved.

In [1]:
### importing all the libraries needed
import time
from time import sleep
from urllib.request import urlopen
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from webdriver_manager.chrome import ChromeDriverManager
import re
import pandas as pd

## Part 1: Setting up Selenium

In [2]:
### initial set up of selenium and chrome settings. Selenium is imitating as a Chrome broswer. 
### credits @ https://stackoverflow.com/questions/62057645/how-to-scrape-data-from-shopee-using-beautiful-soup

driver = webdriver.Chrome(ChromeDriverManager().install()) #this ensures you are using the latest chrome version everytime! Dont even need to find driver_path.  
chrome_options = Options()
chrome_options.headless = True
chrome_options.add_argument("--window-size=1920,1200")
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument('--disable-infobars')
chrome_options.add_argument('start-maximized')
chrome_options.add_argument('user-data-dir=C:\\Users\\username\\AppData\\Local\\Google\\Chrome\\User Data\\Default')

# To disable the message, "Chrome is being controlled by automated test software"
chrome_options.add_argument("disable-infobars")

# Pass the argument 1 to allow and 2 to block
chrome_options.add_experimental_option("prefs", {"profile.default_content_setting_values.notifications": 2})

### get the browser load the url and display stuffs 
driver.get("https://shopee.sg/flash_deals")

### To get broswer to scroll endlessly. Credits @ https://stackoverflow.com/questions/20986631/how-can-i-scroll-a-web-page-using-selenium-webdriver-in-python
SCROLL_PAUSE_TIME = 0.5

# Get scroll height
last_height = driver.execute_script("return document.body.scrollHeight")

while True:
    # Scroll down to bottom
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # Wait to load page
    time.sleep(SCROLL_PAUSE_TIME)

    # Calculate new scroll height and compare with last scroll height
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height

### transferring to BeautifulSoup

sleep(30) # seconds. it is v impt to give browser some time to sleep, so that the scrolling can be completed and get full HTML

selenium_html = driver.page_source
soup = BeautifulSoup(selenium_html, 'lxml') #lxml parsing is faster than html parsing 

driver.quit() #to automatically close the browser



Current google-chrome version is 91.0.4472
Get LATEST driver version for 91.0.4472
Driver [C:\Users\user\.wdm\drivers\chromedriver\win32\91.0.4472.101\chromedriver.exe] found in cache


## Part 2: Data Extraction

In [3]:
### Data wrangling - selecting portions of interest from the css 
sold_out = soup.find_all('div', class_ = "flash-sale-item-card flash-sale-item-card--landing-page flash-sale-item-card--SG flash-sale-item-card--sold-out")
still_selling = soup.find_all('div', class_ = "flash-sale-item-card flash-sale-item-card--landing-page flash-sale-item-card--SG")
items_type = [sold_out, still_selling]

# empty lists for loop below
items_name = []
original_price = []
sale_price = []
items_url = []
sold_quantity = []

# A double loop to get all fields of interest into lists. Credits @ QYGoh
for types in range(len(items_type)):
    for item in range(len(items_type[types])):
        items_name.append(items_type[types][item].find('div', class_ ="flash-sale-item-card__item-name-box").get_text())
        original_price.append(items_type[types][item].find('div', class_ = "flash-sale-item-card__original-price flash-sale-item-card__original-price--landing-page").get_text())
        sale_price.append(items_type[types][item].find('div', class_ = "flash-sale-item-card__current-price flash-sale-item-card__current-price--landing-page").get_text())
        
        # to get url
        for link in items_type[types][item].find_all('a', attrs={'href': re.compile("^/")}):
            items_url.append(link.get('href'))
        
        # special arrangement for sold quantity. Fully sold out have diff classes vs still selling
        if types == 0:
            sold_quantity.append(items_type[types][item].find('div', class_ = "flash-sale-sold-out flash-sale-sold-out--landing-page").get_text()) 
        elif types == 1:
            sold_quantity.append(items_type[types][item].find('div', class_ = "flash-sale-progress-bar__text").get_text())

## Part 3: Reading & Saving outputs

In [4]:
### reading extracted data in a pandas dataframe

df = pd.DataFrame(list(zip(items_name, original_price, sale_price, sold_quantity, items_url)),\
                 columns =['Name','Original_price','Sale_price','Sold_quantity','URL'])

### Making good of the URL
for link in range(len(df['URL'])):
    df['URL'][link] = "https://shopee.sg"+ df['URL'][link]
    
df

Unnamed: 0,Name,Original_price,Sale_price,Sold_quantity,URL
0,Water Moisturizing Clear Invisible Mask Patch ...,$ 3.84,$ 0.10,200 sold in 30 minutes,https://shopee.sg/Water-Moisturizing-Clear-Inv...
1,1pcs Bath duck Toy bathroom playing water soun...,$ 2.00,$ 0.10,113 sold in 33 minutes,https://shopee.sg/1pcs-Bath-duck-Toy-bathroom-...
2,Nine Beads Hair Ring High Elastic Hair Rope He...,$ 1.64,$ 0.10,200 sold in 34 minutes,https://shopee.sg/Nine-Beads-Hair-Ring-High-El...
3,【Timed specials Buy 10 FREE 2】🐢💰Japan Sensoji ...,$ 9.98,$ 0.10,200 sold in 42 minutes,https://shopee.sg/【Timed-specials-Buy-10-FREE-...
4,Alcon Tears Naturale Free (32 Vials x 0.8ml),$ 14.55,$ 11.78,30 sold in 1 hour,https://shopee.sg/Alcon-Tears-Naturale-Free-(3...
...,...,...,...,...,...
302,Awei powerbank 10000mAh fast charging apple an...,$ 9.16,$ 6.90,0 sold,https://shopee.sg/XiaoboACC-Korean-Fashion-Mic...
303,New QC 3.0 30W Quick Charging 3 Ports USB Hub ...,$ 18.95,$ 5.99,0 sold,https://shopee.sg/Ts6-Probiotics-Box-45-Pack-M...
304,Ready Stock A3 Size Children's Room Decoration...,$ 18.79,$ 1.99,1 sold,https://shopee.sg/「LuluFurniture.SG」Foldable-O...
305,Ready Stock💖Men's Watch Bluetooth Smart Watch ...,$ 36.90,$ 5.99,2 sold,https://shopee.sg/Sexy-Woman-Gauze-Lace-Tube-T...


In [5]:
df.to_excel('Shoppee_flash_sales.xlsx')