## Price Check

In [None]:
"""
Checks prices at RedMart, Giant and Cold Storage.
Saves the results to a CSV file.
Displays the output using pandas.
Shows items on promo and the 10 lowest-priced items.
"""

In [1]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
from datetime import datetime
from bs4 import BeautifulSoup
import csv
import numpy as np
import json
import requests

#------------------------------------------------------------------
#  RedMart (hosted on Lazada)
#------------------------------------------------------------------

def get_url(keyword):
    """Search RedMart for *keyword* in Chrome and return the results URL.

    Opens https://redmart.lazada.sg with Selenium, types the keyword into the
    search box (the element named ``q``), presses RETURN and reads back the
    URL the browser ends up on.

    Args:
        keyword: Text to type into the site's search box.

    Returns:
        The browser's current URL right after the search was submitted.
    """
    print("RedMart")
    driver = webdriver.Chrome()
    try:
        driver.get("https://redmart.lazada.sg")
        # find_element("name", ...) is the locator form supported by both
        # Selenium 3 and 4; the find_element_by_* helpers were removed in 4.3.
        elem = driver.find_element("name", "q")
        elem.clear()
        elem.send_keys(keyword)
        elem.send_keys(Keys.RETURN)

        return driver.current_url
    finally:
        # Always release the browser, even when a step above raises.
        # quit() also stops the chromedriver process, which close() does not.
        driver.quit()

def collect_data(keyword_to_url):
    """Download all RedMart result pages for a search and append them to cart.csv.

    The first request reads the total result count from the page's
    ``window.pageData`` script; each result page is then fetched and its
    ``application/ld+json`` item list is turned into CSV rows.

    Args:
        keyword_to_url: URL of the RedMart search results page.

    Side effects:
        Appends one row per product to ``cart.csv`` (columns: site, name,
        price, bundle, size, image; no header row) and prints progress.

    Raises:
        IndexError / KeyError / ValueError: if the page does not have the
            script layout or JSON structure this parser expects.
    """
    page_size = 40  # the page count below assumes 40 results per page
    fieldnames = ["site", "name", "price", "bundle", "size", "image"]

    resp = requests.get(keyword_to_url)
    soup = BeautifulSoup(resp.text, "html.parser")

    # The third <script> tag is expected to contain "window.pageData=<json>".
    page_data_js = soup.findAll('script')[2].text
    page_data = json.loads(page_data_js.replace("window.pageData=", ""))

    num = int(page_data["mainInfo"]["totalResults"])
    last = int(np.ceil(num / page_size))
    print(f"Downloading {num} items")

    for pageno in range(1, last + 1):
        # NOTE(review): the last character of the search URL is dropped
        # before the page parameter is appended - presumably a trailing
        # separator; confirm against a real results URL.
        url = keyword_to_url[:-1] + "&page=" + str(pageno)
        resp = requests.get(url)
        soup = BeautifulSoup(resp.text, "html.parser")
        # The second ld+json script carries the product list for the page.
        data = json.loads(soup.findAll('script', type='application/ld+json')[1].text)

        cart = [
            {
                "site": "RedMart",
                "name": item["name"],
                # Keep only the amount: drop "$" and anything after "/".
                "price": item["offers"]["price"].replace("$", "").split("/")[0],
                "bundle": None,
                "size": 1,
                "image": item["image"],
            }
            for item in data['itemListElement']
        ]

        # newline='' lets the csv module control line endings; without it
        # every row is followed by a blank line on Windows.
        with open("cart.csv", "a", encoding='utf-8', newline='') as f:
            csv_dict_writer = csv.DictWriter(f, fieldnames=fieldnames)
            csv_dict_writer.writerows(cart)
    print("RedMart done")
    print("----------------------------------------")

#------------------------------------------------------------------
#  Giant Online
#------------------------------------------------------------------
    
def get_url2(keyword):
    """Search Giant Online for *keyword* in Chrome and return the results URL.

    Opens https://giantonline.com.sg/ with Selenium, types the keyword into
    the search box (the element named ``q``), presses RETURN and reads back
    the URL the browser ends up on.

    Args:
        keyword: Text to type into the site's search box.

    Returns:
        The browser's current URL right after the search was submitted.
    """
    print("Giant Online")
    driver = webdriver.Chrome()
    try:
        driver.get("https://giantonline.com.sg/")
        # find_element("name", ...) is the locator form supported by both
        # Selenium 3 and 4; the find_element_by_* helpers were removed in 4.3.
        elem = driver.find_element("name", "q")
        elem.clear()
        elem.send_keys(keyword)
        elem.send_keys(Keys.RETURN)

        return driver.current_url
    finally:
        # Always release the browser, even when a step above raises.
        # quit() also stops the chromedriver process, which close() does not.
        driver.quit()

def collect_data2(keyword_to_url2):
    """Scrape the Giant Online results page and append its products to cart.csv.

    The page is opened in Chrome and scrolled to the bottom repeatedly until
    its height stops changing between two checks three seconds apart, so that
    lazily loaded products are present before the HTML is parsed.

    Args:
        keyword_to_url2: URL of the Giant Online search results page.

    Side effects:
        Appends one row per product to ``cart.csv`` (columns: site, name,
        price, bundle, size, image; no header row) and prints progress.

    Raises:
        AttributeError / TypeError: if a product tile lacks the name link,
            both price variants, or the image element.
    """
    scroll_js = "window.scrollTo(0, document.body.scrollHeight);var lenOfPage=document.body.scrollHeight;return lenOfPage;"

    driver = webdriver.Chrome()
    try:
        driver.get(keyword_to_url2)
        page_height = driver.execute_script(scroll_js)
        while True:
            previous_height = page_height
            time.sleep(3)  # give the page time to load more products
            page_height = driver.execute_script(scroll_js)
            if previous_height == page_height:
                break

        soup = BeautifulSoup(driver.page_source, "html.parser")
    finally:
        # Release the browser even if loading or scrolling fails.
        driver.quit()

    boxes = soup.findAll("li", {"class":"col-xs-6 col-sm-4 col-md-3 col-lg-2 open-product-detail algolia-click"})
    total = len(boxes)
    print (f"Downloading {total} items")

    cart = []
    for box in boxes:
        name = box.find("a", {"class":"product-link"}).text

        # Size is optional on a tile; fall back to 1 when it is absent.
        size_tag = box.find("span", {"class":"size"})
        size = 1 if size_tag is None else size_tag.text.replace("Size: ", "")

        # Regular price first, then the red (promo) price variant. Both are
        # stripped so the stored value has no surrounding whitespace.
        price_tag = box.find("div", {"class" : "price product-price"})
        if price_tag is None:
            price_tag = box.find("div", {"class" : "price product-price red"})
        price = price_tag.text.strip().replace("$","").split("/")[0]

        # Promo label is optional.
        bundle_tag = box.find("span", {"class" : "product-discount-label mm-promo"})
        bundle = None if bundle_tag is None else bundle_tag.text

        # Product image URL.
        image = box.find("img", {"class" : "img-responsive"})['src']

        cart.append({
            "site" : "Giant",
            "name" : name,
            "price" : price,
            "bundle" : bundle,
            "size" : size,
            "image" : image,
        })

    # newline='' lets the csv module control line endings; without it every
    # row is followed by a blank line on Windows.
    with open("cart.csv","a", encoding='utf-8', newline='') as f:
        csv_dict_writer = csv.DictWriter(f, fieldnames=["site","name","price","bundle","size","image"])
        csv_dict_writer.writerows(cart)

    print("Giant Online done")
    print("----------------------------------------")

#------------------------------------------------------------------
#  Cold Storage
#------------------------------------------------------------------

def get_url3(keyword):
    """Search Cold Storage for *keyword* in Chrome and return the results URL.

    Opens https://www.coldstorage.com.sg/ with Selenium, types the keyword
    into the search box (the element named ``q``), presses RETURN and reads
    back the URL the browser ends up on.

    Args:
        keyword: Text to type into the site's search box.

    Returns:
        The browser's current URL right after the search was submitted.
    """
    print("Cold Storage")
    driver = webdriver.Chrome()
    try:
        driver.get("https://www.coldstorage.com.sg/")
        # find_element("name", ...) is the locator form supported by both
        # Selenium 3 and 4; the find_element_by_* helpers were removed in 4.3.
        elem = driver.find_element("name", "q")
        elem.clear()
        elem.send_keys(keyword)
        elem.send_keys(Keys.RETURN)

        return driver.current_url
    finally:
        # Always release the browser, even when a step above raises.
        # quit() also stops the chromedriver process, which close() does not.
        driver.quit()
    
def collect_data3(keyword_to_url3):
    """Scrape the Cold Storage results page and append its products to cart.csv.

    The page is opened in Chrome and scrolled to the bottom repeatedly until
    its height stops changing between two checks three seconds apart, so that
    lazily loaded products are present before the HTML is parsed.

    Args:
        keyword_to_url3: URL of the Cold Storage search results page.

    Side effects:
        Appends one row per product to ``cart.csv`` (columns: site, name,
        price, bundle, size, image; no header row) and prints progress.

    Raises:
        AttributeError / TypeError: if a product tile lacks the name link,
            both price variants, or the image element.
    """
    scroll_js = "window.scrollTo(0, document.body.scrollHeight);var lenOfPage=document.body.scrollHeight;return lenOfPage;"

    driver = webdriver.Chrome()
    try:
        driver.get(keyword_to_url3)
        page_height = driver.execute_script(scroll_js)
        while True:
            previous_height = page_height
            time.sleep(3)  # give the page time to load more products
            page_height = driver.execute_script(scroll_js)
            if previous_height == page_height:
                break

        soup = BeautifulSoup(driver.page_source, "html.parser")
    finally:
        # Release the browser even if loading or scrolling fails.
        driver.quit()

    boxes = soup.findAll("li", {"class":"col-xs-6 col-sm-4 col-md-3 col-lg-2 open-product-detail algolia-click"})
    total = len(boxes)
    print (f"Downloading {total} items")

    cart = []
    for box in boxes:
        name = box.find("a", {"class":"product-link"}).text

        # Size is optional on a tile; fall back to 1 when it is absent.
        size_tag = box.find("span", {"class":"size"})
        size = 1 if size_tag is None else size_tag.text.replace("Size: ", "")

        # Regular price first, then the red (promo) price variant. Both are
        # stripped so the stored value has no surrounding whitespace.
        price_tag = box.find("div", {"class" : "price product-price"})
        if price_tag is None:
            price_tag = box.find("div", {"class" : "price product-price red"})
        price = price_tag.text.strip().replace("$","").split("/")[0]

        # Promo label is optional.
        bundle_tag = box.find("span", {"class" : "product-discount-label mm-promo"})
        bundle = None if bundle_tag is None else bundle_tag.text

        # Product image URL.
        image = box.find("img", {"class" : "img-responsive"})['src']

        cart.append({
            "site" : "Cold Storage",
            "name" : name,
            "price" : price,
            "bundle" : bundle,
            "size" : size,
            "image" : image,
        })

    # newline='' lets the csv module control line endings; without it every
    # row is followed by a blank line on Windows.
    with open("cart.csv","a", encoding='utf-8', newline='') as f:
        csv_dict_writer = csv.DictWriter(f, fieldnames=["site","name","price","bundle","size","image"])
        csv_dict_writer.writerows(cart)

    print("Cold Storage done")
    print("----------------------------------------")

#------------------------------------------------------------------
#  Input search item
#------------------------------------------------------------------
    
# Ask for the product once; the timer starts only after the user has answered.
keyword = input("Enter product to search: ")
startTime = datetime.now()
print("----------------------------------------")

# Each store is handled in two steps: resolve the search-results URL in a
# browser, then scrape that URL and append the products to cart.csv.
# NOTE(review): cart.csv is only ever appended to, so results from earlier
# searches stay in the file - confirm whether that is intended.

# RedMart
keyword_to_url = get_url(keyword)
collect_data(keyword_to_url)

# Giant Online
keyword_to_url2 = get_url2(keyword)
collect_data2(keyword_to_url2)

# Cold Storage
keyword_to_url3 = get_url3(keyword)
collect_data3(keyword_to_url3)

#------------------------------------------------------------------
#  Search end
#------------------------------------------------------------------
print("Scraping completed")
duration = datetime.now() - startTime
print(
    f"Time taken: {duration}",
    "----------------------------------------",
    sep="\n",
)

Enter product to search: truffle oil
----------------------------------------
RedMart
Downloading 58 items
RedMart done
----------------------------------------
Giant Online
Downloading 2 items
Giant Online done
----------------------------------------
Cold Storage
Downloading 7 items
Cold Storage done
----------------------------------------
Scraping completed
Time taken: 0:01:11.347245
----------------------------------------


### Show all results

In [2]:
import pandas as pd
# Read the same relative file the scraping functions append to, instead of a
# machine-specific absolute path that only exists on one computer.
filename = "cart.csv"
# cart.csv has no header row, so the column names are supplied here in the
# order the rows were written: site, name, price, bundle, size, image.
df = pd.read_csv(filename, names = ['Site','Product','Price','Promo','Unit', 'Url'])
pd.set_option('display.max_rows', 5000) #display 5000 rows
# Last expression of the cell: renders the table with Price shaded by value.
df.style.background_gradient(cmap='summer', subset="Price")

Unnamed: 0,Site,Product,Price,Promo,Unit,Url
0,RedMart,Naturel Extra Virgin Olive Spray - White Truffle Flavoured,6.25,,1,https://sg-test-11.slatic.net/p/8918ff17e06a402ea3365998d362fc10.jpg
1,RedMart,Colavita Aromatic Truffle Extra Virgin Olive Oil,15.5,,1,https://sg-test-11.slatic.net/p/8577562ae8dd01fcee53526fa882f020.jpg
2,RedMart,Savini Tartufi Black Truffle Olive Oil,17.9,,1,https://sg-test-11.slatic.net/p/2624ef39ef9805e2f0bb9aeeb629a24d.jpg
3,RedMart,Mantova Spray (Italian) Truffle Infused Extra Virgin Olive Oil,13.35,,1,https://sg-test-11.slatic.net/p/e101e11fbc6d234e4672589dd4ccc142.jpg
4,RedMart,Longhi White Truffle Oil,24.9,,1,https://sg-test-11.slatic.net/p/7b893662904da2ac51dee0df047afc07.jpg
5,RedMart,Naturel Extra Virgin Olive Oil - White Truffle Flavoured,7.9,,1,https://sg-test-11.slatic.net/p/850cf88fe8f8256545ca6d3b110a68c6.jpg
6,RedMart,Longhi White Truffle Oil With White Leaf,13.9,,1,https://sg-test-11.slatic.net/p/9fd310977de81ebee9b9f6d5db9e7b5e.jpg
7,RedMart,Casa Rinaldi White Truffle Oil,17.5,,1,https://sg-test-11.slatic.net/p/f04400a7997c970ea700225393c6df95.jpg
8,RedMart,Longhi White Truffle Oil with white truffle leaf,12.9,,1,https://sg-test-11.slatic.net/p/321830eea0da798debc33dd8c86f4585.jpg
9,RedMart,Longhi Truffle Oil Black,22.9,,1,https://sg-test-11.slatic.net/p/53ae539d47502ae802e1cc68abe9e1c9.jpg


### Get Items on Promo (if any)

In [33]:
# Keep only rows that actually carry a promo label. Restricting dropna to the
# Promo column stops a missing value in another column from hiding a promo.
df[["Site","Product","Price","Promo","Unit"]].dropna(subset=["Promo"])

Unnamed: 0,Site,Product,Price,Promo,Unit


### Show the X lowest-priced products

In [3]:
# Number of cheapest products to list.
num = 10
print(f"Top {num} lowest price products")

# Pick the rows with the smallest Price, then shade the Price column by value.
cheapest = df[["Site","Product","Price","Unit"]].nsmallest(num, 'Price')
cheapest.style.background_gradient(cmap='summer', subset="Price")

Top 10 lowest price products


Unnamed: 0,Site,Product,Price,Unit
0,RedMart,Naturel Extra Virgin Olive Spray - White Truffle Flavoured,6.25,1
48,Giant,Extra Virgin Olive Spray Oil With White Truffle 200ml,6.25,200ML
55,Cold Storage,Extra Virgin Olive Spray Oil With White Truffle 200ml,6.25,200ML
5,RedMart,Naturel Extra Virgin Olive Oil - White Truffle Flavoured,7.9,1
49,Giant,Extra Virgin Olive With Truffle 250ml,7.9,250ML
56,Cold Storage,Extra Virgin Olive With Truffle 250ml,7.9,250ML
22,RedMart,Iliada Sliver Line Extra Virgin Olive Oil With Truffle Flavour,9.26,1
8,RedMart,Longhi White Truffle Oil with white truffle leaf,12.9,1
3,RedMart,Mantova Spray (Italian) Truffle Infused Extra Virgin Olive Oil,13.35,1
53,Cold Storage,Extra Virgin Oil Truffle Spray 227ml,13.35,227ML


In [None]:
#Try searching
#Truffle oil
#Hokkaido scallop
#Magnolia omega plus 1l