# Web Scraper

In [56]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from typing import List, Dict
import pandas as pd
import os
import openpyxl

In [77]:
class ScraperBase(object):
    """Template for a site-specific product scraper driven by a Selenium driver.

    Subclasses implement the three hook methods below; ``run`` calls them in a
    fixed order and returns whatever the hooks accumulated in ``self._storage``.
    """

    # The annotation is quoted and names the driver class (``webdriver.Remote``)
    # rather than the ``selenium.webdriver`` module itself, which is not a type.
    def __init__(self, webdriver: "webdriver.Remote", name: str, web_url: str) -> None:
        """Store the collaborators; nothing is loaded in the browser here.

        Args:
            webdriver: Driver instance used for every page load and element lookup.
            name: Label for the scraped site (stored; not read by this class).
            web_url: Page that ``run`` opens first.
        """
        self._webdriver = webdriver
        self._name = name
        self._web_url = web_url
        # Filled by the subclass's ``_parse_data`` and returned by ``run``.
        self._storage = {}

    def _search_desired_item(self) -> None:
        """Hook: bring the browser to the results page to scrape.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    def _get_all_product_links(self) -> List[str]:
        """Hook: return the product page URLs found on the current page.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    def _parse_data(self, link: str) -> None:
        """Hook: visit ``link`` and record the product in ``self._storage``.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    def run(self) -> Dict:
        """Open the start page, search, then parse every product link.

        Progress is printed on a single line that is rewritten in place
        (carriage return), followed by a final line once all links are done.

        Returns:
            The ``self._storage`` dict (the same object, not a copy).
        """
        self._webdriver.get(self._web_url)
        self._search_desired_item()
        links = self._get_all_product_links()
        total = len(links)
        print("Num of links: %d" % total)
        for position, link in enumerate(links, start=1):
            self._parse_data(link=link)
            print("Progress: %d/%d" % (position, total), end='\r')
        # Final line without '\r' so the completed count stays visible.
        print("Progress: %d/%d" % (total, total))
        return self._storage

In [100]:
class EbayScraper(ScraperBase):
    """Scraper for eBay search results, built on the ``ScraperBase`` hooks.

    Each successfully parsed listing is stored in ``self._storage`` under a
    1-based string id ("1", "2", ...). Listings that fail to parse are skipped.
    """

    def __init__(self, webdriver: "webdriver.Remote", name: str, web_url: str,
                 search_term: str = "laptop") -> None:
        """Set up the scraper.

        Args:
            webdriver: Driver instance used for every page load and element lookup.
            name: Label for the scraped site.
            web_url: Page opened first by ``run``.
            search_term: Text typed into the search bar. Defaults to "laptop",
                the value that used to be hard-coded.
        """
        super(EbayScraper, self).__init__(webdriver=webdriver, name=name, web_url=web_url)
        self._search_term = search_term

    def _search_desired_item(self) -> None:
        """Search for the term, click the two filter links, then reload with ``_ipg=200``.

        The filter elements are located by hard-coded XPaths.

        Raises:
            NoSuchElementException: If the search bar or either filter link is missing.
        """
        search_bar = self._webdriver.find_element_by_xpath('//*[@id="gh-ac"]')
        search_bar.send_keys(self._search_term, Keys.ENTER)

        refurbished_items = self._webdriver.find_element_by_xpath(
            '//*[@id="x-refine__group__2"]/ul/li[3]/div/a/div/div/span[1]')
        refurbished_items.click()

        non_auction_items = self._webdriver.find_element_by_xpath(
            '//*[@id="s0-14-11-5-1[0]"]/div[2]/div/div/ul/li[3]/a/h2')
        non_auction_items.click()

        # Use this scraper's own driver: the previous code read a notebook-level
        # `driver` global, which only worked if that exact variable existed.
        # NOTE(review): `_ipg=200` presumably requests 200 results per page, and
        # appending with "&" assumes the URL already has a query string - confirm.
        self._webdriver.get(self._webdriver.current_url + "&_ipg=200")

    def _get_all_product_links(self) -> List[str]:
        """Collect the distinct ``href`` values of all anchors in the results list.

        Returns:
            Unique links in the order they appear on the page. Anchors with no
            ``href`` are skipped.

        Raises:
            NoSuchElementException: If the results container is missing.
        """
        link_container = self._webdriver.find_element_by_xpath(
            '//*[@id="srp-river-results"]/ul').find_elements_by_tag_name("a")

        hrefs = (anchor.get_attribute("href") for anchor in link_container)
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # visiting order (and therefore the product ids) is reproducible.
        return list(dict.fromkeys(href for href in hrefs if href))

    def _parse_data(self, link: str) -> None:
        """Open one listing and store its title, link, price, description and specs.

        A missing description is stored as "". If the spec table, title or price
        is missing, a failure message is printed and nothing is stored.

        Args:
            link: URL of the listing page to open.
        """
        self._webdriver.get(link)
        # Ids stay contiguous because only successful parses are stored.
        product_id = str(len(self._storage) + 1)
        try:
            specs = self._webdriver.find_element_by_xpath(
                '//*[@id="viTabs_0_is"]/div/table').find_elements_by_tag_name("tr")
            product_title = self._webdriver.find_element_by_xpath('//*[@id="itemTitle"]').text
            product_cost = self._webdriver.find_element_by_xpath(
                '//*[@id="prcIsum"]').get_attribute("content")
            try:
                product_desc = self._webdriver.find_element_by_xpath(
                    '//*[@id="ds_div"]/font/font/font/font/font/font/font/font/font/ul/li/p').text
            except NoSuchElementException:
                product_desc = ""
            record = {"title": product_title, 'link': link, "price": product_cost,
                      "description": product_desc}

            for spec in specs:
                spec_titles = spec.find_elements_by_class_name("attrLabels")
                spec_values = spec.find_elements_by_tag_name("span")
                # Only pair labels with values when the row has one value per label;
                # other rows are ignored rather than guessed at.
                if len(spec_titles) == len(spec_values):
                    for spec_title, spec_value in zip(spec_titles, spec_values):
                        record[spec_title.text] = spec_value.text
        except NoSuchElementException:
            print("\n\rParsing failed, link: %s" % link)
        else:
            # Stored only after the whole page parsed, so no partial entries remain.
            self._storage[product_id] = record


In [90]:
# Instantiate the webdriver.
# CHROMEDRIVER_PATH may point at a specific chromedriver binary; when it is unset
# or empty, fall back to "chromedriver" so the executable is looked up on PATH
# instead of passing None to Selenium.
driver = webdriver.Chrome(executable_path=os.getenv("CHROMEDRIVER_PATH") or "chromedriver")

In [101]:
# Create the scrapers; each one reuses the shared `driver` created above.
ebay_scraper = EbayScraper(
    webdriver=driver,
    name="eBay",
    web_url="https://www.ebay.com.sg/",
)

In [102]:
# Start scraping eBay: opens the start page, runs the search and filters, visits
# every result link, and returns the collected records keyed by product id.
ebay_storage = ebay_scraper.run()

Num of links: 63
Progress: 3/63
Parsing failed
Progress: 14/63
Parsing failed
Progress: 30/63
Parsing failed
Progress: 34/63
Parsing failed
Progress: 40/63
Parsing failed
Progress: 63/63


In [None]:
# Start scraping others

In [107]:
# Stop the webdriver. The cells below only use the already-collected
# `ebay_storage`, so the browser is no longer needed.
driver.quit()

In [103]:
# Inspect the raw scrape result: a dict mapping product id -> dict of fields.
ebay_storage

{'1': {'title': 'Apple MacBook Pro 15" 2014 Retina A1398 Laptop i7-4870HQ 256GB 16GB Big Sur',
  'link': 'https://www.ebay.com.sg/itm/Apple-MacBook-Pro-15-2014-Retina-A1398-Laptop-i7-4870HQ-256GB-16GB-Big-Sur/303806780108?hash=item46bc4b92cc%3Ag%3AGbgAAOSwVs1f01r8&LH_BIN=1&LH_ItemCondition=2500',
  'price': '1212.54',
  'description': '',
  'Condition:': 'Remanufactured',
  'Model:': 'A1398',
  'Most Suitable For:': 'Casual Computing',
  'Type:': 'Notebook/Laptop',
  'GPU:': 'NVIDIA GeForce GT 750M',
  'Features:': 'Wi-Fi',
  'Processor Speed:': '2.50 GHz',
  'MPN:': 'MGXC2LL/A',
  'RAM Size:': '16 GB',
  'Series:': 'MacBook Pro',
  'Screen Size:': '15.4 in',
  'Brand:': 'Apple',
  'Processor:': 'Intel Core i7 4th Gen.',
  'Storage Capacity:': '256GB',
  'Storage Type:': 'SSD (Solid State Drive)',
  'SSD Capacity:': '256 GB',
  'UPC:': 'Does not apply',
  'Connectivity:': 'HDMI'},
 '2': {'title': 'ASUS VivoBook Slim S330FA 13.3" Full HD Quad Core i7-8565U 8GB 512GB SSD Laptop',
  'link

In [105]:
# One row per product id; columns are the union of all field names seen, so
# specs a listing does not have show up as missing values.
df_ebay = pd.DataFrame.from_dict(ebay_storage, orient='index')
df_ebay

Unnamed: 0,title,link,price,description,Condition:,Model:,Most Suitable For:,Type:,GPU:,Features:,...,Touchscreen:,Depth (mm):,Bluetooth:,WiFi:,Audio connections:,Country/Region of Manufacture:,Article modified:,Product non-national:,Colour main:,Colour the manufacturer's:
1,"Apple MacBook Pro 15"" 2014 Retina A1398 Laptop...",https://www.ebay.com.sg/itm/Apple-MacBook-Pro-...,1212.54,,Remanufactured,A1398,Casual Computing,Notebook/Laptop,NVIDIA GeForce GT 750M,Wi-Fi,...,,,,,,,,,,
2,"ASUS VivoBook Slim S330FA 13.3"" Full HD Quad C...",https://www.ebay.com.sg/itm/ASUS-VivoBook-Slim...,981.59,,Remanufactured,ASUS VivoBook Slim,Casual Computing,Notebook/Laptop,,"Built-in Microphone, Built-in Webcam, Bluetoot...",...,,,,,,,,,,
3,"ACER Swift 3 14"" Full HD Laptop Intel Pentium™...",https://www.ebay.com.sg/itm/ACER-Swift-3-14-Fu...,611.17,,Remanufactured,Acer Swift 3,Casual Computing,Notebook/Laptop,,Backlit Keyboard,...,,,,,,,,,,
4,Dell XPS 15 7590 Fast Full HD LED Ultrabook La...,https://www.ebay.com.sg/itm/Dell-XPS-15-7590-F...,2696.09,,Remanufactured,Dell XPS 15 7590,,Notebook/Laptop,NVIDIA(R) GeForce(R) GTX 1650 4GB GDDR5,"Backlit Keyboard, Built-in Webcam, Bluetooth",...,,,,,,,,,,
5,"HP Pavilion 14-ce3600sa 14"" Full HD Intel Core...",https://www.ebay.com.sg/itm/HP-Pavilion-14-ce3...,746.18,,Remanufactured,HP Pavilion 14-ce3600sa,Casual Computing,Notebook/Laptop,,"Built-in Microphone, Built-in Webcam, Bluetoot...",...,,,,,,,,,,
6,"ASUS ZenBook UX305FA 13.3"" Full HD M-5Y10 8GB ...",https://www.ebay.com.sg/itm/ASUS-ZenBook-UX305...,839.44,,Remanufactured,UX305FA-FC061H,,Notebook/Laptop,,Wi-Fi,...,,,,,,,,,,
7,"HP 14-cf2504sa 14"" Full HD Laptop Intel Quad C...",https://www.ebay.com.sg/itm/HP-14-cf2504sa-14-...,851.95,,Remanufactured,14-cf2504sa,,Notebook/Laptop,,"Built-in Microphone, Built-in Webcam, Bluetoot...",...,,,,,,,,,,
8,"HP 255 G7 15.6"" Full HD Laptop Quad Core AMD R...",https://www.ebay.com.sg/itm/HP-255-G7-15-6-Ful...,888.99,,Remanufactured,HP 255 G7,,Notebook/Laptop,,"Built-in Microphone, Built-in Webcam, Bluetoot...",...,,,,,,,,,,
9,ASUS TP410U 14 inch FHD Laptop 8th Gen i5-8250...,https://www.ebay.com.sg/itm/ASUS-TP410U-14-inc...,899.0,,Remanufactured,ASUS VivoBook,"working, studying",Notebook/Laptop,,,...,,,,,,,,,,
10,"ASUS ZenBook UX360CA 13.3"" Full HD Touchscreen...",https://www.ebay.com.sg/itm/ASUS-ZenBook-UX360...,839.44,,Remanufactured,ASUS ZenBook,,Notebook/Laptop,,"Touchscreen, Wi-Fi, Built-in Webcam",...,,,,,,,,,,


In [106]:
# Export the table to an Excel workbook in the current working directory.
df_ebay.to_excel("ebay.xlsx")

In [None]:
# # Lazada example (unfinished draft, kept commented out; not yet a ScraperBase subclass)
# driver = webdriver.Chrome(executable_path=os.getenv("CHROMEDRIVER_PATH"))
# driver.get('https://www.lazada.sg/')

# # Search for laptop
# search_bar = driver.find_element_by_id("q")
# search_bar.send_keys("laptop", Keys.ENTER)

# # Filter by Refurbished items
# used_checkbox = driver.find_element_by_xpath("//span[contains(text(), 'Refurbish')]")
# used_checkbox.click()

# link_container = driver.find_element_by_xpath('//*[@id="root"]/div/div[2]/div[1]/div/div[1]/div[3]').find_elements_by_tag_name("a")
# links = []
# for link in link_container:
#     links.append(link.get_attribute("href"))
# links = list(set(links))

# storage = {}
# for i in range(len(links)):
#     driver.get(links[i])
#     try:
#         product_title = driver.find_element_by_class_name("pdp-mod-product-badge-title").text
#         specs = driver.find_element_by_class_name("specification-keys").find_elements_by_tag_name("li")
#         storage[i] = {"title": product_title}
#         for spec in specs:
#             spec_title = spec.find_element_by_class_name("key-title").text
#             storage[i][spec_title] = spec.find_element_by_class_name("key-value").text 
#     except NoSuchElementException:
#         pass