# Code to extract TV info from Bing Lee

In [None]:
# Dependencies
import os
from time import sleep

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from selenium.webdriver import ActionChains
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from webdriver_manager.chrome import ChromeDriverManager

There are 134 televisions listed on Bing Lee's website, all on one continuous page. The page initially shows only 36 of them and then stops at a "Load 36 More" button, so we use Splinter to automate pressing that button.

In [None]:
# Resolve the chromedriver executable via webdriver_manager and keep it as the
# keyword argument that Browser() expects
executable_path = {'executable_path': ChromeDriverManager().install()}
# Start a visible (non-headless) Chrome session controlled through Splinter
browser = Browser('chrome', **executable_path, headless=False)

# Open the Bing Lee LED/LCD TV listing page in that browser
url = "https://www.binglee.com.au/tech/tv-video/led-lcd-tvs"
browser.visit(url)

## Automate Browser Navigation

In [3]:
# Helper that presses the "Load ..." link once

def click_load(y):
    """Scroll down the page and click a link whose text contains "Load".

    Parameters
    ----------
    y
        Not used by the function; kept so existing calls such as
        ``click_load(84)`` continue to work.

    Uses the module-level ``browser`` object and returns ``None``.
    """
    scroll_script = 'window.scrollTo(0, 10000);'
    link_text = "Load"

    # Scroll first so the link is in view before it is clicked
    browser.execute_script(scroll_script)

    # Click the link, then pause briefly before handing control back
    browser.click_link_by_partial_text(link_text)
    sleep(0.05)

In [4]:
# Helper that presses the "Load all" link once

def click_all(z):
    """Scroll further down the page and click a link whose text contains "Load all".

    Parameters
    ----------
    z
        Not used by the function; kept so existing calls such as
        ``click_all(3)`` continue to work.

    Uses the module-level ``browser`` object and returns ``None``.
    """
    scroll_script = 'window.scrollTo(0, 20000);'
    link_text = "Load all"

    # Scroll first so the link is in view before it is clicked
    browser.execute_script(scroll_script)

    # Click the link, then pause briefly before handing control back
    browser.click_link_by_partial_text(link_text)
    sleep(0.05)

In [5]:
# Parse the page currently loaded in the browser and collect its product links

def scrape(page_no):
    """Return the ``<a>`` tags with class ``"bni product-link"`` from the browser's HTML.

    Parameters
    ----------
    page_no
        Not used by the function; kept so existing calls such as
        ``scrape(3)`` continue to work.

    Returns
    -------
    The result of ``find_all`` on a Beautiful Soup object built from
    ``browser.html`` with the ``'html.parser'`` parser.
    """
    product_link_attrs = {"class": "bni product-link"}
    return bs(browser.html, "html.parser").find_all('a', product_link_attrs)
      
# Example: count the product links found on the current page
# print(f"There are {len(scrape(2))} results on this page")



In [6]:
# Press the "Load" link once; click_load does not use its argument
click_load(84)



In [7]:
# Press the "Load all" link once; click_all does not use its argument
click_all(3)

In [10]:
# Preview the scraped product-link tags; scrape does not use its argument
scrape(3)

[<a class="bni product-link" href="https://www.binglee.com.au/lg-oled55cxpta-55-oled-4k-smart-tv" id="" onclick="cbar_ga_eventtrack(this, 'Barilliance', 'Product', 'LG OLED55CXPTA - CX 55&quot; OLED 4K with LG AI ThinQ'); return cbar_click3(this, 	'https://www.binglee.com.au/lg-oled55cxpta-55-oled-4k-smart-tv', 1, 1403188786,'lg-oled55cxpta-55-oled-4k-smart-tv');" style=""><img class="product-img img-responsive" height="185" id="" onerror="cbar_img_error(this)" src="https://media.binglee.com.au/media/catalog/product/cache/1/small_image/185x185/9df78eab33525d08d6e5fb8d27136e95/o/l/oled-77-cx-hero_2.jpg" style="" width="185"/><span class="quickview-button" data-quickview-url="https://www.binglee.com.au/blcatalog/quickview/quickview/id/52835/" onclick="cbar_quick_view(this, 'lg-oled55cxpta-55-oled-4k-smart-tv');"><i class="fa fa-external-link"></i>Quick View</span></a>,
 <a class="bni product-link" href="https://www.binglee.com.au/tcl-55p715-55-quhd-android-smart-tv" id="" onclick="cbar_g

In [8]:
# Keep the scraped product-link tags in `results`. Every later cell reads this
# name, so the assignment must run or a fresh kernel fails with NameError.
results = scrape(3)

In [9]:
# Number of product-link tags that were scraped
len(results)

154

##### Although `results` has a length of 154, 20 of those entries are duplicate data stored in a different format.

In [11]:
# Display the scraped product-link tags for inspection
results

[<a class="bni product-link" href="https://www.binglee.com.au/sony-kd-55x8000h-55-inch-x8000h-4k-ultra-hd-hdr-smart-android-tv" id="" onclick="cbar_ga_eventtrack(this, 'Barilliance', 'Category', 'Sony KD-55X8000H 55&quot; X8000H 4K Ultra HD HDR Smart Android TV'); return cbar_click3(this, 	'https://www.binglee.com.au/sony-kd-55x8000h-55-inch-x8000h-4k-ultra-hd-hdr-smart-android-tv', 5, 1631214482,'sony-kd-55x8000h-55-inch-x8000h-4k-ultra-hd-hdr-smart-android-t');" style=""><img class="product-img img-responsive" height="185" id="" onerror="cbar_img_error(this)" src="https://media.binglee.com.au/media/catalog/product/cache/1/small_image/185x185/9df78eab33525d08d6e5fb8d27136e95/s/o/sony-x80h-hero1_1_2.jpg" style="" width="185"/><span class="quickview-button" data-quickview-url="https://www.binglee.com.au/blcatalog/quickview/quickview/id/52906/" onclick="cbar_quick_view(this, 'sony-kd-55x8000h-55-inch-x8000h-4k-ultra-hd-hdr-smart-android-t');"><i class="fa fa-external-link"></i>Quick View

#### Check the "results"

In [110]:
print(results[120]['href']) # the href attribute holds the URL of the TV's product page

https://www.binglee.com.au/samsung-qa55q95tawxxy-55-inch-q95t-qled-smart-4k-tv


In [111]:
print(results[20]['data-product_brand']) # the data-product_brand attribute holds the brand

TCL


## Functions to get ready to put into a df

Summary of the tag attributes used by the functions below:

* categories = data-product_category (not scraped due to redundancy); 
* brand = data-product_brand; 
* models = data-product_sku; 
* names = data-product_name; 
* sizes = title; 
* prices = data-product_price; 
* images = src

Note that the first 20 rows are redundant: they duplicate data that appears elsewhere in the list. They are not extracted below because the script cannot run through those rows.

In [178]:
# Create function to get the brand for the loop
def brand_finder(x):
    """Return the ``data-product_brand`` attribute of ``results[x]``.

    ``x`` is an index into the module-level ``results`` list. A missing
    attribute is not handled here; the lookup error propagates to the caller.
    """
    return results[x]['data-product_brand']

# Example: to find the brand for the result at index 20
print(brand_finder(20))

TCL


In [159]:
# Create function to get the model number (SKU) for the loop
def model_finder(x):
    """Return the ``data-product_sku`` attribute of ``results[x]``.

    ``x`` is an index into the module-level ``results`` list. A missing
    attribute is not handled here; the lookup error propagates to the caller.
    """
    return results[x]['data-product_sku']

# Example: to find the model number (SKU) for the result at index 20
print(model_finder(20))

65P615


In [160]:
# Create function to get the product name of the TV for the loop
def name_finder(x):
    """Return the ``data-product_name`` attribute of ``results[x]``.

    ``x`` is an index into the module-level ``results`` list. A missing
    attribute is not handled here; the lookup error propagates to the caller.
    """
    return results[x]['data-product_name']

# Example: to find the product name for the result at index 20
print(name_finder(20))

TCL - 65P615 - 65" UHD 4K Android TV 


In [161]:
# Create function to get the title text, from which the size can be split later
def size_finder(x):
    """Return the ``title`` attribute of ``results[x]``.

    The value is the full title text rather than a bare size; no splitting is
    done here. ``x`` is an index into the module-level ``results`` list, and a
    missing attribute propagates as a lookup error to the caller.
    """
    return results[x]['title']

# Example: to find the title text (which contains the size) for the result at index 20
print(size_finder(20))

TCL - 65P615 - 65" UHD 4K Android TV 


In [168]:
# Create function to get the price for the loop
def price_finder(x):
    """Return the ``data-product_price`` attribute of ``results[x]``.

    The attribute value is returned as-is, with no numeric conversion. ``x`` is
    an index into the module-level ``results`` list, and a missing attribute
    propagates as a lookup error to the caller.
    """
    return results[x]['data-product_price']

# Example: to find the price for the result at index 20
print(price_finder(20))

995.0000


In [222]:
# Create function to get the image for loop
import re
def image_finder(x):
    """Return the ``src`` attribute of the ``img`` child of ``results[x]``.

    ``x`` is an index into the module-level ``results`` list. Nothing is
    handled here: if the ``img`` child or its ``src`` attribute is absent, the
    resulting error propagates to the caller.
    """
    thumbnail = results[x].img
    return thumbnail['src']

# Example: to find the image URL for the result at index 20
print(image_finder(20))

https://media.binglee.com.au/media/catalog/product/cache/1/small_image/185x185/9df78eab33525d08d6e5fb8d27136e95/t/c/tcl-p615-hero.jpg


In [223]:
# Start every output column as its own empty list; the extraction loop
# appends to all six so they stay the same length
brands, models, names = [], [], []
sizes, prices, images = [], [], []

In [224]:
# Collect the attributes of every TV into the parallel column lists.
# The loop starts at index 20 because the first 20 entries of `results` are
# duplicate data in a different format.

for x in range(20, len(results)):
    try:
        brand = brand_finder(x)
        model = model_finder(x)
        name = name_finder(x)
        size = size_finder(x)
        price = price_finder(x)
        image = image_finder(x)
    except (KeyError, TypeError):
        # KeyError: the tag lacks one of the expected attributes.
        # TypeError: the tag has no <img> child, so indexing `.img` fails.
        # The name is read with .get() so that reporting the skipped entry
        # cannot itself raise when the name attribute is the missing one.
        print("Price not available: ", results[x].get('data-product_name', results[x].get('href')))
        print("")
        continue

    # Keep the row only when every text field is non-empty; all six lists
    # are appended together so they stay aligned row by row.
    if brand and model and name and size and price:
        brands.append(brand)
        models.append(model)
        names.append(name)
        sizes.append(size)
        prices.append(price)
        images.append(image)

In [225]:
# Assemble the DataFrame: the three scalar values are repeated on every row,
# and the six lists supply the per-TV columns
bing_lee_df = pd.DataFrame(
    dict(
        retailer="Bing_Lee",
        category="Televisions",
        currency="AUD",
        brand=brands,
        model=models,
        name=names,
        size=sizes,
        price=prices,
        image=images,
    )
)

bing_lee_df

Unnamed: 0,retailer,category,currency,brand,model,name,size,price,image
0,Bing_Lee,Televisions,AUD,TCL,65P615,"TCL - 65P615 - 65"" UHD 4K Android TV","TCL - 65P615 - 65"" UHD 4K Android TV",995.0000,https://media.binglee.com.au/media/catalog/pro...
1,Bing_Lee,Televisions,AUD,TCL,75C815,TCL - 75C815 - 75” QLED Android TV,TCL - 75C815 - 75” QLED Android TV,2995.0000,https://media.binglee.com.au/media/catalog/pro...
2,Bing_Lee,Televisions,AUD,Hisense,40S4,"Hisense - 40S4 - 40"" Series 4 FHD Smart TV","Hisense - 40S4 - 40"" Series 4 FHD Smart TV",495.0000,https://media.binglee.com.au/media/catalog/pro...
3,Bing_Lee,Televisions,AUD,Hisense,49S4,"Hisense - 49S4 - 49"" Series 4 FHD Smart TV","Hisense - 49S4 - 49"" Series 4 FHD Smart TV",595.0000,https://media.binglee.com.au/media/catalog/pro...
4,Bing_Lee,Televisions,AUD,TCL,40S615,TCL - 40S615 - 40” Full HD Android TV,TCL - 40S615 - 40” Full HD Android TV,495.0000,https://media.binglee.com.au/media/catalog/pro...
...,...,...,...,...,...,...,...,...,...
129,Bing_Lee,Televisions,AUD,LG,55UH770T,"LG - 55UH770T - 55"" UHD Smart LED TV","LG - 55UH770T - 55"" UHD Smart LED TV",2249.0000,https://media.binglee.com.au/media/catalog/pro...
130,Bing_Lee,Televisions,AUD,Changhong,UD42C5600I,"Changhong - UD42C5600I - 42"" UHD Smart TV","Changhong - UD42C5600I - 42"" UHD Smart TV",698.0000,https://media.binglee.com.au/media/catalog/pro...
131,Bing_Lee,Televisions,AUD,Samsung,QA98Q900RB,"Samsung - QA98Q900RBWXXY - 98"" QLED Smart 8K U...","Samsung - QA98Q900RBWXXY - 98"" QLED Smart 8K U...",99999.0000,https://media.binglee.com.au/media/catalog/pro...
132,Bing_Lee,Televisions,AUD,Panasonic,TH65GX740A,"Panasonic - TH-65GX740A - 65"" LED 4K UHD LED TV","Panasonic - TH-65GX740A - 65"" LED 4K UHD LED TV",1295.0000,https://media.binglee.com.au/media/catalog/pro...


In [226]:
# Drop any duplicates
# No subset is passed, so rows are compared across all columns. The result
# replaces bing_lee_df and is then displayed.
bing_lee_df = bing_lee_df.drop_duplicates()
bing_lee_df

Unnamed: 0,retailer,category,currency,brand,model,name,size,price,image
0,Bing_Lee,Televisions,AUD,TCL,65P615,"TCL - 65P615 - 65"" UHD 4K Android TV","TCL - 65P615 - 65"" UHD 4K Android TV",995.0000,https://media.binglee.com.au/media/catalog/pro...
1,Bing_Lee,Televisions,AUD,TCL,75C815,TCL - 75C815 - 75” QLED Android TV,TCL - 75C815 - 75” QLED Android TV,2995.0000,https://media.binglee.com.au/media/catalog/pro...
2,Bing_Lee,Televisions,AUD,Hisense,40S4,"Hisense - 40S4 - 40"" Series 4 FHD Smart TV","Hisense - 40S4 - 40"" Series 4 FHD Smart TV",495.0000,https://media.binglee.com.au/media/catalog/pro...
3,Bing_Lee,Televisions,AUD,Hisense,49S4,"Hisense - 49S4 - 49"" Series 4 FHD Smart TV","Hisense - 49S4 - 49"" Series 4 FHD Smart TV",595.0000,https://media.binglee.com.au/media/catalog/pro...
4,Bing_Lee,Televisions,AUD,TCL,40S615,TCL - 40S615 - 40” Full HD Android TV,TCL - 40S615 - 40” Full HD Android TV,495.0000,https://media.binglee.com.au/media/catalog/pro...
...,...,...,...,...,...,...,...,...,...
129,Bing_Lee,Televisions,AUD,LG,55UH770T,"LG - 55UH770T - 55"" UHD Smart LED TV","LG - 55UH770T - 55"" UHD Smart LED TV",2249.0000,https://media.binglee.com.au/media/catalog/pro...
130,Bing_Lee,Televisions,AUD,Changhong,UD42C5600I,"Changhong - UD42C5600I - 42"" UHD Smart TV","Changhong - UD42C5600I - 42"" UHD Smart TV",698.0000,https://media.binglee.com.au/media/catalog/pro...
131,Bing_Lee,Televisions,AUD,Samsung,QA98Q900RB,"Samsung - QA98Q900RBWXXY - 98"" QLED Smart 8K U...","Samsung - QA98Q900RBWXXY - 98"" QLED Smart 8K U...",99999.0000,https://media.binglee.com.au/media/catalog/pro...
132,Bing_Lee,Televisions,AUD,Panasonic,TH65GX740A,"Panasonic - TH-65GX740A - 65"" LED 4K UHD LED TV","Panasonic - TH-65GX740A - 65"" LED 4K UHD LED TV",1295.0000,https://media.binglee.com.au/media/catalog/pro...


In [227]:
# Write the DataFrame to a CSV file for later tidy-up.
# The output folder is created first so the export does not fail when the
# notebook runs somewhere the folder does not exist yet.
os.makedirs("output", exist_ok=True)
bing_lee_df.to_csv("output/bing_lee.csv")