# DATA EXTRACTION with SELENIUM

## 1 - Setting up

In [None]:
# install selenium and other resources for scraping data
!pip install selenium
!apt-get update
!apt install chromium-chromedriver

Collecting selenium
  Downloading selenium-4.1.0-py3-none-any.whl (958 kB)
[K     |████████████████████████████████| 958 kB 6.8 MB/s 
[?25hCollecting trio~=0.17
  Downloading trio-0.19.0-py3-none-any.whl (356 kB)
[K     |████████████████████████████████| 356 kB 44.2 MB/s 
[?25hCollecting trio-websocket~=0.9
  Downloading trio_websocket-0.9.2-py3-none-any.whl (16 kB)
Collecting urllib3[secure]~=1.26
  Downloading urllib3-1.26.7-py2.py3-none-any.whl (138 kB)
[K     |████████████████████████████████| 138 kB 54.5 MB/s 
Collecting outcome
  Downloading outcome-1.1.0-py2.py3-none-any.whl (9.7 kB)
Collecting async-generator>=1.9
  Downloading async_generator-1.10-py3-none-any.whl (18 kB)
Collecting sniffio
  Downloading sniffio-1.2.0-py3-none-any.whl (10 kB)
Collecting wsproto>=0.14
  Downloading wsproto-1.0.0-py3-none-any.whl (24 kB)
Collecting cryptography>=1.3.4
  Downloading cryptography-36.0.1-cp36-abi3-manylinux_2_24_x86_64.whl (3.6 MB)
[K     |████████████████████████████████| 3.

In [None]:
# IMPORTS #
import re
import time
import pandas as pd

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

# Global driver to use throughout the script
DRIVER = None

In [None]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Optional: do not run this cell unless a randomised (fake) user agent is needed.
# NOTE(review): `fake_useragent` is not installed by the setup cell, and the
# chromedriver path below looks like a local Windows path — confirm both
# before running this on Colab.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from fake_useragent import UserAgent

options = Options()
ua = UserAgent()
userAgent = ua.random
print(userAgent)
options.add_argument(f'user-agent={userAgent}')
# Uses a local `driver`, independent of the notebook-wide DRIVER global.
driver = webdriver.Chrome(chrome_options=options, executable_path=r'C:\WebDrivers\ChromeDriver\chromedriver_win32\chromedriver.exe')
driver.get("https://www.google.co.in")
driver.quit()

In [None]:
# Function to (re)start driver
def start_driver(force_restart=False):
    """(Re)create the module-level headless Chrome ``DRIVER``.

    Args:
        force_restart: When True, an already-active driver is shut down and
            replaced. When False (default), an active driver is left alone
            and an error is raised instead.

    Raises:
        RuntimeError: If a driver is already active and ``force_restart``
            is False.
    """
    global DRIVER

    if DRIVER is not None:
        if not force_restart:
            raise RuntimeError('ERROR: cannot overwrite an active driver. Please close the driver before restarting.')
        try:
            # quit() ends the whole browser session and its chromedriver
            # process; close() only closes the current window.
            DRIVER.quit()
        finally:
            # Drop the stale handle even if shutting it down failed.
            DRIVER = None

    # Setting up the driver
    options = webdriver.ChromeOptions()
    options.add_argument('-headless')  # run Chrome in the background, no visible window
    options.add_argument('-no-sandbox')
    options.add_argument('-disable-dev-shm-usage')
    # Present a complete user-agent string so requests look like a real user.
    # The value is passed without surrounding quotes: no shell is involved, so
    # quotes would become part of the user agent itself.
    options.add_argument('--user-agent=Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Microsoft; Lumia 640 XL LTE) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Mobile Safari/537.36 Edge/12.10166')
    DRIVER = webdriver.Chrome('chromedriver',options=options)
    

# Wrapper to close driver if its created
def close_driver():
    """Shut down the module-level ``DRIVER`` (if any) and reset it to None."""
    global DRIVER
    if DRIVER is None:
        return
    try:
        # quit() ends the browser session and the chromedriver process;
        # close() would only close the current window.
        DRIVER.quit()
    finally:
        # Clear the handle even when shutdown raises, so a new driver can be started.
        DRIVER = None

# Create the new chrome browser with specific options
def initialize_driver():
    """Create the module-level headless Chrome ``DRIVER`` unless one already exists."""
    global DRIVER
    if DRIVER is not None:
        # A driver is already active; keep it as is.
        return

    print('Initiating driver...')
    opts = webdriver.ChromeOptions()
    for flag in ('-headless', '-no-sandbox'):
        opts.add_argument(flag)
    DRIVER = webdriver.Chrome('chromedriver', options=opts)
    print('Finished!')

In [None]:
# Start from a clean state: drop any existing driver, then create a fresh one.
close_driver()
initialize_driver()

Initiating driver...
Finished!


## 2 - Paula's Choice Ingredient Dictionary Web Scraping

In [None]:
# Function to collect the URL of every ingredient listed in the dictionary

def get_url_list(page='https://www.paulaschoice.com/ingredients?csortb1=name&csortd1=1&start=0&sz=1927'):
  """Collect the detail-page URL of every ingredient row on a listing page.

  Args:
    page: URL of the ingredient listing to load. Defaults to the listing
      URL this notebook has always used.

  Returns:
    A list holding, for each ingredient row in page order, the ``href`` of
    the first link element found inside that row.
  """
  DRIVER.get(page)
  rows = DRIVER.find_elements(By.CLASS_NAME,"IngredientList__IngredientRow-sc-1nbipyf-5")
  return [
    row.find_element(By.CLASS_NAME, 'Link__StyledLink-wqxkev-0').get_attribute('href')
    for row in rows
  ]

In [None]:
# Total number of ingredients listed in Paula's Choice ingredient dictionary.
# The result must be bound to `url_list`: the cells below read that name.
url_list = get_url_list()
len(url_list)

1927

In [None]:
# Save the URLs to a local text file so they survive if the DRIVER session is lost.
# `with` closes the file even if a write fails part-way through.
with open("url.txt", "w") as textfile:
    for element in url_list:
        textfile.write(element + "\n")

In [None]:
# function to get ingredient's info from each ingredient's url. Info includes: name, rating/score, categories, benefits and description

def get_ingredient_info(i):
    """Extract one ingredient's details from a loaded ingredient page.

    Args:
        i: Object exposing Selenium's ``find_element`` / ``find_elements``
            (the notebook passes the global ``DRIVER`` after loading the page).

    Returns:
        dict with the keys ``name``, ``rating``, ``categories``, ``benefits``
        and ``description``. A field whose element is not found stays ``''``.
    """
    d = {'name':'',
        'rating':'',
        'categories':'',
        'benefits':'',
        'description':''}

    # name
    try:
        name_elem = i.find_element(By.CLASS_NAME, 'IngredientPage__Name-sc-1dg1q2l-5')
        d['name'] = name_elem.get_attribute('innerHTML').strip()
    except NoSuchElementException:
        pass

    # rating
    try:
        rating_elem = i.find_element(By.CLASS_NAME,'ColoredIngredientRating__Rating-r02772-0')
        d['rating'] = rating_elem.get_attribute('innerHTML').strip()
    except NoSuchElementException:
        pass

    # categories / benefits: both are 'large7' elements told apart by their
    # label prefix, so one query serves both fields. find_elements returns an
    # empty list when nothing matches, so no NoSuchElementException handler
    # is needed here.
    for elem in i.find_elements(By.CLASS_NAME,'large7'):
        text = elem.text
        if text.startswith('Categories:'):
            d['categories'] = text[len('Categories:'):].strip()
        elif text.startswith('Benefits:'):
            d['benefits'] = text[len('Benefits:'):].strip()

    # description:
    try:
        description_elem = i.find_element(By.CLASS_NAME,'last')
        d['description'] = description_elem.text.strip()
    except NoSuchElementException:
        pass

    return d

In [None]:
# start scraping

ingredient_data = []
try:
  for link in url_list:
    DRIVER.get(link)
    ingredient_data.append(get_ingredient_info(DRIVER))
    print(len(ingredient_data))  # progress: number of ingredients scraped so far
    time.sleep(1)  # pause between page loads
finally:
  # Release the browser even if a page load fails or the run is interrupted;
  # whatever was scraped so far stays in `ingredient_data`.
  close_driver()

In [None]:
# Build the ingredient table (columns taken from the first record's keys) and
# save it as CSV. to_csv also writes the index, which comes back as an
# 'Unnamed: 0' column when the file is read again.
df = pd.DataFrame(data = ingredient_data, columns = ingredient_data[0].keys())
df.to_csv('ingredient_dictionary.csv')

In [None]:
# Spot-check ten random rows of the ingredient table.
df.sample(10)

Unnamed: 0.1,Unnamed: 0,name,rating,categories,benefits,description
803,803,Himanthalia Elongate Extract,Good,Texture Enhancer,,Extract of a species of algae.
1295,1295,Persicaria Hydropiper,Best,"Antioxidant, Plant Extracts","Anti-Aging, Soothing, Hydration",Persicaria hydropiper (water pepper) extract i...
1298,1298,Petrolatum,Best,"Emollient, Occlusive/Opacifying Agent","Hydration, Soothing",Small amounts of petrolatum are commonly found...
791,791,Helianthus Oil,Best,"Emollient, Plant Extracts, Antioxidant","Hydration, Anti-Aging",
1676,1676,Sodium Tallowate,Worst,"Cleansing Agent, Irritant",,Sodium salt of tallow.
731,731,Glyceryl Ascorbate,Best,Antioxidant,"Anti-Aging, Dark Spot Fading, Evens Skin Tone",Concentration range of glyceryl ascorbate in s...
1709,1709,Stearates,Good,"Texture Enhancer, Emollient, Emulsifier",Hydration,
638,638,Eugenia Aromatica,Worst,"Irritant, Plant Extracts",,
1646,1646,Sodium Hydroxide,Average,pH Adjuster/Stabilizer,,Sodium hydroxide is seen in many applications ...
662,662,Feverfew Extract,Average,"Plant Extracts, Antioxidant",Soothing,Extract that can be very soothing to skin but ...


In [None]:
# Drop the index column that a CSV round-trip adds. errors='ignore' keeps the
# cell re-runnable and valid when `df` was built in memory and has no such column.
df = df.drop(columns = 'Unnamed: 0', errors = 'ignore')

In [None]:
# Count missing values per column.
df.isna().sum()

name             0
rating           0
categories      28
benefits       735
description    438
dtype: int64

In [None]:
# Show every row that has at least one missing value.
df[df.isna().any(axis=1)]

Unnamed: 0,name,rating,categories,benefits,description
0,"1, 2-Hexanediol",Good,"Preservative, Humectant",,A synthetic preservative and moisture-binding ...
3,Acacia Farnesiana Extract,Worst,"Fragrance: Synthetic and Natural, Plant Extracts",,A fragrant extract from a type of acacia tree....
6,Acetic Acid,Worst,"Irritant, pH Adjuster/Stabilizer",,"Acid found in vinegar, some fruits, and human ..."
7,Acetone,Worst,"Irritant, Solvent",,Strong solvent that is used in nail polish rem...
13,Acetyl Carnitine HCL,Best,Antioxidant,Hydration,
...,...,...,...,...,...
1921,Zinc Sulfate,Worst,"Preservative, Irritant",,Chemical compound resulting from the interacti...
1922,Zingiberaceae,Worst,"Irritant, Plant Extracts, Antioxidant",,
1924,Zingiber Zerumbet,Worst,"Plant Extracts, Humectant, Irritant",Hydration,
1925,Zingiber Zerumbet Extract,Worst,"Plant Extracts, Humectant, Irritant",Hydration,


In [None]:
# Look up silicone and several silicone-derived substances by exact name.
df[df.name.isin(['Dimethicone', 'Silicone','Cyclomethicone','Cyclohexasiloxane','Cetearyl Methicone', 'Cyclopentasiloxane'])]

Unnamed: 0,name,rating,categories,benefits,description
505,Cyclohexasiloxane,Good,"Silicone, Emollient, Solvent",Hydration,One of the numerous forms of synthetic silicon...
506,Cyclomethicone,Good,"Silicone, Emollient, Solvent",Hydration,Cyclomethicone is a silicone that is used to i...
507,Cyclopentasiloxane,Good,"Silicone, Emollient, Solvent",Hydration,According to the Cosmetic Ingredient Review Ex...
552,Dimethicone,Best,"Silicone, Emollient",Hydration,Dimethicone is considered a synthetic ingredie...
1595,Silicone,Good,Silicone,"Hydration, Oil Control, Pore Minimizer",Bottom line: The types of silicones used in co...


## 3 - Watsons Product Items

In [None]:
# Make sure a driver exists (no-op if one is already active), then display it.
initialize_driver()
DRIVER

Initiating driver...
Finished!


<selenium.webdriver.chrome.webdriver.WebDriver (session="03fa485e00bac976718480a39e6dc5dc")>

In [None]:
# Exploratory check on the first Watsons listing page: load it, find the
# product tiles by class name, and read the product link out of the first tile.
page = 'https://www.watsons.com.sg/skincare/face/c/2201000?q=%3AigcBestSeller&page=0&resultsForPage=64&text=&sort=igcBestSeller&deliveryType='
name = 'productItemPhotoContainer'
DRIVER.get(page)
n = DRIVER.find_elements(By.CLASS_NAME, name)
# n[0] assumes at least one tile was found; an empty result raises IndexError.
link = n[0].find_element(By.CLASS_NAME,'ClickSearchResultEvent_Class').get_attribute('href')
link

'https://www.watsons.com.sg/aloe-vera-moisturizing-lotion-50ml/p/BP_36564'

In [None]:
# URLs of the first two listing pages; they differ only in the `page` query parameter.
page_1 = 'https://www.watsons.com.sg/skincare/face/c/2201000?q=%3AigcBestSeller&page=0&resultsForPage=64&text=&sort=igcBestSeller&deliveryType='
page_2 = 'https://www.watsons.com.sg/skincare/face/c/2201000?q=%3AigcBestSeller&page=1&resultsForPage=64&text=&sort=igcBestSeller&deliveryType='

In [None]:
# Check that the page number inside the URL can be rewritten with a regex substitution.
n = 2
re.sub(r'&page=\d+&', f'&page={n}&', page_1)

'https://www.watsons.com.sg/skincare/face/c/2201000?q=%3AigcBestSeller&page=2&resultsForPage=64&text=&sort=igcBestSeller&deliveryType='

In [None]:
# function to get single product's link from one category
def get_product_link(page, name, number_of_page):
  """Collect product links from the first ``number_of_page`` listing pages.

  Args:
    page: Listing URL containing a ``&page=<n>&`` query parameter; the page
      number is rewritten for each page visited.
    name: Class name of the element wrapping each product on the listing.
    number_of_page: How many listing pages to visit, starting at page 0.

  Returns:
    A list of product ``href`` values in the order they were encountered.
  """
  links = []
  for page_no in range(number_of_page):
    target = re.sub(r'&page=\d+&', f'&page={page_no}&', page)
    # The browser is restarted before every listing page is loaded.
    start_driver(force_restart=True)
    DRIVER.get(target)
    links.extend(
      block.find_element(By.CLASS_NAME,'ClickSearchResultEvent_Class').get_attribute('href')
      for block in DRIVER.find_elements(By.CLASS_NAME, name)
    )
    # Wait ten seconds after each listing page.
    time.sleep(10)
  return links

In [None]:
# FACE / WATSONS / PRODUCTS' URL LIST
# Visits 22 listing pages; the last expression shows how many links were collected.
page = 'https://www.watsons.com.sg/skincare/face/c/2201000?q=%3AigcBestSeller&page=0&resultsForPage=64&text=&sort=igcBestSeller&deliveryType='
name = 'productItemPhotoContainer'
result = get_product_link(page, name, 22)
len(result)

1404

In [None]:
# save links as text file
# `with` closes the file even if a write fails part-way through.
with open("face_watsons_urls.txt", "w") as textfile:
    for element in result:
        textfile.write(element + "\n")

In [None]:
# EYE / WATSONS / PRODUCTS' URL LIST
# Visits 2 listing pages; the last expression shows how many links were collected.
page = 'https://www.watsons.com.sg/skincare/eye-care/c/2202000?q=%3AigcBestSeller&page=0&resultsForPage=64&text=&sort=igcBestSeller&deliveryType='
name = 'productItemPhotoContainer'
result = get_product_link(page, name, 2)
len(result)

127

In [None]:
# save links as text file
# `with` closes the file even if a write fails part-way through.
with open("eye_watsons_urls.txt", "w") as textfile:
    for element in result:
        textfile.write(element + "\n")

In [None]:
# DERMA / WATSONS / PRODUCTS' URL LIST
# Visits 12 listing pages; the last expression shows how many links were collected.
page = 'https://www.watsons.com.sg/skincare/derma-skin-care/c/2203000?q=%3AigcBestSeller&page=0&resultsForPage=64&text=&sort=igcBestSeller&deliveryType='
name = 'productItemPhotoContainer'
result = get_product_link(page, name, 12)
len(result)

736

In [None]:
# save links as text file
# `with` closes the file even if a write fails part-way through.
with open("derma_watsons_urls.txt", "w") as textfile:
    for element in result:
        textfile.write(element + "\n")

In [None]:
# SUN / WATSONS / PRODUCTS' URL LIST
# Visits 3 listing pages; the last expression shows how many links were collected.
page = 'https://www.watsons.com.sg/skincare/suncare/c/2204000?q=%3AigcBestSeller&page=0&resultsForPage=64&text=&sort=igcBestSeller&deliveryType='
name = 'productItemPhotoContainer'
result = get_product_link(page, name, 3)
len(result)

179

In [None]:
# save links as text file
# `with` closes the file even if a write fails part-way through.
with open("sun_watsons_urls.txt", "w") as textfile:
    for element in result:
        textfile.write(element + "\n")

In [None]:
# function to organize URLs into a dataframe:
def text_to_dataframe(path, tag):
  """Load a text file of URLs (one per line) into a two-column DataFrame.

  Args:
    path: Path of the text file to read.
    tag: Category label stored in the ``cat`` column of every row.

  Returns:
    DataFrame with the columns ``link`` and ``cat``. Each link has its line
    ending and surrounding whitespace removed; blank lines are skipped.
  """
  with open(path, 'r') as links:
    # readlines() would keep the trailing '\n' on every URL, so strip each
    # line and drop the ones that end up empty.
    link_list = [line.strip() for line in links if line.strip()]
  return pd.DataFrame(data = {'link': link_list, 'cat': tag}, columns = ['link', 'cat'])

In [None]:
paths = ['/content/derma_watsons_urls.txt', '/content/eye_watsons_urls.txt', '/content/face_watsons_urls.txt', '/content/sun_watsons_urls.txt']
tags = ['derma', 'eye', 'face', 'sun']

# Build one frame per category file and concatenate them in a single call,
# instead of growing a frame inside the loop from an empty, float-typed seed.
df = pd.concat(
  [text_to_dataframe(path, tag) for path, tag in zip(paths, tags)],
  axis = 0, ignore_index=True)
df.sample(20)


Unnamed: 0,link,cat
53,https://www.watsons.com.sg/moisturizing-lotion...,derma
2092,https://www.watsons.com.sg/100-organic-avocado...,face
104,https://www.watsons.com.sg/eau-thermale-micell...,derma
2160,https://www.watsons.com.sg/limited-edition-ril...,face
2130,https://www.watsons.com.sg/new-skin-caviar-lux...,face
13,https://www.watsons.com.sg/eau-thermale-water-...,derma
1060,https://www.watsons.com.sg/hydrating-perfect-g...,face
1810,https://www.watsons.com.sg/cica-calming-mask-5...,face
428,https://www.watsons.com.sg/sebumclar-acne-trea...,derma
163,https://www.watsons.com.sg/aqualia-thermal-hyd...,derma


In [None]:
# Persist the combined link table; the index is written as the first CSV column.
df.to_csv('watsons_product_urls.csv')

In [None]:
# Reload the link table and drop the index column that was saved with it.
# NOTE(review): this reads from Google Drive, while the cell that writes
# 'watsons_product_urls.csv' saves to the working directory — presumably the
# file was copied to Drive by hand; confirm.
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/AA - FINAL PROJECT/Dataset/watsons_product_urls.csv')
df.drop(columns = 'Unnamed: 0', inplace = True)
df.head(10)

Unnamed: 0,link,cat
0,https://www.watsons.com.sg/eau-thermale-water-...,derma
1,https://www.watsons.com.sg/depiderm-brightenin...,derma
2,https://www.watsons.com.sg/hyseac-cleansing-ge...,derma
3,https://www.watsons.com.sg/bariesun-creme-mine...,derma
4,https://www.watsons.com.sg/vegetal-10-regulati...,derma
5,https://www.watsons.com.sg/vegetal-08-moisturi...,derma
6,https://www.watsons.com.sg/vegetal-12-anti-age...,derma
7,https://www.watsons.com.sg/kids-double-action-...,derma
8,https://www.watsons.com.sg/physiological-micel...,derma
9,https://www.watsons.com.sg/cold-cream-nourishi...,derma


In [None]:
def get_product_info(page):
  """Read brand, product name and price from a loaded product page.

  Args:
    page: Search root exposing Selenium's ``find_element``. The notebook
      passes the global ``DRIVER`` after ``DRIVER.get(url)``; the lookups
      now run against this argument instead of silently using the global.

  Returns:
    dict that may contain ``brand``, ``product_description`` and ``price``.
    A key is left out when its element is not found.
  """
  d = {}

  # brand: text of the first <h2>
  try:
    brand = page.find_element(By.TAG_NAME, 'h2')
    d['brand'] = brand.text.strip()
  except NoSuchElementException:
    pass

  # product description: inner HTML of the first <h1>
  try:
    product_des = page.find_element(By.TAG_NAME, 'h1')
    d['product_description'] = product_des.get_attribute('innerHTML')
  except NoSuchElementException:
    pass

  # TODO: ingredients are deliberately not scraped yet. The earlier attempt
  # (taking the <p> inside <article> with the most comma-separated parts) was
  # unreliable: some pages carry no ingredient list, some put each ingredient
  # in its own <p>/<br>, and some separate ingredients with '.' instead of ','.

  # price: strip() removes leading/trailing 'S' and '$' characters
  try:
    price = page.find_element(By.CLASS_NAME, 'productPrice')
    d['price'] = price.text.strip('S$')
  except NoSuchElementException:
    pass

  return d

In [None]:
# Peek at the first stored link.
df['link'][0]

'https://www.watsons.com.sg/eau-thermale-water-cream-40ml/p/BP_56482\n'

In [None]:
# test with one product url:
# Load the product at index 600 and parse the loaded page.
DRIVER.get(df['link'][600])
get_product_info(DRIVER)

{'brand': 'SUKIN',
 'description': 'This natural Micellar Water is infused with gentle ingredients such as Aloe Vera which help to remove make-up and dirt build up, while soothing and purifying your base. Chamomile and Cucumber also calms the skin leaving it lightly hydrated and clean.',
 'price': '15.90',
 'product_description': 'Micellar Cleansing Water 250ml'}

# PROBLEMS

In [None]:
page = df['link'][600]
DRIVER.get(page)
print(page)
get_product_info(DRIVER)
# Problem: this product page has no ingredient information.

https://www.watsons.com.sg/micellar-cleansing-water-250ml/p/BP_64149



{'brand': 'SUKIN',
 'ingredients': 'This natural Micellar Water is infused with gentle ingredients such as Aloe Vera which help to remove make-up and dirt build up, while soothing and purifying your base. Chamomile and Cucumber also calms the skin leaving it lightly hydrated and clean.',
 'price': '15.90',
 'product_description': 'Micellar Cleansing Water 250ml'}

In [None]:
page = df['link'][16]
DRIVER.get(page)
print(page)
get_product_info(DRIVER)
# fix
# NOTE(review): unclear whether "fix" marks a case that already works or one
# that still needs fixing — confirm with the author.

https://www.watsons.com.sg/hypoallergenic-exfoliating-mask-(captures-impurities-refines-skin-texture)-100ml/p/BP_93665



{'brand': 'URIAGE',
 'ingredients': 'Uriage Thermal Water, Kaolin, Malic acid esters, Glycerin',
 'price': '32.90',
 'product_description': 'Hypoallergenic Exfoliating Mask (Captures Impurities Refines Skin Texture) 100ml'}

In [None]:
page = df['link'][2200]
DRIVER.get(page)
print(page)
get_product_info(DRIVER)
# Problem: each ingredient sits in its own <p> / <br> tag.

https://www.watsons.com.sg/hyalucomplex-hydrating-gel-makeup-remover-(suitable-for-normal-to-dry-skin)-200ml/p/BP_16972



{'brand': 'DR. WU',
 'ingredients': 'In case of contact with eyes, rinse with water immediately.',
 'price': '36.90',
 'product_description': 'Hyalucomplex Hydrating Gel Makeup Remover (Suitable for Normal to Dry Skin) 200ml'}

In [None]:
page = df['link'][123]
DRIVER.get(page)
print(page)
get_product_info(DRIVER)
# Problem: ingredients are separated by '.' rather than ','.

https://www.watsons.com.sg/eau-thermale-gentle-toning-lotion-200ml/p/BP_43863



{'brand': 'EAU THERMALE AVENE',
 'ingredients': 'Perfects your Make-up and protects your Skin.\n\nBenefits\n- Protects : Enriched in silicates, protects your Skin gently. Alcohol-free, respects the integrity of each Skin type, even the most sensitive.\n- Soothes : Avène Thermal Spring Water, soothing, softening, provides your Skin with an instant feeling of comfort.\n\nThe Gentle toner is particularly recommended for dry sensitive Skin.',
 'price': '30.16',
 'product_description': 'Eau Thermale Gentle Toning Lotion 200ml'}

In [None]:
# Re-run of the check on the product at index 123.
page = df['link'][123]
DRIVER.get(page)
print(page)
get_product_info(DRIVER)

https://www.watsons.com.sg/eau-thermale-gentle-toning-lotion-200ml/p/BP_43863



{'brand': 'EAU THERMALE AVENE',
 'price': '30.16',
 'product_description': 'Eau Thermale Gentle Toning Lotion 200ml'}

## Start Scraping (without ingredients info)

In [None]:
watsons_product_data = []
try:
  for index, link in enumerate(df['link']):
    # Restart the browser for every product. start_driver(force_restart=True)
    # shuts the previous browser down itself, so DRIVER must not be set to
    # None first: that would orphan one Chrome process per product.
    start_driver(force_restart=True)
    DRIVER.get(link.strip())  # stored links may carry a trailing newline
    watsons_product_data.append(get_product_info(DRIVER))
    # Checkpoint every 50 products (`index // 50` was truthy for every index >= 50).
    if index % 50 == 0:
      print(index)
      # Let pandas take the union of keys, so a field missing from the first
      # record is not dropped for all the others.
      watsons_product_df = pd.DataFrame(data = watsons_product_data)
      watsons_product_df.to_csv('watson_product_df.csv')

    time.sleep(10)
finally:
  # Release the browser even when the run is interrupted; the records
  # scraped so far stay in `watsons_product_data`.
  close_driver()

KeyboardInterrupt: ignored

In [None]:
# Display the records scraped so far (this prints the whole list).
watsons_product_data

[{'brand': 'URIAGE',
  'price': '34.90',
  'product_description': 'Eau Thermale Water Cream  40ml'},
 {'brand': 'URIAGE',
  'price': '31.90',
  'product_description': 'Depiderm Brightening Cleansing Foam 100ml'},
 {'brand': 'URIAGE',
  'price': '31.90',
  'product_description': 'Hyseac Cleansing Gel (Gel Nettoyant) 150ml'},
 {'brand': 'URIAGE',
  'price': '42.90',
  'product_description': 'Bariesun Creme Minerale SPF50+ (Oil Free Water Resistant Hypoallergenique) 50ml'},
 {'brand': 'PLACENTOR VEGETAL',
  'price': '29.90',
  'product_description': 'Vegetal 10 Regulating Cream Oily 50ml'},
 {'brand': 'PLACENTOR VEGETAL',
  'price': '29.90',
  'product_description': 'Vegetal 08 Moisturizing Cream Dry 40ml'},
 {'brand': 'PLACENTOR VEGETAL',
  'price': '59.90',
  'product_description': 'Vegetal 12 Anti Agelight 50ml'},
 {'brand': 'REDOXON',
  'price': '22.90',
  'product_description': 'Kids Double Action 250mg 90 Chewable Tablets'},
 {'brand': 'LA ROCHE-POSAY',
  'price': '32.90',
  'produc

In [None]:
# Build the product table from all scraped records and save it. Letting pandas
# infer the columns takes the union of keys across records (a field missing
# from the first record is kept) and also works for an empty list.
watsons_product_df = pd.DataFrame(data = watsons_product_data)
watsons_product_df.to_csv('watson_product_df.csv')


In [None]:
# Display the scraped records again (this prints the whole list).
watsons_product_data

[{'brand': 'URIAGE',
  'price': '34.90',
  'product_description': 'Eau Thermale Water Cream 40ml'},
 {'brand': 'URIAGE',
  'price': '31.90',
  'product_description': 'Depiderm Brightening Cleansing Foam 100ml'},
 {'brand': 'URIAGE',
  'price': '31.90',
  'product_description': 'Hyseac Cleansing Gel (Gel Nettoyant) 150ml'},
 {'brand': 'URIAGE',
  'price': '42.90',
  'product_description': 'Bariesun Creme Minerale SPF50+ (Oil Free Water Resistant Hypoallergenique) 50ml'},
 {'brand': 'PLACENTOR VEGETAL',
  'price': '29.90',
  'product_description': 'Vegetal 10 Regulating Cream Oily 50ml'},
 {'brand': 'PLACENTOR VEGETAL',
  'price': '29.90',
  'product_description': 'Vegetal 08 Moisturizing Cream Dry 40ml'}]

In [None]:
# NOTE(review): `product_blocks` is not defined in any visible cell, and the
# stored output has keys ('link', 'rating') that the get_product_info defined
# in this notebook never sets — presumably left over from an earlier Sephora
# experiment; confirm, then restore the missing cells or remove these.
i = product_blocks[1]
get_product_info(i)

{'brand': 'SEPHORA COLLECTION',
 'description': 'Clean Face Mask',
 'link': 'https://www.sephora.com/product/sephora-collection-clean-face-mask-P460701?skuId=2282069',
 'price': 6.0,
 'rating': 4.5}

In [None]:
# Inspect block 2. NOTE(review): `product_blocks` is not defined in any visible cell — confirm.
i = product_blocks[2]
get_product_info(i)

{'brand': 'First Aid Beauty',
 'description': 'Ultra Repair® Cream Intense Hydration',
 'link': 'https://www.sephora.com/product/ultra-repair-cream-intense-hydration-P248407?skuId=1217744',
 'price': 46.0,
 'rating': 4.5}

In [None]:
# Inspect block 4. NOTE(review): `product_blocks` is not defined in any visible cell — confirm.
i = product_blocks[4]
get_product_info(i)

{'brand': 'Supergoop!',
 'description': 'Unseen Sunscreen SPF 40 PA+++',
 'link': 'https://www.sephora.com/product/supergoop-unseen-sunscreen-spf-40-P454380?skuId=2315935',
 'price': 34.0,
 'rating': 4.5}

In [None]:
# Inspect block 6. NOTE(review): `product_blocks` is not defined in any visible cell — confirm.
i = product_blocks[6]
get_product_info(i)

{'brand': 'fresh',
 'description': 'Sugar Hydrating Lip Value Set',
 'link': 'https://www.sephora.com/product/fresh-sugar-hydrating-lip-value-set-P475941?skuId=2480945',
 'price': 45.0,
 'rating': 5.0}

In [None]:
# Inspect block 7. NOTE(review): `product_blocks` is not defined in any visible cell — confirm.
i = product_blocks[7]
get_product_info(i)

{'brand': 'Glow Recipe',
 'description': 'Plum Plump Hyaluronic Acid Moisturizer',
 'link': 'https://www.sephora.com/product/glow-recipe-plum-plump-hyaluronic-acid-moisturizer-P479327?skuId=2535128',
 'price': 39.0,
 'rating': 5.0}

In [None]:
# Inspect block 8. NOTE(review): `product_blocks` is not defined in any visible cell — confirm.
i = product_blocks[8]
get_product_info(i)

{'link': 'https://www.sephora.com/brand/fenty-skin-rihanna?icid2=rwd%20cat%20tile_fentyskin_081121'}

In [None]:
# Inspect block 9. NOTE(review): `product_blocks` is not defined in any visible cell — confirm.
i = product_blocks[9]
get_product_info(i)

{'link': 'https://www.sephora.com/beauty/skin-care-by-age?icid2=rwd%20cat%20tile_skincarebyage_081121'}

In [None]:
# Inspect block 10. NOTE(review): `product_blocks` is not defined in any visible cell — confirm.
i = product_blocks[10]
get_product_info(i)

{'link': 'https://www.sephora.com/beauty/skincare-routine-builder?icid2=rwd%20cat%20tile_skincareroutinebuilder_081121'}

In [None]:
# Inspect block 11. NOTE(review): `product_blocks` is not defined in any visible cell — confirm.
i = product_blocks[11]
get_product_info(i)

{}

In [None]:
# Inspect block 12. NOTE(review): `product_blocks` is not defined in any visible cell — confirm.
i = product_blocks[12]
get_product_info(i)

{}

In [None]:
# Display the raw element for block 10. NOTE(review): `product_blocks` is not defined in any visible cell — confirm.
product_blocks[10]

<selenium.webdriver.remote.webelement.WebElement (session="7b774324bd3538a791146e3aeaa55c8b", element="71d512aa-0f7f-4a9c-96fe-1ff764e5e285")>

In [None]:
# Dump the page's <body> markup for inspection. Uses find_element(By.TAG_NAME, ...),
# the same lookup style as the rest of the notebook, instead of the
# deprecated find_element_by_tag_name helper.
body_element = DRIVER.find_element(By.TAG_NAME, 'body')
print(body_element.get_attribute('outerHTML'))

<body class="css-1jrcxm1"><script>if(Sephora.Util.Perf.isReportSupported()) {   window.performance.mark("HeadscriptRuntime End");   window.performance.measure("HeadscriptRuntime", "HeadscriptRuntime Start", "HeadscriptRuntime End");}</script><div class="css-o44is"><div rootid="Header" style="display:contents"><header style="display:contents"><div class="css-4tfdwd eanm77i0" data-comp="TestTarget PersistentBanner BccRwdSmallBanner BccRwdLinkHOC StyledComponent BaseComponent PersistentBanner BccRwdSmallBanner BccRwdLinkHOC StyledComponent BaseComponent PersistentBanner BccRwdSmallBanner BccRwdLinkHOC StyledComponent BaseComponent "><div class="css-wxytes eanm77i0" data-comp="StyledComponent BaseComponent "><div class="css-0"><div class="css-7niyij eanm77i0" data-comp="StyledComponent BaseComponent "><div class="css-1yymmq2 eanm77i0" data-comp="Markdown StyledComponent BaseComponent "><p><span style="color:#fff"><a title="" class="Markdown-link" href="/beauty/holiday-sale#saleonsale" styl

  """Entry point for launching an IPython kernel.
