# Notebook for tests

First we'll install the required libraries in the local environment.

In [16]:
import sys
!{sys.executable} -m pip install -r requirements.txt



Setting up some global variables.

In [17]:
# URL of the amazon.in results page that the notebook loads in the browser.
LINK = (
    "https://www.amazon.in/s"
    "?rh=n%3A6612025031&fs=true&ref=lp_6612025031_sar"
)
# CSS selector of the element whose inner HTML holds the result entries.
CONTAINER = ".s-main-slot" ".s-result-list" ".s-search-results"
# CSS selector matching a single result entry.
ITEM = '.s-result-item' '[data-component-type="s-search-result"]'

In [18]:
from selenium import webdriver
# Start a Firefox browser session that Selenium controls through `driver`.
driver = webdriver.Firefox()

In [19]:
# Navigate the browser to the results page defined in LINK.
driver.get(LINK)

Wait for the items to load...

In [20]:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Locator for the results container, expressed as a (strategy, selector) pair.
results_locator = (By.CSS_SELECTOR, CONTAINER)

# Poll for up to 10 seconds until the container is present in the DOM;
# `until` returns the located element.
results_wait = WebDriverWait(driver, 10)
container = results_wait.until(EC.presence_of_element_located(results_locator))

In [21]:
# Display the located element to confirm the wait returned a WebElement.
container

<selenium.webdriver.remote.webelement.WebElement (session="3d973983-ce85-45e0-bb64-0472d22892a1", element="d490d594-4f00-4533-8405-a255540d7dda")>

In [22]:
# Capture the markup inside the results container; `inside` keeps the full
# string so it can be parsed after the browser is closed.
inside = container.get_attribute("innerHTML")
# Show only the first 500 characters: echoing the whole markup floods the cell
# output and bloats the saved notebook.
inside[:500]

'\n                  \n\n\n\n\n\n\n    <div data-asin="" data-index="0" class="a-section a-spacing-none s-result-item s-flex-full-width s-border-bottom-none s-widget s-widget-spacing-large" data-cel-widget="search_result_0"><div data-uuid="7d42ce3d-7eb4-4de9-979a-6bba4c295810" cel_widget_id="MAIN-TOP_BANNER_MESSAGE-0" class="s-widget-container s-spacing-mini s-widget-container-height-mini celwidget slot=MAIN template=TOP_BANNER_MESSAGE widgetId=messaging-messages-results-header-builder" data-csa-c-id="9wbjgk-f37ekt-l6slmr-cjhk51" data-cel-widget="MAIN-TOP_BANNER_MESSAGE-0">\n\n\n\n<span data-component-type="s-messaging-widget-results-header" class="rush-component" data-component-id="1">\n    <div class="a-section a-spacing-none s-messaging-widget-results-header">\n        <div tabindex="0" class="s-no-outline">\n            <span class="a-size-medium-plus a-color-base a-text-bold">Results</span>\n        </div>\n    </div>\n</span>\n</div></div>\n\n    \n    \n    \n\n    \n\n    <scri

In [23]:
# End the browser session; the markup needed later is already stored in `inside`.
driver.quit()

We don't need Selenium anymore, so we quit the browser to get rid of the overhead it creates.  
We'll continue by parsing the captured HTML with BeautifulSoup (bs4).

In [24]:
from bs4 import BeautifulSoup

# Build a parse tree from the captured container markup using the
# "html.parser" backend.
soup = BeautifulSoup(markup=inside, features="html.parser")

In [25]:
# Collect every element matching the ITEM selector from the parsed markup,
# then show how many were found.
item_elements = soup.select(ITEM)
len(item_elements)

33

But some of these results are not visible on the page and are probably ads, so we exclude the entries that carry the `AdHolder` class.

In [26]:
# Same selection as before, minus entries that carry the `AdHolder` class.
non_ad_selector = f"{ITEM}:not(.AdHolder)"
item_elements = soup.select(non_ad_selector)
len(item_elements)

24

Just seeing if the CSS selectors I discovered are working...

In [27]:
def _text_or_placeholder(element, selector, placeholder="N/A"):
    """Return the text of the first match for `selector` inside `element`.

    `select_one` returns None when nothing matches, so the placeholder is
    returned instead of failing on `.text` for entries missing that field.
    """
    match = element.select_one(selector)
    return match.text if match is not None else placeholder


# Print id, name, price and rating for the first three entries as a quick
# check that the selectors pick up the expected fields.
for el in item_elements[:3]:
    # `.get` avoids a KeyError if an entry lacks the attribute.
    print("> Item " + el.get('data-component-id', "N/A"))
    print("Name: " + _text_or_placeholder(el, '[data-cy="title-recipe"]'))
    print("Price: " + _text_or_placeholder(el, '.a-price-whole'))
    print("Rating: " + _text_or_placeholder(el, '[data-a-popover*="average-customer-review"]'))

> Item 25
Name: MI Power Bank 3i 20000mAh Lithium Polymer 18W Fast Power Delivery Charging | Input- Type C | Micro USB| Triple Output | Sandstone Black  
Price: 2,149
Rating: 4.2 out of 5 stars
> Item 28
Name: Mi 10000mAH Li-Polymer, Micro-USB and Type C Input Port, Power Bank 3i with 18W Fast Charging (Midnight Black)  
Price: 1,299
Rating: 4.2 out of 5 stars
> Item 30
Name: MI 10000mAh Lithium Ion, Lithium Polymer Power Bank Pocket Pro with 22.5 Watt Fast Charging, Dual Input Ports(Micro-USB and Type C), Triple Output Ports, (Black)  
Price: 1,699
Rating: 4.3 out of 5 stars


Clearly the seller name isn't part of the search-result entries... We'll get it another way: by opening an item's own product page and reading it from there.

In [28]:
# CSS selectors used inside a single result entry.
# Element holding the product title (and, within it, the product link).
NAME_SELECTOR = '[data-cy="title-recipe"]'
# Element holding the whole-number part of the price.
PRICE_SELECTOR = ".a-price-whole"
# Element holding the average customer rating text.
RATING_SELECTOR = '[data-a-popover*="average-customer-review"]'
# Element present on entries labelled "Currently unavailable.".
UNAVAILABLE = '[aria-label="Currently unavailable."]'

In [29]:
# Start a new Firefox session; the previous one was closed with driver.quit().
driver = webdriver.Firefox()

In [30]:
# Load the results page again in the new browser session.
driver.get(LINK)

In [31]:
# Wait (up to 10 seconds) until the results container exists in the page that
# was just loaded.
container = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, CONTAINER))
)
# Parse the markup of this session's container instead of reusing the `soup`
# built from the earlier, already-closed session, so the entries (and their
# product links) come from the page that is currently open.
listing_soup = BeautifulSoup(container.get_attribute("innerHTML"), "html.parser")
item_elements = listing_soup.select(ITEM + ":not(.AdHolder)")

In [32]:
# Only the first entry is handled here.
# NOTE(review): presumably this is meant to become a loop over item_elements — confirm.
item = item_elements[0]

# Stays None when the entry is marked as unavailable.
link = None

# Identity comparison is the idiomatic way to test for None.
if item.select_one(UNAVAILABLE) is None:
    # The product link is the anchor nested inside the title element.
    title_anchor = item.select_one(NAME_SELECTOR).select_one("a")
    link = title_anchor['href']

print(link)

/20000mAh-Sandstone-Triple-Charging-Delivery/dp/B08HV83HL3/ref=sr_1_4?qid=1701970046&s=electronics&sr=1-4


In [33]:
# Open the product page by prefixing the site origin to the extracted link.
# NOTE(review): `link` stays None when the item is marked unavailable, and this
# concatenation would then raise a TypeError — confirm that case is handled.
driver.get("https://amazon.in" + link)

In [34]:
# Poll for up to 10 seconds until the `#merchant-info` element is present on
# the product page; `container` is rebound to that element.
merchant_locator = (By.CSS_SELECTOR, "#merchant-info")
container = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located(merchant_locator)
)

In [36]:
# Parse the markup inside the `#merchant-info` element; `soup` is rebound to
# this smaller tree.
merchant_html = container.get_attribute("innerHTML")
soup = BeautifulSoup(merchant_html, "html.parser")

In [37]:
# End the second browser session; the merchant markup is already parsed into `soup`.
driver.quit()

In [45]:
# Text of the first link inside the merchant-info markup, shown as the cell
# output (the seller name in the recorded run).
soup.select_one("a").text

'Cocoblu Retail'

There we go! Now we have everything we need.