---

# **Importing of Libraries**

- In this section, we import all the libraries that we will use to scrape Amazon products.

---

In [1]:
import pandas as pd
from selenium import webdriver
from bs4 import BeautifulSoup

---

# **Amazon Scraper**

- In this section, we will scrape the items highlighted in the red box in the following image.

![img](./images/amazon-webpage.PNG?raw=true)

- We will use the following user-defined functions to extract products from the Amazon website.

---

In [2]:
def get_url(search_item, page):
    """Build the Amazon India search URL for one page of results.

    Args:
        search_item: Free-text search query (a string).
        page: Number of the result page to request.

    Returns:
        The search URL as a string, e.g.
        ``https://www.amazon.in/s?k=gaming+laptop&page=2``.
    """
    # Imported locally so this function does not depend on the import cell.
    from urllib.parse import quote_plus

    # quote_plus still turns spaces into "+", and additionally percent-encodes
    # characters such as "&", "#", "+" and "{}" that would otherwise corrupt
    # the query string or be mistaken for a format placeholder.
    query = quote_plus(search_item)
    return "https://www.amazon.in/s?k={}&page={}".format(query, page)

def extractRow(item):
    """Extract the details of one Amazon search-result row.

    Args:
        item: Parsed search-result element. It must expose ``.h2``, ``.i`` and
            ``.find(name, attrs)`` the way the elements passed in by ``main``
            (BeautifulSoup tags) do.

    Returns:
        A ``(title, price, ratings, review_count, url)`` tuple of strings, or
        ``None`` when the row has no title link or no price. ``ratings`` and
        ``review_count`` are empty strings when they cannot be found.
    """
    # A row without an <h2><a href="..."> title link cannot be described, so
    # skip it instead of letting an AttributeError/TypeError abort the scrape.
    heading = item.h2
    atag = heading.a if heading is not None else None
    if atag is None:
        return None

    href = atag.get('href')
    if href is None:
        return None

    title = atag.text.strip()
    url = 'https://www.amazon.in' + href

    # Rows without a price are skipped.
    try:
        price = item.find('span', 'a-price').find('span', 'a-offscreen').text
    except AttributeError:
        return None

    # Ratings and review count are optional; if either lookup fails, both are
    # left blank.
    try:
        ratings = item.i.text
        review_count = item.find('span', {'class': 'a-size-base'}).text
    except AttributeError:
        ratings = ''
        review_count = ''

    return (title, price, ratings, review_count, url)

# Rows collected by the most recent call to main(); kept at module level so
# later cells can still inspect the raw tuples.
records = list()

def main(search, pages):
    """Scrape Amazon search results and save them to ``results.csv``.

    Walks result pages ``1..pages`` for ``search`` in a Chrome browser, keeps
    every row for which ``extractRow`` returns a value, and writes the rows to
    ``results.csv`` in the working directory.

    Args:
        search: Search query passed on to ``get_url``.
        pages: Number of result pages to visit, starting at page 1.

    Returns:
        The first five rows of the scraped data as a DataFrame with the
        columns Title, Price, Ratings, Review Count and URL.
    """
    # Start from an empty list so that re-running the cell (or calling main
    # again) does not duplicate rows from an earlier run in the CSV.
    records.clear()

    # Initialize the Chrome driver
    # NOTE(review): newer Selenium 4 releases appear to reject a positional
    # driver path here -- confirm against the installed Selenium version.
    driver = webdriver.Chrome("chromedriver")

    try:
        for page in range(1, pages + 1):
            driver.get(get_url(search, page))
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            results = soup.find_all('div', {'data-component-type': 's-search-result'})

            for result in results:
                record = extractRow(result)

                if record:
                    records.append(record)
    finally:
        # Always release the browser, even if a page fails to load or parse.
        # quit() ends the whole session; close() only closes the window.
        driver.quit()

    # Saving file to a CSV
    data = pd.DataFrame(data=records, columns=['Title', 'Price', 'Ratings', 'Review Count', 'URL'])
    data.to_csv('results.csv', index=False)
    return data.head()

In [3]:
# Search and get the data associated with the product.
# NUM_PAGES is the number of result pages main() visits (pages 1..NUM_PAGES).
NUM_PAGES = 21

# Strip stray leading/trailing whitespace so it is not encoded into the query.
search = input("Enter the Product:").strip()
main(search=search, pages=NUM_PAGES)

Unnamed: 0,Title,Price,Ratings,Review Count,URL
0,HP 15 Ryzen 3 Thin & Light 15.6-inch (39.6 cms...,"₹38,990",4.2 out of 5 stars,828,https://www.amazon.in/gp/slredirect/picassoRed...
1,HP 15 10th Gen Intel Core i3 Thin and Light 15...,"₹40,933",3.9 out of 5 stars,1392,https://www.amazon.in/gp/slredirect/picassoRed...
2,Lenovo IdeaPad Slim 1 Intel Celeron N4020 11.6...,"₹25,990",3.8 out of 5 stars,73,https://www.amazon.in/Lenovo-IdeaPad-Celeron-P...
3,ASUS VivoBook 14 (2021) 14.0-inch (35.56 cms) ...,"₹28,990",4.6 out of 5 stars,3,https://www.amazon.in/ASUS-14-0-inch-Pentium-S...
4,HP 15 (2021) Thin & Light 11th Gen Core i3 Lap...,"₹41,990",3.9 out of 5 stars,1392,https://www.amazon.in/HP-Laptop-15-6-inch-Wind...
