In [1]:
from urllib.parse import urljoin

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
#Function to extract product title
def get_title(soup):
    """Return the product title from a parsed product page.

    Looks up the <span> whose id is "productTitle" and returns its text with
    surrounding whitespace removed.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped title text, or "" when the lookup raises AttributeError
        (for example, when ``find`` returns None because no such span exists).
    """
    try:
        title_tag = soup.find("span", attrs={"id":'productTitle'})
        return title_tag.text.strip()
    except AttributeError:
        return ""


#Function to extract price
def get_price(soup):
    """Return the product price as a single string.

    Concatenates the text of the first <span class="a-price-whole"> with the
    text of the first <span class="a-price-fraction">. Each part is stripped
    of surrounding whitespace first, matching how get_title and
    get_availability clean their values, so stray newlines or spaces in the
    markup do not end up inside the price.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The whole part followed by the fraction part, or "" when either span
        is missing (the lookup then raises AttributeError on None).
    """
    try:
        price_whole = soup.find("span", attrs={'class':'a-price-whole'}).text.strip()
        price_decimal = soup.find("span", attrs={'class':'a-price-fraction'}).text.strip()
    except AttributeError:
        # Both parts are required; a partial price is reported as missing.
        return ""
    return price_whole + price_decimal


#Function to extract the ratings
def get_rating(soup):
    """Return the rating text from a parsed product page.

    Reads the text of the first <span class="a-icon-alt"> and strips
    surrounding whitespace, consistent with get_title and get_availability.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped rating text, or "" when no such span is found (the
        lookup then raises AttributeError on None).
    """
    try:
        return soup.find("span", attrs={'class':'a-icon-alt'}).text.strip()
    except AttributeError:
        return ""


#Function to extract the number of reviews
def get_review_count(soup):
    """Return the review-count text from a parsed product page.

    Reads the text of the <span> whose id is "acrCustomerReviewText" and
    strips surrounding whitespace, consistent with get_title and
    get_availability.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped review-count text, or "" when no such span is found (the
        lookup then raises AttributeError on None).
    """
    try:
        return soup.find("span", attrs={'id': 'acrCustomerReviewText'}).text.strip()
    except AttributeError:
        return ""

#Function to extract Availability status
def get_availability(soup):
    """Return the availability text from a parsed product page.

    Reads the text of the first <span> matched by the class string
    "a-size-medium a-color-success" and strips surrounding whitespace.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped availability text. When the lookup raises AttributeError
        (e.g. no matching span), "Out of stock" is returned; note that this
        fallback is used for every page where the span is absent, whatever
        the reason.
    """
    try:
        status_tag = soup.find("span", attrs = {'class': 'a-size-medium a-color-success'})
        return status_tag.text.strip()
    except AttributeError:
        return "Out of stock"

In [3]:
#Headers for request
# Sent with every request below: a desktop-browser User-Agent plus an
# English language preference.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
    'Accept-Language': 'en-US, en;q=0.5',
}

In [4]:
# Search-results page whose product links are scraped below (query: k=laptop).
URL = "https://www.amazon.com/s?k=laptop&crid=22BNRU1NAF8I&sprefix=laptop%2Caps%2C113&ref=nb_sb_noss_2"

In [5]:
#HTTP Request
# Fetch the search-results page. The timeout stops the cell from hanging
# indefinitely if the server never answers (requests applies no timeout by default).
webpage = requests.get(URL, headers=HEADERS, timeout=30)
# Fail here on 4xx/5xx responses rather than parsing an error page as if it
# were search results.
webpage.raise_for_status()

In [6]:
# Display the response object as the cell output to inspect the request's outcome.
webpage

<Response [200]>

In [7]:
#Soup Object containing all data
# Parse the downloaded search page's raw bytes into a navigable tree.
soup = BeautifulSoup(markup=webpage.content, features="html.parser")

In [8]:
#Fetch links as list of tag objects
# Collect every <a> tag on the search page that matches the class string below.
links = soup.find_all(
    "a",
    class_='a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal',
)

In [9]:
# Empty list that will hold the href of each product link.
links_list = list()

In [10]:
#Looping through all the links to extract data
# Rebuild the list from scratch so that re-running this cell does not append
# the same hrefs a second time.
links_list = []
seen_hrefs = set()
for link in links:
    href = link.get('href')
    # Skip anchors without an href (get() returns None for them) and hrefs
    # already collected, keeping first-seen order.
    if href and href not in seen_hrefs:
        seen_hrefs.add(href)
        links_list.append(href)

In [11]:
# Column-oriented container: one independent list per scraped field.
d = {column: [] for column in ("title", "price", "rating", "reviews", "availability")}

In [12]:
#Loop for extracting product details from each link
# Start from empty columns so that re-running this cell does not append a
# second copy of every row.
d = {"title":[], "price":[], "rating":[], "reviews":[],"availability":[]}

for link in links_list:
    if not link:
        # An anchor without an href yields None; there is nothing to fetch.
        continue

    # urljoin handles both site-relative hrefs ("/dp/...") and hrefs that are
    # already absolute URLs, unlike plain string concatenation.
    product_url = urljoin("https://www.amazon.com", link)

    try:
        # The timeout keeps one unresponsive page from stalling the whole loop.
        new_webpage = requests.get(product_url, headers=HEADERS, timeout=30)
        new_webpage.raise_for_status()
    except requests.RequestException as exc:
        # Report and skip pages that could not be fetched instead of aborting
        # the run or recording a row parsed from an error page.
        print("Skipping", product_url, "-", exc)
        continue

    new_soup = BeautifulSoup(new_webpage.content, "html.parser")

    #Function calls
    # Append one value per column so all lists stay the same length.
    d['title'].append(get_title(new_soup))
    d['price'].append(get_price(new_soup))
    d['rating'].append(get_rating(new_soup))
    d['reviews'].append(get_review_count(new_soup))
    d['availability'].append(get_availability(new_soup))

In [13]:
# Build the results table; each key of d becomes a column.
amazon_df = pd.DataFrame(data=d)

In [14]:
# Display the assembled DataFrame as the cell output.
amazon_df

Unnamed: 0,title,price,rating,reviews,availability
0,Lenovo IdeaPad Gaming Chromebook - 2022 - Chro...,569.99,3.9 out of 5 stars,12 ratings,Out of stock
1,"Lenovo IdeaPad 15.6"" Laptop Newest, 20GB RAM, ...",399.98,4.1 out of 5 stars,222 ratings,In Stock
2,"SGIN Laptop 15.6 Inch, 4GB DDR4 128GB SSD Wind...",259.98,4.5 out of 5 stars,"1,180 ratings",In Stock
3,"jumper Laptop 16 Inch FHD IPS Display (16:10),...",279.98,4.4 out of 5 stars,"1,488 ratings",Out of stock
4,ASUS Vivobook Go 15 L510 Thin & Light Laptop C...,189.99,4.3 out of 5 stars,"1,594 ratings",In Stock
5,"SGIN Laptop 15.6 Inch, 4GB DDR4 128GB SSD Wind...",259.98,4.5 out of 5 stars,"1,180 ratings",In Stock
6,ASUS Vivobook Go 15 L510 Thin & Light Laptop C...,189.99,4.3 out of 5 stars,"1,594 ratings",In Stock
7,"jumper Laptop 16 Inch FHD IPS Display (16:10),...",279.98,4.4 out of 5 stars,"1,488 ratings",Out of stock
8,"Apple MacBook Air with Intel Core i5, 1.6GHz, ...",215.0,4.2 out of 5 stars,"1,650 ratings",In Stock.
9,Acer Chromebook Spin 314 Convertible Laptop | ...,329.04,4.3 out of 5 stars,473 ratings,In Stock


In [17]:
# Write the table to CSV without the index column; the header row is written by default.
amazon_df.to_csv(path_or_buf="amazon_laptop_data.csv", index=False)