In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [2]:
#Function to extract Product Title
def get_title(soup):
    """Return the product title found in a parsed product page.

    Looks up the ``<span id="productTitle">`` element on ``soup`` and returns
    its text with leading and trailing whitespace removed.

    Args:
        soup: Object exposing a BeautifulSoup-style ``find(name, attrs=...)``.

    Returns:
        The stripped title text, or ``""`` when the lookup raises
        ``AttributeError`` (for example because ``find`` returned ``None``).
    """
    try:
        heading = soup.find("span", attrs={"id": "productTitle"})
        # ``heading`` is None when the element is missing; the attribute
        # access below then raises AttributeError, handled by the fallback.
        return heading.text.strip()
    except AttributeError:
        return ""

# Function to extract Product Price
def get_price(soup):
    """Return the displayed price text from a parsed product page.

    Finds the "price to pay" ``<span>`` by its class string, then reads the
    text of the nested ``<span class="a-offscreen">`` element.

    Args:
        soup: Object exposing a BeautifulSoup-style ``find(name, attrs=...)``.

    Returns:
        The nested element's text exactly as found (not stripped), or ``""``
        when any step of the lookup raises ``AttributeError``.
    """
    price_box_class = 'a-price aok-align-center reinventPricePriceToPayMargin priceToPay'
    try:
        price_box = soup.find("span", attrs={'class': price_box_class})
        # Either lookup may return None; the following attribute access then
        # raises AttributeError, which selects the empty-string fallback.
        amount = price_box.find("span", attrs={'class': "a-offscreen"})
        return amount.text
    except AttributeError:
        return ""

# Function to extract Product Rating
def get_rating(soup):
    """Return the first three characters of the product's star-rating text.

    Two lookups are tried in order:

    1. ``<i class="a-icon a-icon-star a-star-4-5">``
    2. ``<span class="a-icon-alt">`` (used when the first lookup fails)

    Args:
        soup: Object exposing a BeautifulSoup-style ``find(name, attrs=...)``.

    Returns:
        The first three characters of the matched element's text, or ``""``
        when neither element is found.
        NOTE(review): the slice presumably captures a value such as "4.3"
        from text like "4.3 out of 5 stars" - confirm against the live markup.
    """
    try:
        rating = soup.find("i", attrs={"class": "a-icon a-icon-star a-star-4-5"}).text[:3]
    except AttributeError:
        try:
            rating = soup.find("span", attrs={"class": "a-icon-alt"}).text[:3]
        except AttributeError:
            # Only a missing element is treated as "no rating". A bare
            # ``except`` here would also swallow KeyboardInterrupt and
            # SystemExit, making the scraper impossible to interrupt cleanly.
            rating = ""
    return rating

# Function to extract Number of User Ratings
def get_rating_count(soup):
    """Return the number of user ratings shown on a parsed product page.

    Reads the text of ``<span id="acrCustomerReviewText">`` and returns its
    first whitespace-delimited token. Taking the whole token instead of a
    fixed six-character slice keeps large counts intact (a slice turns
    "1,23,456 ratings" into "1,23,4") and tolerates leading whitespace.

    Args:
        soup: Object exposing a BeautifulSoup-style ``find(name, attrs=...)``.

    Returns:
        The leading token of the element's text as a string (thousands
        separators are kept), or ``""`` when the element is missing or its
        text is empty/whitespace only.
    """
    try:
        review_text = soup.find("span", attrs={'id': 'acrCustomerReviewText'}).text
    except AttributeError:
        return ""
    tokens = review_text.split()
    return tokens[0] if tokens else ""

In [3]:
if __name__ == '__main__':

    # Search-results page whose product links are scraped.
    URL = "https://www.amazon.in/s?k=led+tv&crid=2N6YCIRJUUGRB&sprefix=LED+TV%2Caps%2C232&ref=nb_sb_ss_ts-doa-p_2_6"

    # Request headers. The User-Agent value is a masked placeholder: replace
    # it with your own browser's user agent string before running.
    HEADERS = {'User-Agent': '*****************', 'Accept-Language': 'en-US, en;q=0.5'}

    # Seconds to wait for each HTTP response. Without a timeout,
    # requests.get() can block forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    # Download and parse the search-results page.
    webpage = requests.get(URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(webpage.content, "html.parser")

    # Product anchors, selected by their class attribute. The filter is
    # spelled as an explicit {'class': ...} mapping rather than a bare set.
    links = soup.find_all("a", attrs={'class': 'a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal'})

    # Relative product URLs. Anchors without an href are skipped because
    # concatenating None onto the site prefix below would raise TypeError.
    list_of_links = [link.get('href') for link in links if link.get('href')]

    # One list per output column; index i across the lists is one product.
    d = {"title":[], "price":[], "rating":[], "num_ratings":[]}

    # Fetch each product page and extract its details.
    for link in list_of_links:
        new_webpage = requests.get('https://www.amazon.in' + link, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        new_soup = BeautifulSoup(new_webpage.content, "html.parser")

        d['title'].append(get_title(new_soup))
        d['price'].append(get_price(new_soup))
        d['rating'].append(get_rating(new_soup))
        d['num_ratings'].append(get_rating_count(new_soup))

    amazon_df = pd.DataFrame.from_dict(d)

    # Mark rows with no title as missing so dropna() can remove them. The
    # result is assigned back: an in-place replace on a column selection is
    # not guaranteed to modify the parent frame under pandas Copy-on-Write.
    amazon_df['title'] = amazon_df['title'].replace('', np.nan)
    amazon_df = amazon_df.dropna(subset=['title'])
    amazon_df.to_csv("amazon_data.csv", header = True, index = False)

In [4]:
# Display the product table built by the scraping cell above (rows without a title were dropped).
amazon_df

Unnamed: 0,title,price,rating,num_ratings
0,OnePlus 108 cm (43 inches) Y Series 4K Ultra H...,"₹27,999",,8438
1,OnePlus 138.7 cm (55 inches) U Series 4K LED S...,"₹42,999",,7072
2,VW 80 cm (32 inches) Frameless Series HD Ready...,"₹6,999",4.3,4916
3,OnePlus 80 cm (32 inches) Y Series HD Ready LE...,"₹11,999",4.2,37043
4,Samsung 80 cm (32 Inches) Wondertainment Serie...,"₹13,990",4.4,8970
5,Redmi 80 cm (32 inches) HD Ready Smart LED Fir...,"₹12,999",,
6,LG 80 cm (32 inches) HD Ready Smart LED TV 32L...,"₹13,990",4.3,12571
7,Redmi 80 cm (32 inches) Android 11 Series HD R...,"₹12,999",,47916
8,Acer 100 cm (40 inches) P Series Full HD Andro...,"₹16,999",,8227
9,MI 80 cm (32 inches) 5A Series HD Ready Smart ...,"₹13,999",4.3,34465
