In [7]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# HTTP request headers passed to every requests.get call in this cell.
# NOTE(review): 'User-Agent' is an empty string here — presumably a real
# browser User-Agent string has to be filled in before running; confirm.
HEADERS = {
    'User-Agent': '',
    'Accept-Language': 'en-US, en;q=0.5',
}

def fetch_text_from_element(soup, target_tag, attributes):
    """Return the stripped ``.string`` of the first matching element, or "".

    Args:
        soup: Object exposing ``find(name, attrs=...)``; callers in this file
            pass a parsed page.
        target_tag: Tag name handed to ``find``.
        attributes: Attribute filter handed to ``find`` as ``attrs``.

    Returns:
        The matched element's ``.string`` with surrounding whitespace removed.
        An empty string is returned whenever the lookup chain raises
        AttributeError, e.g. ``find`` returned None or ``.string`` is None.
    """
    try:
        return soup.find(target_tag, attrs=attributes).string.strip()
    except AttributeError:
        # Any missing link in the chain means "no text available".
        return ""

def fetch_review_count(soup):
    """Return the text of the ``<span id="acrCustomerReviewText">`` element.

    Delegates to ``fetch_text_from_element``, so an empty string comes back
    when that helper cannot extract any text.
    """
    review_selector = {'id': 'acrCustomerReviewText'}
    return fetch_text_from_element(soup, "span", review_selector)

def extract_product_title(soup):
    """Return the product title with surrounding whitespace removed.

    Looks up the ``<span id="productTitle">`` element via ``soup.find`` and
    returns its ``.text``, stripped.

    Args:
        soup: Object exposing ``find(name, attrs=...)``; callers in this file
            pass a parsed product page.

    Returns:
        The stripped title string, or an empty string when the lookup raises
        AttributeError (for example, the element is not on the page).
    """
    try:
        return soup.find("span", attrs={"id": 'productTitle'}).text.strip()
    except AttributeError:
        # Previously the exception text ("Error: ...") was returned here and
        # ended up stored as the product's title. Return an empty string
        # instead, matching the "" that fetch_text_from_element uses for
        # missing text, so the title column never contains error messages.
        return ""

def extract_product_rating(soup):
    """Return the rating text for a product page, or "" if none is found.

    Two lookups are attempted in order through ``fetch_text_from_element``:
    first an ``<i>`` whose class is ``'a-icon a-icon-star a-star-4-5'``, then,
    if that produced an empty string, a ``<span class="a-icon-alt">``.

    NOTE(review): the first class string is hard-coded to ``a-star-4-5``;
    presumably other star values only match through the fallback — confirm.
    """
    star_icon_text = fetch_text_from_element(soup, "i", {'class': 'a-icon a-icon-star a-star-4-5'})
    if star_icon_text:
        return star_icon_text
    # First lookup came back empty: fall back to the alt-text span.
    return fetch_text_from_element(soup, "span", {'class': 'a-icon-alt'})

def check_product_availability(soup):
    """Return the availability text of a product page.

    Finds the ``<div id="availability">`` element, takes the first ``<span>``
    inside it, and returns that span's ``.string`` stripped of surrounding
    whitespace.

    Returns:
        The stripped availability text, or the literal ``"Not Available"``
        when any step of the lookup raises AttributeError (missing div,
        missing span, or a span whose ``.string`` is None).
    """
    try:
        return soup.find("div", attrs={'id': 'availability'}).find("span").string.strip()
    except AttributeError:
        return "Not Available"

def extract_product_price(soup):
    """Return the raw text of the ``<span class="a-price-whole">`` element.

    The text is returned exactly as found (no stripping or numeric
    conversion). ``None`` is returned when ``soup.find`` yields a falsy
    result, i.e. no such element was found.
    """
    whole_price_span = soup.find("span", attrs={"class": "a-price-whole"})
    return whole_price_span.text if whole_price_span else None

if __name__ == '__main__':
    # Search-results page whose product links are followed one by one below.
    URL = 'https://www.amazon.in/s?i=shoes&bbn=1983518031&rh=n%3A1983518031%2Cp_n_feature_nineteen_browse-bin%3A11301363031%2Cp_89%3APuma&s=shoes&dc&ds=v1%3AOeRhZn4pD6sD9gk%2FOVfSDnmfUL8aTssLDqnY4pL2Kok&pf_rd_i=1983518031&pf_rd_i=1983518031&pf_rd_m=A1VBAL9TL5WCBF&pf_rd_m=A1VBAL9TL5WCBF&pf_rd_p=0a2cc271-509d-4e6c-a55b-f2a271405749&pf_rd_p=f1226c7e-2394-47e8-8298-4069f238c983&pf_rd_r=2FVEK59QSAWC35YANQ00&pf_rd_r=HCWGHCYG4V99QN6YJZBM&pf_rd_s=merchandised-search-6&pf_rd_s=merchandised-search-6&qid=1681034803&rnid=3837712031&ref=sr_nr_p_89_12'
    # Seconds to wait on each HTTP request. Without an explicit timeout,
    # requests.get can block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 30

    webpage = requests.get(URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(webpage.content, "html.parser")

    links = soup.find_all("a", attrs={'class': 'a-link-normal s-no-outline'})
    # .get('href') returns None for anchors without an href; drop those so the
    # string concatenation in the loop below cannot raise TypeError.
    links_list = [href for href in (link.get('href') for link in links) if href]

    # One dict per product page; turned into a DataFrame in a single step.
    data_list = []

    for link in links_list:
        # hrefs are appended to the site root to build the product page URL.
        new_webpage = requests.get("https://www.amazon.in" + link, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        new_soup = BeautifulSoup(new_webpage.content, "html.parser")

        data_list.append({
            'title': extract_product_title(new_soup),
            'price': extract_product_price(new_soup),
            'rating': extract_product_rating(new_soup),
            'reviews': fetch_review_count(new_soup),
            'availability': check_product_availability(new_soup)
        })

    df = pd.DataFrame(data_list)


In [8]:
# Show the DataFrame assembled by the scraping cell as this cell's output.
df

Unnamed: 0,title,price,rating,reviews,availability
0,Puma Men's Dazzler Sneakers,1483.0,4.0 out of 5 stars,"4,914 ratings",In stock
1,Puma Unisex-Adult Slide Max,858.0,4.0 out of 5 stars,939 ratings,In stock
2,Puma Unisex-Adult SmashicCasual Shoe,999.0,3.9 out of 5 stars,593 ratings,Not Available
3,Puma Unisex-Adult Geo Slide,858.0,4.0 out of 5 stars,"1,329 ratings",In stock
4,Puma Mens CoarseRunning Shoe,1519.0,3.9 out of 5 stars,392 ratings,In stock
5,Puma Mens Ultimate Ease Walking Shoe,1609.0,4.0 out of 5 stars,"3,203 ratings",In stock
6,Puma Unisex-Adult Wired Rapid Running Shoe,2199.0,3.9 out of 5 stars,211 ratings,In stock
7,Puma mens Snatch V2 Sneaker,1551.0,4.0 out of 5 stars,"2,818 ratings",In stock
8,Puma mens Atlas Walking Shoe,2429.0,3.8 out of 5 stars,"1,065 ratings",In stock
9,Puma Mens Running Shoes,1639.0,4.0 out of 5 stars,206 ratings,In stock
