In [None]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [None]:
# Function to extract Product Title
def get_title(soup):
    """Return the product title found in a parsed product page.

    Looks up the ``<span id="productTitle">`` element on ``soup`` and returns
    its text with surrounding whitespace removed.

    Args:
        soup: Object exposing a BeautifulSoup-style ``find(name, attrs=...)``.

    Returns:
        The stripped title text, or ``""`` if the lookup raises
        ``AttributeError`` (for example when ``find`` returns ``None``).
    """
    try:
        title_tag = soup.find("span", attrs={"id": "productTitle"})
        return title_tag.text.strip()
    except AttributeError:
        # No title element (or nothing searchable): report an empty title.
        return ""

# Function to extract discounted Product Price
def get_discounted_price(soup):
    """Return the discounted (current) price text from a parsed product page.

    Walks three nested elements: the ``div`` with class
    ``a-section a-spacing-micro``, then the ``span`` with class
    ``a-price aok-align-center`` inside it, then the ``span`` with class
    ``a-offscreen`` inside that, and returns the innermost element's text
    with surrounding whitespace removed.

    Args:
        soup: Object exposing a BeautifulSoup-style ``find(name, attrs=...)``.

    Returns:
        The stripped price text, or ``""`` if any step of the walk raises
        ``AttributeError`` (for example when a ``find`` returns ``None``).
    """
    try:
        price_section = soup.find("div", attrs={"class": "a-section a-spacing-micro"})
        price_block = price_section.find("span", attrs={"class": "a-price aok-align-center"})
        offscreen = price_block.find("span", attrs={"class": "a-offscreen"})
        return offscreen.text.strip()
    except AttributeError:
        # One of the nested elements was missing.
        return ""

# Function to extract discount
def get_discount(soup):
    """Return the discount percentage text from a parsed product page.

    Finds the ``div`` with class ``a-section a-spacing-none aok-align-center``
    and, inside it, the ``span`` carrying the savings-percentage classes, then
    returns that span's text with surrounding whitespace removed.

    Args:
        soup: Object exposing a BeautifulSoup-style ``find(name, attrs=...)``.

    Returns:
        The stripped discount text, or ``""`` if the lookup raises
        ``AttributeError`` (for example when a ``find`` returns ``None``).
    """
    try:
        # Search the soup passed in by the caller (not a module-level global),
        # and pass attrs as a {name: value} dict rather than a set literal.
        savings_section = soup.find("div", attrs={"class": 'a-section a-spacing-none aok-align-center'})
        savings_tag = savings_section.find("span", attrs={"class": 'a-size-large a-color-price savingPriceOverride aok-align-center reinventPriceSavingsPercentageMargin savingsPercentage'})
        discount = savings_tag.text.strip()

    except AttributeError:
        discount = ""

    return discount

# Function to extract Product Price
def get_price(soup):
    """Return the list (pre-discount) price text from a parsed product page.

    Walks three nested elements: the ``div`` with class
    ``a-section a-spacing-small aok-align-center``, then the ``span`` with
    class ``a-size-small a-color-secondary aok-align-center basisPrice``
    inside it, then the ``span`` with class ``a-offscreen`` inside that, and
    returns the innermost element's text with surrounding whitespace removed.

    Args:
        soup: Object exposing a BeautifulSoup-style ``find(name, attrs=...)``.

    Returns:
        The stripped price text, or ``""`` if any step of the walk raises
        ``AttributeError`` (for example when a ``find`` returns ``None``).
    """
    try:
        # attrs is a {attribute-name: value} dict; a set literal such as
        # {"class", "..."} is not an attribute filter.
        price_section = soup.find("div", attrs={"class": 'a-section a-spacing-small aok-align-center'})
        basis_price = price_section.find("span", attrs={"class": 'a-size-small a-color-secondary aok-align-center basisPrice'})
        price = basis_price.find("span", attrs={"class": 'a-offscreen'}).text.strip()

    except AttributeError:
        price = ""

    return price

# Function to extract Product Rating
def get_rating(soup):
    """Return the star-rating text from a parsed product page.

    First looks for the ``<i>`` element whose class attribute is exactly
    ``a-icon a-icon-star a-star-4-5``. Because that selector only exists on
    products rated 4.5 stars, a second lookup for any ``<i>`` carrying the
    ``a-icon-star`` class is tried when the first finds nothing, so products
    with other ratings are no longer reported as unrated.

    Args:
        soup: Object exposing a BeautifulSoup-style ``find(name, attrs=...)``.

    Returns:
        The stripped rating text, or ``""`` if no star icon is found (the
        lookup raises ``AttributeError``).
    """
    try:
        star_icon = soup.find("i", attrs={'class': 'a-icon a-icon-star a-star-4-5'})
        if star_icon is None:
            # NOTE(review): assumes the first a-icon-star icon on the page is
            # the product's overall rating - confirm against live markup.
            star_icon = soup.find("i", attrs={'class': 'a-icon-star'})
        rating = star_icon.text.strip()

    except AttributeError:
        rating = ""

    return rating

# Function to extract Number of User Reviews
def get_review_count(soup):
    """Return the customer review count text from a parsed product page.

    Looks up the ``<span id="acrCustomerReviewText">`` element on ``soup``
    and returns its text with surrounding whitespace removed.

    Args:
        soup: Object exposing a BeautifulSoup-style ``find(name, attrs=...)``.

    Returns:
        The stripped review-count text, or ``""`` if the lookup raises
        ``AttributeError`` (for example when ``find`` returns ``None``).
    """
    try:
        reviews_tag = soup.find("span", attrs={'id': 'acrCustomerReviewText'})
        return reviews_tag.text.strip()
    except AttributeError:
        # No review-count element on this page.
        return ""

# Function to extract Availability Status
def get_availability(soup):
    """Return the availability status text from a parsed product page.

    Looks up the ``<div id="availability">`` element on ``soup`` and returns
    its text with surrounding whitespace removed.

    Args:
        soup: Object exposing a BeautifulSoup-style ``find(name, attrs=...)``.

    Returns:
        The stripped availability text, or ``"Not Available"`` if the lookup
        raises ``AttributeError`` (for example when ``find`` returns ``None``).
    """
    try:
        availability_tag = soup.find("div", attrs={'id': 'availability'})
        return availability_tag.text.strip()
    except AttributeError:
        # A missing availability element is reported as unavailable.
        return "Not Available"

In [None]:
# add your user agent
HEADERS = ({'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36', 'Accept-Language': 'en-US, en;q=0.5'})

# Seconds to wait for a response before giving up on a request. Without a
# timeout, requests.get can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

# The webpage URL
URL = "https://www.amazon.com/s?k=hand+bags&rh=n%3A7141123011%2Cp_89%3AFossil&dc&ds=v1%3Aw5ZdMJJeUUMNrP3PZsYx%2FdzaVyD2ArRTS7hLCXtpaGQ&crid=23XIRL07U9CLD&qid=1671547150&rnid=2528832011&sprefix=hand+bags%2Caps%2C127&ref=sr_nr_p_89_2"

# HTTP Request for the search-results page
webpage = requests.get(URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)

# Soup Object containing all data
soup = BeautifulSoup(webpage.content, "html.parser")

# Fetch links as List of Tag Objects
links = soup.find_all("a", attrs={'class':'a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal'})

# Store the links. Anchors without an href are skipped: Tag.get returns None
# for a missing attribute, which would break the URL concatenation below.
links_list = [link.get('href') for link in links if link.get('href')]

# One list per output column; each product page appends one value to every list.
d = {"title":[], "discounted_price":[], "discount_percent":[], "price":[], "rating":[], "reviews":[],"availability":[]}

# Loop for extracting product details from each link
for link in links_list:
    # hrefs are concatenated onto the site root, as in the original request.
    new_webpage = requests.get("https://www.amazon.com" + link, headers=HEADERS, timeout=REQUEST_TIMEOUT)

    new_soup = BeautifulSoup(new_webpage.content, "html.parser")

    # Function calls to collect all necessary product information
    d['title'].append(get_title(new_soup))
    d['discounted_price'].append(get_discounted_price(new_soup))
    d['discount_percent'].append(get_discount(new_soup))
    d['price'].append(get_price(new_soup))
    d['rating'].append(get_rating(new_soup))
    d['reviews'].append(get_review_count(new_soup))
    d['availability'].append(get_availability(new_soup))

# clean up
df = pd.DataFrame.from_dict(d)

# Mark empty titles as missing and drop those rows. The replacement is assigned
# back to the column instead of calling replace(..., inplace=True) on the
# df['title'] selection: that form is chained assignment, which pandas does not
# propagate to df under Copy-on-Write. .copy() gives the column assignments
# below an independent frame to write into.
df['title'] = df['title'].replace('', np.nan)
df = df.dropna(subset=['title']).copy()

# Strip the currency symbol and thousands separators before numeric conversion
# ("$1,299.00" -> 1299.0); a leftover comma would make to_numeric raise.
df['discounted_price'] = pd.to_numeric(df['discounted_price'].replace(r'[\$,]', '', regex=True))
# "-25%" -> -25
df['discount_percent'] = pd.to_numeric(df['discount_percent'].str.rstrip('%'))
df['price'] = pd.to_numeric(df['price'].replace(r'[\$,]', '', regex=True))
# Rating is left as text with the " out of 5 stars" suffix removed.
df['rating'] = df['rating'].replace(r'\ out of 5 stars', '', regex=True)
# Drop the " rating(s)" suffix and thousands separators, then convert.
df['reviews'] = pd.to_numeric(
    df['reviews'].replace(r'\ ratings?|,', '', regex=True),
    downcast='signed',
)
df['availability'] = df['availability'].replace(r'\.', '', regex=True)

df.to_csv("web_data.csv", header=True, index=False)