In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [7]:
# Function to extract Product Title
def get_title(soup):

    try:
        # Outer Tag Object
        title = soup.find('span',attrs={"id":"productTitle"}).text.strip()

    except AttributeError:
        title = "not found"

    return title

# Function to extract Product Price
def get_price(soup):

    try:
        price = soup.find('span', attrs={"class": "a-price-whole"}).text.strip()
        dec = soup.find('span', attrs={"class": "a-price-fraction"}).text.strip()
        curr = soup.find('span', attrs={"class": "a-price-symbol"}).text.strip()
        prix = curr+price + dec

    except AttributeError:
        prix = 'not found'

    return prix

# Function to extract Product Rating
def get_rating(soup):

    try:
        rating = soup.find('span',attrs={"class":"a-icon-alt"}).text
    
    except AttributeError:
        try:
            rating = soup.find("span", attrs={'class':'a-icon-alt'}).string.strip()
        except:
            rating = "not found"	

    return rating

# Function to extract Number of User Reviews
def get_review_count(soup):
    try:
        review_count = soup.find('span',attrs={"id":"acrCustomerReviewText"}).text.strip()

    except AttributeError:
        review_count = "not found"	

    return review_count

# Function to extract Availability Status
def get_availability(soup):
    try:
        available = soup.find('span',attrs={"class":"a-size-medium a-color-success"}).text.strip()

    except AttributeError:
        available = "Not Available"	

    return available

In [17]:
if __name__ == '__main__':

    # Define HTTPS headers
    HEADERS = ({'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36","Accept-Language":'en-US, en;q=0.5'})


    # The webpage URL
    URL  = "https://www.amazon.com/s?k=gaming&crid=3FA82T929BN9L&sprefix=gaming%2Caps%2C209&ref=nb_sb_noss_1"

    # HTTP Request
    webpage = requests.get(URL, headers=HEADERS)

    # Soup Object containing all data
    soup = BeautifulSoup(webpage.content, "html.parser")

    # Fetch links as List of Tag Objects
    links = soup.find_all("a", attrs={'class':'a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal'})

    # Store the links
    links_list = []

    # Loop for extracting links from Tag Objects
    for link in links:
            links_list.append(link.get('href'))

    d = {"title":[], "price":[], "rating":[], "reviews":[],"availability":[]}
    
    # Loop for extracting product details from each link 
    for link in links_list:
        url = "https://www.amazon.com/" + link
        new_webpage = requests.get(url, headers=HEADERS)
        print(f"Scraping sur  : {url} ")

        new_soup = BeautifulSoup(new_webpage.content, "html.parser")

        # Function calls to display all necessary product information
        d['title'].append(get_title(new_soup))
        d['price'].append(get_price(new_soup))
        d['rating'].append(get_rating(new_soup))
        d['reviews'].append(get_review_count(new_soup))
        d['availability'].append(get_availability(new_soup))

    
    amazon_df = pd.DataFrame.from_dict(d)
    amazon_df['availability'] = amazon_df['availability'].replace('', np.nan)
    amazon_df.to_csv("amazon_data.csv", header=True, index=False)
    print("Données sauvegardées avec succes !!")

Scraping sur  : https://www.amazon.com//sspa/click?ie=UTF8&spc=MTo3NjAyNzQ0NjY0NjcwNDc1OjE3MzIwNjQ1OTc6c3BfYXRmOjIwMDA5NDUyMzQzNzY4MTo6MDo6&url=%2FAA-Products-Ergonomic-Computer-Adjustable%2Fdp%2FB095NZ4CQT%2Fref%3Dsr_1_1_sspa%3Fcrid%3D3FA82T929BN9L%26dib%3DeyJ2IjoiMSJ9.1utwmIQwwJeb7CrXHXlrCwQz8bCuMTgdNGqe93qRVv9xliEiPvq68UZ4G3XOs2UWCs6QXr2IIvHh2ByD-QXnC2rfO7IpCPUkEKTIg_-Tvq-tHju8N6fgO6yFQW8SThV3oPQBqNjFi7YK115qCg_gE5IYoIjLO493KUJkTscR9wGH8pSWgITZRHgPHWX6xy3HenizUlB7OwpZuXAQj4F9YPsLAmTT6pVteT_ZtgCNhc6plKJupOH7z9qLdy_hvy6BRoEdtxPb-rCFJQMlk44NCfDLMVIrW8D15Ec9eqBJE9Q.0cxHMJ7_hpgHBqEW8O1LdHMjeOc0yf0Z8ow2vDm_wZg%26dib_tag%3Dse%26keywords%3Dgaming%26qid%3D1732064597%26sprefix%3Dgaming%252Caps%252C209%26sr%3D8-1-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9hdGY%26psc%3D1 
Scraping sur  : https://www.amazon.com//Xbox-Elite-Wireless-Controller-Core-Controllers/dp/B0B789CGGQ/ref=sr_1_2?crid=3FA82T929BN9L&dib=eyJ2IjoiMSJ9.1utwmIQwwJeb7CrXHXlrCwQz8bCuMTgdNGqe93qRVv9xliEiPvq68UZ4G3XOs2UWCs6QXr2IIvHh2ByD-QXn