In [7]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import datetime
import csv

## Get the links off of a page of a product category

In a page of a product category (for example smartphones, AirPods, T-shirts...), copy the link of the page into "search_url_prefix", and then the function will extract all the links of each product and store them in the list "product\_links".


In [13]:
base_url = "https://www.amazon.fr"
search_url = input("Enter the url after following the user guide")
search_url_prefix= search_url[:-1]
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9"
}
product_links = [] 
num_pages=int(input("Enter number of pages you want to scrap"))
for page_num in range(1, num_pages+1):  # Pages 1 to 5
    url = search_url_prefix + str(page_num)
    print(f"Scraping page {page_num}: {url}")

    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')

    products = soup.find_all("div", {"data-component-type": "s-search-result"})

    for product in products:
        link_tag = product.find("a", class_="a-link-normal s-no-outline")
        if link_tag and "href" in link_tag.attrs:
            href = link_tag["href"]

            if "/dp/" in href:
                clean_link = base_url + href.split("?")[0]
                product_links.append(clean_link)

print(f"Found {len(product_links)} product links in total.")
for link in product_links[:10]:
    print(link)

Scraping page 1: https://www.amazon.fr/s?i=computers&rh=n%3A429879031&s=popularity-rank&fs=true&page=2&xpid=bbklrfaWjxp1E&qid=1753084293&ref=sr_pg_1
Scraping page 2: https://www.amazon.fr/s?i=computers&rh=n%3A429879031&s=popularity-rank&fs=true&page=2&xpid=bbklrfaWjxp1E&qid=1753084293&ref=sr_pg_2
Scraping page 3: https://www.amazon.fr/s?i=computers&rh=n%3A429879031&s=popularity-rank&fs=true&page=2&xpid=bbklrfaWjxp1E&qid=1753084293&ref=sr_pg_3
Scraping page 4: https://www.amazon.fr/s?i=computers&rh=n%3A429879031&s=popularity-rank&fs=true&page=2&xpid=bbklrfaWjxp1E&qid=1753084293&ref=sr_pg_4
Scraping page 5: https://www.amazon.fr/s?i=computers&rh=n%3A429879031&s=popularity-rank&fs=true&page=2&xpid=bbklrfaWjxp1E&qid=1753084293&ref=sr_pg_5
Scraping page 6: https://www.amazon.fr/s?i=computers&rh=n%3A429879031&s=popularity-rank&fs=true&page=2&xpid=bbklrfaWjxp1E&qid=1753084293&ref=sr_pg_6
Found 144 product links in total.
https://www.amazon.fr/-/en/Vivobook-S1504VA-NJ2580W-i5-1334U-Processor-G

## Extract the name, caracteristics and the price 

In [None]:
file= input("Enter the name of a csv file to store the data")
file=file+'.csv'
with open(file, 'w', newline='', encoding='UTF8') as f:
    writer = csv.writer(f)
    writer.writerow(['Title', 'Price', 'Date'])
    
for link in product_links:
    page_prods = requests.get(link, headers=headers)
    soup2_prods = BeautifulSoup(page_prods.content, 'html.parser')

    # Get title safely
    title_tag = soup2_prods.find(id="productTitle")
    title = title_tag.get_text(strip=True) if title_tag else "No title found"

    # Get price safely
    whole_tag = soup2_prods.find('span', class_='a-price-whole')
    fraction_tag = soup2_prods.find('span', class_='a-price-fraction')

    if whole_tag and fraction_tag:
        price = whole_tag.get_text(strip=True) + ',' + fraction_tag.get_text(strip=True) + ' €'
    else:
        price = "Price not found"

    today = datetime.date.today()
    data = [title, price, today]

    # Append only the data, not the header every time
    with open(file, 'a+', newline='', encoding='UTF8') as f:
        writer = csv.writer(f)
        writer.writerow(data)



## Preview of the data

In [15]:
df=pd.read_csv(file)
print(df.head(10))

                                               Title   Price in €        Date
0  ASUS Vivobook 15 S1504VA-NJ2580W 15.6 Inch FHD...    599.,99 €  2025-07-21
1  Lenovo IdeaPad 1 15IJL7 – 15.6 Inch FHD Laptop...    199.,99 €  2025-07-21
2  HP 15s-fq0003sf 15.6 inch FHD Laptop (Intel Ce...    239.,00 €  2025-07-21
3  ASUS Zenbook 14 UM3406HA-QD048W OLED 14 WUXGA ...  1,299.,99 €  2025-07-21
4  Dell Inspiron 15 3535 Laptop 15.6 Inches FHD (...    499.,00 €  2025-07-21
5  HP Laptop 15-fd0004sf, Laptop, Intel Core i3 N...    379.,00 €  2025-07-21
6  Lenovo IdeaPad Slim 3 Chromebook 14M868 – 14 I...    249.,99 €  2025-07-21
7  Acer Aspire 3 A315-24P-R6TR, 15.6 Inch Full HD...    479.,00 €  2025-07-21
8  Lenovo (17.3 Zoll IPS FHD Laptop (Intel Core U...    499.,00 €  2025-07-21
9  Lenovo IdeaPad Slim 5 14ABR8 – 14 Inch WUXGA O...    629.,99 €  2025-07-21
