In [2]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import datetime
import csv

## Get the links off of a page of a product category

In a page of a product category (for example smartphones, AirPods, T-shirts...), copy the link of the page into "search_url_prefix", and then the function will extract all the links of each product and store them in the list "product\_links".


In [5]:
search_url = input("Enter the Amazon search URL: ").strip()
base_url = "https://www.amazon.fr"

# Step 1: Remove any existing "&page=" and everything after it
if "&page=" in search_url:
    search_url = search_url.split("&page=")[0]

# Step 2: Make sure the URL ends properly before appending page number
if search_url.endswith("&"):
    search_url_prefix = search_url + "page="
else:
    search_url_prefix = search_url + "&page="
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.9"
}
product_links = [] 
num_pages=int(input("Enter number of pages you want to scrap"))
for page_num in range(1, num_pages+1): 
    url = search_url_prefix + str(page_num)
    print(f"Scraping page {page_num}: {url}") 

    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser') 

    products = soup.find_all("div", {"data-component-type": "s-search-result"})

    for product in products:
        link_tag = product.find("a", class_="a-link-normal s-no-outline")
        if link_tag and "href" in link_tag.attrs:
            href = link_tag["href"]

            if "/dp/" in href:
                clean_link = base_url + href.split("?")[0]
                product_links.append(clean_link)

print(f"Found {len(product_links)} product links in total.")
for link in product_links[:10]:
    print(link) 

Scraping page 1: https://www.amazon.fr/s?k=laptops&__mk_fr_FR=%C3%85M%C3%85%C5%BD%C3%95%C3%91&crid=1MYLPFEZSUEND&sprefix=laptops%2Caps%2C190&ref=nb_sb_noss_1&page=1
Scraping page 2: https://www.amazon.fr/s?k=laptops&__mk_fr_FR=%C3%85M%C3%85%C5%BD%C3%95%C3%91&crid=1MYLPFEZSUEND&sprefix=laptops%2Caps%2C190&ref=nb_sb_noss_1&page=2
Scraping page 3: https://www.amazon.fr/s?k=laptops&__mk_fr_FR=%C3%85M%C3%85%C5%BD%C3%95%C3%91&crid=1MYLPFEZSUEND&sprefix=laptops%2Caps%2C190&ref=nb_sb_noss_1&page=3
Scraping page 4: https://www.amazon.fr/s?k=laptops&__mk_fr_FR=%C3%85M%C3%85%C5%BD%C3%95%C3%91&crid=1MYLPFEZSUEND&sprefix=laptops%2Caps%2C190&ref=nb_sb_noss_1&page=4
Scraping page 5: https://www.amazon.fr/s?k=laptops&__mk_fr_FR=%C3%85M%C3%85%C5%BD%C3%95%C3%91&crid=1MYLPFEZSUEND&sprefix=laptops%2Caps%2C190&ref=nb_sb_noss_1&page=5
Scraping page 6: https://www.amazon.fr/s?k=laptops&__mk_fr_FR=%C3%85M%C3%85%C5%BD%C3%95%C3%91&crid=1MYLPFEZSUEND&sprefix=laptops%2Caps%2C190&ref=nb_sb_noss_1&page=6
Scraping p

## Extract the name, caracteristics and the price 

In [6]:
file= input("Enter the name of a csv file to store the data")
file=file+'.csv'
with open(file, 'w', newline='', encoding='UTF8') as f:
    writer = csv.writer(f)
    writer.writerow(['Title', 'Price','link', 'Date'])
    
for link in product_links:
    page_prods = requests.get(link, headers=headers)
    soup2_prods = BeautifulSoup(page_prods.content, 'html.parser')

    # Get title safely
    title_tag = soup2_prods.find(id="productTitle")
    title = title_tag.get_text(strip=True) if title_tag else "No title found"

    # Get price safely
    whole_tag = soup2_prods.find('span', class_='a-price-whole')
    fraction_tag = soup2_prods.find('span', class_='a-price-fraction')

    if whole_tag and fraction_tag:
        price = whole_tag.get_text(strip=True) + ',' + fraction_tag.get_text(strip=True) + ' €'
    else:
        price = "Price not found"

    

    today = datetime.date.today()
    data = [title, price,link, today]

    # Append only the data, not the header every time
    with open(file, 'a+', newline='', encoding='UTF8') as f:
        writer = csv.writer(f)
        writer.writerow(data)



## Preview of the data

In [7]:
df=pd.read_csv(file)
print(df.head(10))

                                               Title      Price  \
0  ACEMAGIC Ordinateur Portable 17,3 Pouces PC Po...  448,,97 €   
1  Lenovo (FullHD 15,6 Zoll Ordinateur Portable (...  299,,00 €   
2  Ordinateur Portable 11.6 pouces 6Go RAM 128Go ...  165,,99 €   
3  Laptop 14.1 inches, Core i3-1115G4 CPU(up to 4...  269.,99 €   
4  PINSTONE 15.6 inch Laptop - 16GB RAM 512GB SSD...  329.,99 €   
5  Ordinateur portable 15,6 pouces, 24 Go RAM SSD...  299,,99 €   
6  Ordinateur Portable 14,1" Pouces laptop, 6 Go ...  184,,34 €   
7  MSI Modern 15 B7M-220FR: AMD Ryzen 7 7730U - 1...  499.,00 €   
8  Lenovo IdeaPad Slim 3 Chromebook 14M868 – 14 I...  279.,99 €   
9  NOTODD Celeron N5095 14 Inch Laptop Computer 4...  229.,49 €   

                                                link        Date  
0  https://www.amazon.fr/ACEMAGIC-Ordinateur-Port...  2025-07-23  
1  https://www.amazon.fr/Lenovo-Ordinateur-Portab...  2025-07-23  
2  https://www.amazon.fr/Ordinateur-Portable-Blue...  2025-07