## Scraping Data from Amazon

# Column names
- Title
- Brand
- Price
- Storage
- Color
- Rating
- Ram
- Windows
- Processor

In [4]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import re
import time

### Import the URL

In [5]:
url = "https://www.amazon.in/s?k=laptop&s=relevanceblender&ds=v1%3ASM1%2F6tWi%2FUjM%2Fv%2B%2FwAxJFESvcdhv3W5fN5oH0K2IKsk"

### Header

In [6]:
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36"
}

### Create an Empty list to store the data

In [7]:
data = []

### Scrape the data from the website

In [8]:
response = requests.get(url, headers=header)
response.status_code

503

In [9]:
for page in range(1,2):
    params={'k':'laptop','page':page}
    # make the request with parameters
    response=requests.get(url,headers=header,params=params)

    soup=BeautifulSoup(response.content,'html.parser')

    #Find all the products on the page
    products=soup.find_all('div',{'data-component-type':'s-search-result'})
    
    #Extract all the data
    for product in products:
        
        title_tag=product.find("h2")
        if not title_tag:
            continue
        title_text=title_tag.get_text(strip=True)
        
        price_tag=product.find("span",{"class":"a-price-whole"})
        if not price_tag:
            continue
        price_text=price_tag.get_text(strip=True)

        match=re.match(r'^\W*([A-Za-z]+)',title_text)
        brand=match.group(1).upper() if match else 'UNKNOWN'

        
        rating_tag = product.find("span", {"class": "a-icon-alt"})
        if rating_tag:
            rating_match = re.search(r'(\d+\.?\d*)', rating_tag.get_text())
            rating = rating_match.group(1) if rating_match else "N/A"
        else:
            rating = "N/A"
        
        data.append({
            'Title':title_text,
            'Price':price_text,
            'Brand':brand,
            'Rating':rating,
        })
    print(f"page {page} scraped")
    time.sleep(1)

page 1 scraped


In [10]:
for product in data:
    print("Title:",product['Title'])
    print("Price:",product['Price'])
    print("Brand:",product['Brand'])
    print("Rating",product['Rating'])
    
    print('-'*50)