In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

In [3]:
# Scrape Flipkart mobile search results page by page and save them to a CSV.
#
# Every product container becomes one row: name, price, rating, discount,
# review text and the first five entries of its spec list. Scraping stops
# when `target_rows` rows are collected, when `max_pages` pages have been
# requested, or as soon as a request fails; whatever was collected up to that
# point is still written to disk.

# Browser-like User-Agent sent with every request
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

# Base Flipkart search URL; the page number is appended as its own
# `page` query parameter when each request is built.
base_url = "https://www.flipkart.com/search?q=mobiles&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off"

max_pages = 150  # Upper bound on the number of result pages to request
target_rows = 1500  # Stop as soon as this many rows have been collected
request_timeout = 30  # Seconds to wait for a response before giving up
data = []  # One list per product, in the column order used for the DataFrame


def _first_text(node, tag, css_class, default):
    """Return the stripped text of the first matching descendant of *node*.

    *default* is returned when no ``tag`` element with class ``css_class``
    exists, so a missing field never raises.
    """
    match = node.find(tag, class_=css_class)
    return match.text.strip() if match else default


for page in range(1, max_pages + 1):
    print(f"Scraping page {page}...")

    # The page number must be a separate query parameter. Concatenating it
    # directly onto base_url would turn the trailing "as=off" into "as=off1",
    # "as=off2", ... and never send a page number at all.
    search_url = f"{base_url}&page={page}"

    try:
        response = requests.get(search_url, headers=headers, timeout=request_timeout)
    except requests.RequestException as exc:
        # Network-level failure (timeout, DNS, connection reset, ...): stop
        # here but fall through to the save step with what we already have.
        print(f"Failed to fetch page {page}. Error: {exc}")
        break

    if response.status_code != 200:
        print(f"Failed to fetch page {page}. Status code: {response.status_code}")
        break  # Stop if request fails

    soup = BeautifulSoup(response.text, "html.parser")

    # Find product containers
    products = soup.find_all("div", class_="tUxRFH")
    if not products:
        # Usually means the class names are stale or the response was not a
        # normal results page; report it instead of silently adding nothing.
        print(f"No product containers found on page {page}.")

    for product in products:
        try:
            name = _first_text(product, "div", "KzDlHZ", "No Name")
            price = _first_text(product, "div", "Nx9bqj _4b5DiR", "No Price")
            rating = _first_text(product, "div", "XQDdHH", "No Rating")
            discount = _first_text(product, "div", "UkUFwK", "No Discount")

            # Keep only the part of the span text before the first "&"
            # (presumably "N Ratings & M Reviews" -- verify against the page).
            reviews_tag = product.find("span", class_="Wphh3N")
            reviews = reviews_tag.text.split("&")[0].strip() if reviews_tag else "No Reviews"

            # The first spec list supplies up to five positional fields; any
            # missing position is filled with "No Info". The column meaning
            # of each position is assumed from the listing layout.
            features = product.find_all("ul", class_="G4BRas")
            specs = features[0].find_all("li") if features else []
            spec_texts = [item.text.strip() for item in specs[:5]]
            spec_texts += ["No Info"] * (5 - len(spec_texts))
            storage, processor, camera, battery, display = spec_texts
        except (AttributeError, IndexError) as exc:
            # Malformed markup for this one product: report it and move on
            # rather than hiding the problem or aborting the whole run.
            print(f"Skipping a product on page {page}: {exc}")
            continue

        data.append([name, price, rating, discount, reviews, storage, processor, camera, battery, display])

        # Stop once the row target is reached
        if len(data) >= target_rows:
            break

    # Stop scraping if we have enough rows
    if len(data) >= target_rows:
        break

    # Delay between pages to avoid getting blocked
    time.sleep(2)

# Create DataFrame with the 10 scraped columns
df = pd.DataFrame(data, columns=["Product Name", "Price", "Rating", "Discount", "Reviews", "Storage", "Processor", "Camera", "Battery", "Display"])

# Save to CSV
df.to_csv("flipkart_products_1501.csv", index=False)

print(f"Scraping complete! Total products scraped: {len(df)}")
print(df.head())

Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
Scraping page 15...
Scraping page 16...
Scraping page 17...
Scraping page 18...
Scraping page 19...
Scraping page 20...
Scraping page 21...
Scraping page 22...
Scraping page 23...
Scraping page 24...
Scraping page 25...
Scraping page 26...
Scraping page 27...
Scraping page 28...
Scraping page 29...
Scraping page 30...
Scraping page 31...
Scraping page 32...
Scraping page 33...
Scraping page 34...
Scraping page 35...
Scraping page 36...
Scraping page 37...
Scraping page 38...
Scraping page 39...
Scraping page 40...
Scraping page 41...
Scraping page 42...
Scraping page 43...
Scraping page 44...
Scraping page 45...
Scraping page 46...
Scraping page 47...
Scraping page 48...
Scraping page 49...
Scraping page 50...
Scraping 