In [3]:
# Browser-like HTTP request headers that are passed along with each page request.
# Built from (header name, value) pairs so every header sits on its own entry.
headers = dict(
    [
        ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"),
        ("Accept-Language", "en-IN,en-US;q=0.9,en;q=0.8"),
        ("Referer", "https://www.flipkart.com/"),
    ]
)


1. HTTP headers are metadata sent with a web request that describe the client making the request, such as the browser type and language preferences.
2. Including headers (especially the User-Agent) makes the request look like it comes from a regular web browser rather than an automated script; many servers reject requests that appear to come from bots.
3. This is important in web scraping because many websites restrict or block requests that do not contain proper headers.

In [6]:
import requests, time, re
# requests: A Python library used to send HTTP requests and fetch web pages or APIs easily.

# time: A built-in Python module used to control execution timing, such as adding delays between requests to avoid overloading servers.

# re: A Python library for working with regular expressions, used to search, extract, and validate text patterns from data.

import pandas as pd
# pandas: A Python data analysis library used to store, clean, manipulate, and export structured data using DataFrames and Series.
from bs4 import BeautifulSoup
# BeautifulSoup (from bs4): A Python library used to parse HTML/XML documents and extract required data from web pages in a structured and readable way.

# ---------- Scraper configuration ----------
# Search-results URL; "{page}" is filled in with the page number for each request.
SEARCH_URL_TEMPLATE = "https://www.flipkart.com/laptops/pr?sid=6bo%2Cb5g&q=laptop&p%5B%5D=facets.price_range.from%3D40000&otracker=categorytree&p%5B%5D=facets.price_range.to%3D75000&p%5B%5D=facets.brand%255B%255D%3DHP&p%5B%5D=facets.brand%255B%255D%3DDELL&p%5B%5D=facets.brand%255B%255D%3DLenovo&p%5B%5D=facets.brand%255B%255D%3DASUS&p%5B%5D=facets.brand%255B%255D%3DAcer&p%5B%5D=facets.brand%255B%255D%3DApple&p%5B%5D=facets.brand%255B%255D%3DSamsung&sort=recency_desc&page={page}"
SITE_ROOT = "https://www.flipkart.com"  # prefix for the relative product links found in the cards
MAX_PAGES = 50         # highest page number that will be requested
REQUEST_TIMEOUT = 15   # seconds to wait for the server before giving up on a request
REQUEST_DELAY = 2      # seconds to pause between page requests to avoid overloading the server

# Column order of the final `laptops` DataFrame.
LAPTOP_COLUMNS = [
    "name",
    "avg_rating",
    "rating_count",
    "reviews_count",
    "processor",
    "ram",
    "os",
    "storage",
    "display",
    "others",
    "price",
    "discount",
    "flipkart_assured",
    "product_link",
]


def _parse_counts(span):
    """Return (rating_count, reviews_count) parsed from the ratings/reviews span.

    The span text is scanned for runs of digits that may contain thousands
    separators (commas). The first number is used as the rating count and the
    second as the reviews count; a missing span or missing number yields 0.
    """
    if not span:
        return 0, 0
    # The pattern must start with a digit: a bare "[\d,]+" would also match a
    # lone comma in the text, and int("") would then raise ValueError.
    numbers = [int(token.replace(",", "")) for token in re.findall(r"\d[\d,]*", span.text)]
    rating_count = numbers[0] if len(numbers) > 0 else 0
    reviews_count = numbers[1] if len(numbers) > 1 else 0
    return rating_count, reviews_count


def _parse_product_card(item):
    """Extract one laptop record (a dict keyed by LAPTOP_COLUMNS) from a product-card tag.

    Every field falls back to None (or 0 / [] / False for counts, extra
    features and the assured flag) when its HTML element is absent, so each
    card always produces a complete record.
    """
    # ---------- NAME / AVG RATING ----------
    name_tag = item.find("div", class_="RG5Slk")
    rating_tag = item.find("div", class_="MKiFS6")

    # ---------- RATING & REVIEWS ----------
    rating_count, reviews_count = _parse_counts(item.find("span", class_="PvbNMB"))

    # ---------- FEATURES ----------
    features = []
    ul = item.find("ul", class_="HwRTzP")
    if ul:
        features = [li.text.strip() for li in ul.find_all("li", class_="DTBslk")]
    # The first five bullet points are mapped to columns purely by position.
    # NOTE(review): some listings order their bullets differently (the saved
    # output contains a row whose "processor" starts with "Processor:" and
    # whose "os" holds display text), so validate these columns downstream.
    first_five = features[:5] + [None] * (5 - len(features[:5]))
    processor, ram, operating_system, storage, display_spec = first_five

    # ---------- PRICE ----------
    price_tag = item.find("div", class_="hZ3P6w DeU9vF")

    # ---------- DISCOUNT ----------
    disc_tag = item.find("div", class_="HQe8jr")
    disc_span = disc_tag.find("span") if disc_tag else None

    # ---------- FLIPKART ASSURED ----------
    fa_div = item.find("div", class_="qYp2rh")

    # ---------- PRODUCT LINK ----------
    a_tag = item.find("a", class_="k7wcnx")
    # .get() avoids a KeyError when the anchor exists but carries no href.
    href = a_tag.get("href") if a_tag else None

    return {
        "name": name_tag.get_text(strip=True) if name_tag else None,
        "avg_rating": rating_tag.get_text(strip=True) if rating_tag else None,
        "rating_count": rating_count,
        "reviews_count": reviews_count,
        "processor": processor,
        "ram": ram,
        "os": operating_system,
        "storage": storage,
        "display": display_spec,
        "others": features[5:],  # slicing past the end already yields []
        "price": price_tag.text.strip() if price_tag else None,
        "discount": disc_span.text.strip() if disc_span else None,
        "flipkart_assured": fa_div is not None,
        "product_link": SITE_ROOT + href if href else None,
    }


# One dict per product card; the DataFrame is built once after the loop
# instead of being re-concatenated (and re-copied) on every page.
records = []

for page in range(1, MAX_PAGES + 1):
    print(f"Scraping page {page}...")  # progress indicator

    url = SEARCH_URL_TEMPLATE.format(page=page)
    try:
        # The timeout keeps a stalled connection from hanging the notebook, and
        # raise_for_status() keeps an HTTP error page (e.g. a blocked request)
        # from being mistaken for "no more products".
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Request for page {page} failed ({exc}). Stopping scraping.")
        break

    # Parse the returned HTML with the lxml parser into a searchable tree.
    soup = BeautifulSoup(response.text, "lxml")

    # Each div with class jIjQ8S is one laptop product card.
    product_cards = soup.find_all("div", class_="jIjQ8S")

    # Stop as soon as a page has no products.
    if not product_cards:
        print(f"No products found on page {page}. Stopping scraping.")
        break

    records.extend(_parse_product_card(item) for item in product_cards)

    time.sleep(REQUEST_DELAY)  # throttle before requesting the next page

# Rows collected before an early stop are still kept.
laptops = pd.DataFrame(records, columns=LAPTOP_COLUMNS)

print("Scraping completed âœ…")


Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
Scraping page 15...
Scraping page 16...
Scraping page 17...
Scraping page 18...
Scraping page 19...
Scraping page 20...
Scraping page 21...
Scraping page 22...
Scraping page 23...
Scraping page 24...
Scraping page 25...
Scraping page 26...
Scraping page 27...
Scraping page 28...
Scraping page 29...
Scraping page 30...
Scraping page 31...
Scraping page 32...
Scraping page 33...
Scraping page 34...
Scraping page 35...
Scraping page 36...
Scraping page 37...
Scraping page 38...
Scraping page 39...
Scraping page 40...
Scraping page 41...
Scraping page 42...
No products found on page 42. Stopping scraping.
Scraping completed âœ…


In [8]:
# Number of rows (scraped products) in the laptops DataFrame.
laptops.shape[0]

984

In [18]:
# Show the scraped laptops DataFrame as this cell's output.
laptops

Unnamed: 0,name,avg_rating,rating_count,reviews_count,processor,ram,os,storage,display,others,price,discount,flipkart_assured,product_link
0,ASUS Expertbook P1 Intel Core i3 13th Gen 1315...,4.3,4716,399,Intel Core i3 Processor (13th Gen),16 GB DDR5 RAM,Windows 11 Home Operating System,512 GB SSD,35.56 cm (14 Inch) Display,[Included Software- Microsoft Office Home 2024...,"â‚¹55,990",25% off,False,https://www.flipkart.com/asus-expertbook-p1-in...
1,HP 15 Intel Core i3 13th Gen 1315U - (16 GB/51...,4.2,780,49,Intel Core i3 Processor (13th Gen),16 GB DDR4 RAM,Windows 11 Home Operating System,512 GB SSD,39.62 cm (15.6 Inch) Display,[1 Year Onsite Warranty],"â‚¹44,900",21% off,False,https://www.flipkart.com/hp-15-intel-core-i3-1...
2,DELL 15 Intel Core i3 13th Gen 1305U - (16 GB/...,4.2,663,46,Intel Core i3 Processor (13th Gen),16 GB DDR4 RAM,Windows 11 Home Operating System,512 GB SSD,39.62 cm (15.6 Inch) Display,"[Microsoft Office H&S 2024, My Dell (Dell Powe...","â‚¹40,490",23% off,True,https://www.flipkart.com/dell-15-intel-core-i3...
3,DELL 15 AMD Ryzen 5 Hexa Core 7530U - (16 GB/5...,4.3,2731,182,AMD Ryzen 5 Hexa Core Processor,16 GB DDR4 RAM,Windows 11 Operating System,512 GB SSD,39.62 cm (15.6 Inch) Display,"[My Dell (Dell Power Manager,Dell Support Assi...","â‚¹45,990",19% off,True,https://www.flipkart.com/dell-15-amd-ryzen-5-h...
4,ASUS Vivobook Go 15 (2025) with Office 2024 + ...,4.3,1160,55,AMD Ryzen 5 Quad Core Processor,16 GB LPDDR5 RAM,Windows 11 Home Operating System,512 GB SSD,39.62 cm (15.6 Inch) Display,[Microsoft Office Home 2024 (Lifetime Validity...,"â‚¹42,990",18% off,True,https://www.flipkart.com/asus-vivobook-go-15-2...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
979,Lenovo IdeaPad Slim 3 Intel Core i5 12th Gen 1...,4.1,1868,141,Intel Core i5 Processor (12th Gen),16 GB DDR4 RAM,64 bit Windows 11 Operating System,512 GB SSD,39.62 cm (15.6 Inch) Display,[2 Yr Warranty],"â‚¹51,990",37% off,False,https://www.flipkart.com/lenovo-ideapad-slim-3...
980,ASUS Vivobook 15 Intel Core i5 11th Gen 1135G7...,4.3,8906,619,Intel Core i5 Processor (11th Gen),8 GB DDR4 RAM,64 bit Windows 11 Operating System,512 GB SSD,39.62 cm (15.6 Inch) Display,[1 Year Onsite Warranty],"â‚¹55,873",15% off,False,https://www.flipkart.com/asus-vivobook-15-inte...
981,Lenovo IdeaPad Slim 5 Intel Core i5 12th Gen 1...,3.7,77,6,Intel Core i5 Processor (12th Gen),16 GB DDR4 RAM,64 bit Windows 11 Operating System,512 GB SSD,39.62 cm (15.6 Inch) Display,[1 Year Onsite warranty + 1 Year Accidental Da...,"â‚¹58,491",40% off,False,https://www.flipkart.com/lenovo-ideapad-slim-5...
982,DELL Inspiron AMD Ryzen 5 Hexa Core 5625U - (8...,3.9,68,10,Processor: R5-5625U (2.30 GHz up to 4.30 GHz),RAM & Storage: 8GB DDR4 & 512GB SSD,"Display: 15.6"" FHD WVA AG Narrow Border 120Hz ...",Software: Win 11 + Office H&S 2021,"Ports: 1 USB 3.2 Gen 1 port, 1 USB 3.2 Gen 1 T...","[AMD Ryzen 5 Hexa Core Processor, 8 GB DDR4 RA...","â‚¹54,501",21% off,True,https://www.flipkart.com/dell-inspiron-amd-ryz...


In [12]:
# Show the laptops DataFrame again as this cell's output.
laptops

Unnamed: 0,name,avg_rating,rating_count,reviews_count,processor,ram,os,storage,display,others,price,discount,flipkart_assured,product_link
0,ASUS Expertbook P1 Intel Core i3 13th Gen 1315...,4.3,4716,399,Intel Core i3 Processor (13th Gen),16 GB DDR5 RAM,Windows 11 Home Operating System,512 GB SSD,35.56 cm (14 Inch) Display,[Included Software- Microsoft Office Home 2024...,"â‚¹55,990",25% off,False,https://www.flipkart.com/asus-expertbook-p1-in...
1,HP 15 Intel Core i3 13th Gen 1315U - (16 GB/51...,4.2,780,49,Intel Core i3 Processor (13th Gen),16 GB DDR4 RAM,Windows 11 Home Operating System,512 GB SSD,39.62 cm (15.6 Inch) Display,[1 Year Onsite Warranty],"â‚¹44,900",21% off,False,https://www.flipkart.com/hp-15-intel-core-i3-1...
2,DELL 15 Intel Core i3 13th Gen 1305U - (16 GB/...,4.2,663,46,Intel Core i3 Processor (13th Gen),16 GB DDR4 RAM,Windows 11 Home Operating System,512 GB SSD,39.62 cm (15.6 Inch) Display,"[Microsoft Office H&S 2024, My Dell (Dell Powe...","â‚¹40,490",23% off,True,https://www.flipkart.com/dell-15-intel-core-i3...
3,DELL 15 AMD Ryzen 5 Hexa Core 7530U - (16 GB/5...,4.3,2731,182,AMD Ryzen 5 Hexa Core Processor,16 GB DDR4 RAM,Windows 11 Operating System,512 GB SSD,39.62 cm (15.6 Inch) Display,"[My Dell (Dell Power Manager,Dell Support Assi...","â‚¹45,990",19% off,True,https://www.flipkart.com/dell-15-amd-ryzen-5-h...
4,ASUS Vivobook Go 15 (2025) with Office 2024 + ...,4.3,1160,55,AMD Ryzen 5 Quad Core Processor,16 GB LPDDR5 RAM,Windows 11 Home Operating System,512 GB SSD,39.62 cm (15.6 Inch) Display,[Microsoft Office Home 2024 (Lifetime Validity...,"â‚¹42,990",18% off,True,https://www.flipkart.com/asus-vivobook-go-15-2...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
979,Lenovo IdeaPad Slim 3 Intel Core i5 12th Gen 1...,4.1,1868,141,Intel Core i5 Processor (12th Gen),16 GB DDR4 RAM,64 bit Windows 11 Operating System,512 GB SSD,39.62 cm (15.6 Inch) Display,[2 Yr Warranty],"â‚¹51,990",37% off,False,https://www.flipkart.com/lenovo-ideapad-slim-3...
980,ASUS Vivobook 15 Intel Core i5 11th Gen 1135G7...,4.3,8906,619,Intel Core i5 Processor (11th Gen),8 GB DDR4 RAM,64 bit Windows 11 Operating System,512 GB SSD,39.62 cm (15.6 Inch) Display,[1 Year Onsite Warranty],"â‚¹55,873",15% off,False,https://www.flipkart.com/asus-vivobook-15-inte...
981,Lenovo IdeaPad Slim 5 Intel Core i5 12th Gen 1...,3.7,77,6,Intel Core i5 Processor (12th Gen),16 GB DDR4 RAM,64 bit Windows 11 Operating System,512 GB SSD,39.62 cm (15.6 Inch) Display,[1 Year Onsite warranty + 1 Year Accidental Da...,"â‚¹58,491",40% off,False,https://www.flipkart.com/lenovo-ideapad-slim-5...
982,DELL Inspiron AMD Ryzen 5 Hexa Core 5625U - (8...,3.9,68,10,Processor: R5-5625U (2.30 GHz up to 4.30 GHz),RAM & Storage: 8GB DDR4 & 512GB SSD,"Display: 15.6"" FHD WVA AG Narrow Border 120Hz ...",Software: Win 11 + Office H&S 2021,"Ports: 1 USB 3.2 Gen 1 port, 1 USB 3.2 Gen 1 T...","[AMD Ryzen 5 Hexa Core Processor, 8 GB DDR4 RA...","â‚¹54,501",21% off,True,https://www.flipkart.com/dell-inspiron-amd-ryz...


In [13]:
# Write the laptops table to an Excel workbook in the working directory,
# leaving out the DataFrame's index column.
laptops.to_excel(excel_writer="flipkart_laptops.xlsx", index=False)