# Ajio Men Grooming Product Scraper
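This Jupyter notebook automates the extraction of product details from Ajio's Men's Grooming section. It uses Python and Selenium to drive a browser, scroll through the infinitely scrolling product listing until all products have loaded, and then scrape each product's brand, name, rating, and price.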

In [101]:
# Import Required Libraries
import os
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Path to ChromeDriver
# The location can be overridden with the CHROMEDRIVER_PATH environment variable so the
# notebook is not tied to a single machine; the original local path remains the default.
path = os.environ.get(
    "CHROMEDRIVER_PATH",
    r"C:\Users\mshiv\Downloads\chromedriver-win64\chromedriver-win64\chromedriver.exe",
)

# Use the explicit binary only when it actually exists on disk. Otherwise create the
# Service without a path and let Selenium resolve a driver on its own, instead of
# failing on a path that only exists on the author's computer.
service = Service(path) if os.path.isfile(path) else Service()

# Initialize WebDriver
# NOTE: the browser stays open until driver.quit() is called; quit it once scraping is done.
driver = webdriver.Chrome(service=service)

In [102]:
# Step 1: Go to Google and Search "Ajio Men Grooming"
driver.get("https://www.google.com/")

# Wait for elements explicitly rather than sleeping for a fixed time: a fixed sleep
# breaks on a slow connection and wastes time on a fast one.
wait = WebDriverWait(driver, 15)

# Locate the Google Search Box and Enter the Query
# NOTE(review): the ids used in these XPaths are generated by Google's page and may
# change without notice — confirm they still match if this step times out.
search_box = wait.until(
    EC.element_to_be_clickable((By.XPATH, '//*[@id="APjFqb"]'))
)
search_box.send_keys("Ajio Men Grooming")
search_box.send_keys(Keys.ENTER)

# Click the Ajio Website Link from the Search Results
ajio_link = wait.until(
    EC.element_to_be_clickable(
        (By.XPATH, '//*[@id="AyvPbf"]/div/div/div[1]/div/div/span/a')
    )
)
ajio_link.click()
time.sleep(2)  # give the listing page a moment to render before measuring its height

# Step 2: Scroll Down to Load All Products
SCROLL_PAUSE_SECONDS = 2   # time allowed for each lazy-loaded batch to arrive
MAX_STALLED_CHECKS = 3     # consecutive unchanged heights required before stopping

height = driver.execute_script("return document.body.scrollHeight")
print("Page length scrolled", height)

# A single unchanged height is not proof that the end was reached: one slow response
# would stop the loop early and silently truncate the dataset. Only stop after the
# height has stayed the same for several consecutive checks.
stalled_checks = 0
while stalled_checks < MAX_STALLED_CHECKS:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
    time.sleep(SCROLL_PAUSE_SECONDS)
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == height:
        stalled_checks += 1
        continue
    # The page grew: reset the stall counter and report the new length once.
    stalled_checks = 0
    height = new_height
    print("Page length scrolled", height)

print("Scrolling completed!")

Page length scrolled 9756
Page length scrolled 27156
Page length scrolled 53256
Page length scrolled 79356
Page length scrolled 96756
Page length scrolled 114156
Page length scrolled 131556
Page length scrolled 148956
Page length scrolled 166356
Page length scrolled 183756
Page length scrolled 201156
Page length scrolled 218556
Page length scrolled 235956
Page length scrolled 253356
Page length scrolled 270756
Page length scrolled 288156
Page length scrolled 303236
Scrolling completed!


In [103]:
# Step 3: Locate All Parent Containers of the Products
product_containers = driver.find_elements("xpath", "//div[@class='preview']")

# Initialize Dataset and Counter for Missing Products
filtered_data = []
no_of_element_missed = 0

# Loop Through Each Product Container to Extract Data
# A container is kept only when all four fields are present; if any single lookup
# fails, the whole product is skipped and counted in no_of_element_missed.
for container in product_containers:
    try:
        # Extract Brand, Name, Rating, and Price
        brand = container.find_element("xpath", ".//div[@class='brand']").text.strip()
        name = container.find_element("xpath", ".//div[@class='nameCls']").text.strip()
        rating = container.find_element("xpath", ".//p[@class='_3I65V']").text.strip()
        # [1:] drops the first character of the price text — presumably the currency
        # symbol; confirm against the live page.
        price = container.find_element("xpath", ".//span[@class='price  ']//strong").text.strip()[1:]
    except (NoSuchElementException, StaleElementReferenceException):
        # Only "element not there" style failures mean missing data. Anything else
        # (e.g. a lost browser session) is a real error and must not be hidden
        # behind the missing-products counter.
        no_of_element_missed += 1
        continue

    # Append Data to the Dataset
    filtered_data.append({
        "Product Name": name,
        "Brand": brand,
        "Rating": rating,
        "Price (INR)": price
    })

print(f"Number of complete products: {len(filtered_data)}")
print(f"Number of products ignored due to missing elements: {no_of_element_missed}")

Number of complete products: 847
Number of products ignored due to missing elements: 715


In [104]:
# Step 4: Save Extracted Data to a dataframe
# Build the frame in one go from the list of per-product dicts collected above;
# the dict keys become the column names.
df = pd.DataFrame(filtered_data)
print(df)

                                          Product Name            Brand  \
0            Vegan Body Spray - Vetiver Bergamot Woods         SUPERDRY   
1    Power White Anti-Pollution Double Action Facew...          GARNIER   
2        Vegan Body Spray - Ceaderwood Mandar In Amber         SUPERDRY   
3                           Forest Spice Eau De Parfum       WILD STONE   
4                        Sport Vegan Revive Body Spray         SUPERDRY   
..                                                 ...              ...   
842                       Booster Essence Lotion 50 ml             MUJI   
843                                   Hand Soap Refill             MUJI   
844                               Men Thrill Deodorant             Envy   
845                     Jamaican Rum Perfumed Soap Set           Beardo   
846  Activated Charcoal Face Wash For Ace Oil Contr...  The Man Company   

    Rating Price (INR)  
0        4         200  
1      3.5         218  
2        4         200  

In [105]:
# Step 4: Write the scraped DataFrame to a CSV file, creating the output folder first
import os

output_path = "output/ajio_men_grooming_products.csv"

# exist_ok=True makes re-running this cell safe when the folder is already there.
os.makedirs("output", exist_ok=True)

# index=False keeps the DataFrame's row index out of the file.
df.to_csv(path_or_buf=output_path, index=False)
print(f"Scraped data saved to {output_path}")

Scraped data saved to output/ajio_men_grooming_products.csv


In [106]:
# Step 5: Read the saved CSV back in to confirm the export worked
df = pd.read_csv(filepath_or_buffer="output/ajio_men_grooming_products.csv")
# Preview the first 10 records; as the cell's last expression the notebook renders it.
df.head(n=10)

Unnamed: 0,Product Name,Brand,Rating,Price (INR)
0,Vegan Body Spray - Vetiver Bergamot Woods,SUPERDRY,4.0,200
1,Power White Anti-Pollution Double Action Facew...,GARNIER,3.5,218
2,Vegan Body Spray - Ceaderwood Mandar In Amber,SUPERDRY,4.0,200
3,Forest Spice Eau De Parfum,WILD STONE,3.9,325
4,Sport Vegan Revive Body Spray,SUPERDRY,3.8,200
5,Vegan Body Spray - Moss Aqua Patchoul,SUPERDRY,3.9,200
6,Men's Beard Colour - B104 Natural Brown,BIGEN,4.4,383
7,Classic Black Eau de Toilette,JAGUAR,3.9,2132
8,3 Niacinamide Toner with Rice Water,Plum,4.3,420
9,Men Fresh Active Deodorant Spray,NIVEA,3.9,143
