**MOTOROLA**

In [9]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import pandas as pd

# URL of the website
url = "https://www.flipkart.com/search?q=motorola+mobile&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off&sort=recency_desc&p%5B%5D=facets.price_range.from%3D20000&p%5B%5D=facets.price_range.to%3DMax"
def initialize_driver():
    driver = webdriver.Chrome()  # Ensure ChromeDriver is in your PATH
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  # Wait for the page to load

# Function to scrape product names, links, and prices
def scrape_product_data(driver):
    product_names = [name.text for name in driver.find_elements(By.CLASS_NAME, 'KzDlHZ')]
    product_links = [link.get_attribute('href') for link in driver.find_elements(By.XPATH, '//a[@class="CGtC98"]')]
    product_prices = [price.text for price in driver.find_elements(By.CLASS_NAME, 'hl05eU')]  # Adjusted class name for prices
    
    # Return product data
    return product_names, product_links, product_prices

# Function to scrape multiple pages
def scrape_multiple_pages(driver, base_url, num_pages):
    all_product_names = []
    all_product_links = []
    all_product_prices = []
    
    for page in range(1, num_pages + 1):
        load_page(driver, f"{base_url}&page={page}")  # Update the URL to include the page number
        product_names, product_links, product_prices = scrape_product_data(driver)
        
        # Ensure the lists are of the same length before extending
        if len(product_names) == len(product_links) == len(product_prices):
            all_product_names.extend(product_names)
            all_product_links.extend(product_links)
            all_product_prices.extend(product_prices)
        else:
            print(f"Warning: Mismatched data on page {page}. Names: {len(product_names)}, Links: {len(product_links)}, Prices: {len(product_prices)}")

        time.sleep(5)  # Wait before loading the next page
    
    return all_product_names, all_product_links, all_product_prices

# Initialize WebDriver and scrape multiple pages
driver = initialize_driver()
all_product_names, all_product_links, all_product_prices = scrape_multiple_pages(driver, url, 4)  # Adjust number of pages as needed

# Close the driver
driver.quit()

# Create a DataFrame to store the results
df = pd.DataFrame({
    'Product_Name': all_product_names,
    'Product_Link': all_product_links,
    'Product_Price': all_product_prices  # Updated to include product prices
})

# Display or save the scraped data
df.head()  # Display the DataFrame
Output_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\motorola.csv"
df.to_csv(Output_path, index=False, encoding="utf-8-sig")

In [10]:
df

Unnamed: 0,Product_Name,Product_Link,Product_Price
0,MOTOROLA Edge 60 Fusion 5G (PANTONE Slipstream...,https://www.flipkart.com/motorola-edge-60-fusi...,"₹22,999\n₹25,99911% off"
1,"Motorola Edge 50 Fusion (Hot Pink, 256 GB)",https://www.flipkart.com/motorola-edge-50-fusi...,"₹20,999\n₹27,99925% off"
2,"Motorola Edge 50 Fusion (Forest Blue, 256 GB)",https://www.flipkart.com/motorola-edge-50-fusi...,"₹20,999\n₹27,99925% off"
3,"MOTOROLA Edge 60 Fusion 5G (PANTONE Zephyr, 25...",https://www.flipkart.com/motorola-edge-60-fusi...,"₹22,999\n₹25,99911% off"
4,"Motorola Edge 50 Fusion (Marshmallow Blue, 256...",https://www.flipkart.com/motorola-edge-50-fusi...,"₹20,999\n₹27,99925% off"
...,...,...,...
91,"MOTOROLA Moto G 5G (Frosted Silver, 128 GB)",https://www.flipkart.com/motorola-moto-g-5g-fr...,"₹24,999"
92,"MOTOROLA razr 40 (Vanilla Cream, 256 GB)",https://www.flipkart.com/motorola-razr-40-vani...,"₹45,999\n₹99,99954% off"
93,"MOTOROLA Razar 40 (Sage Green, 256 GB)",https://www.flipkart.com/motorola-razar-40-sag...,"₹88,888\n₹99,99911% off"
94,"Motorola G60 (Frosted Champagne, 128 GB)",https://www.flipkart.com/motorola-g60-frosted-...,"₹20,999\n₹21,9994% off"


In [11]:
df.info

<bound method DataFrame.info of                                          Product_Name  \
0   MOTOROLA Edge 60 Fusion 5G (PANTONE Slipstream...   
1          Motorola Edge 50 Fusion (Hot Pink, 256 GB)   
2       Motorola Edge 50 Fusion (Forest Blue, 256 GB)   
3   MOTOROLA Edge 60 Fusion 5G (PANTONE Zephyr, 25...   
4   Motorola Edge 50 Fusion (Marshmallow Blue, 256...   
..                                                ...   
91        MOTOROLA Moto G 5G (Frosted Silver, 128 GB)   
92           MOTOROLA razr 40 (Vanilla Cream, 256 GB)   
93             MOTOROLA Razar 40 (Sage Green, 256 GB)   
94           Motorola G60 (Frosted Champagne, 128 GB)   
95    MOTOROLA Razr 60 (PANTONE Lightest Sky, 256 GB)   

                                         Product_Link            Product_Price  
0   https://www.flipkart.com/motorola-edge-60-fusi...  ₹22,999\n₹25,99911% off  
1   https://www.flipkart.com/motorola-edge-50-fusi...  ₹20,999\n₹27,99925% off  
2   https://www.flipkart.com/motorola-ed

In [12]:
import re

def clean_price(price: str):
    """Convert price like '₹23,499' to integer 23499."""
    match = re.search(r'₹\s?([\d,]+)', str(price))
    return int(match.group(1).replace(',', '')) if match else None

df['Product_Price'] = df['Product_Price'].apply(clean_price)


In [13]:
# Define unwanted product names
products_to_remove = [
    "(Refurbished) MOTOROLA Edge 40 (Viva Magenta, 256 GB)",
    "(Refurbished) MOTOROLA Edge 50 Pro 5G (Luxe Lavender, 256 GB)",
    "(Refurbished) MOTOROLA Edge 30 Ultra (Interstellar Black, 256 GB)",
    "(Refurbished) MOTOROLA Edge 20 Pro 5G (Iridescent Cloud, 128 GB)"
]

# Remove rows where 'Product_Name' matches unwanted items
df = df.loc[~df["Product_Name"].isin(products_to_remove)].reset_index(drop=True)


In [14]:
df = df[df['Product_Price'] <= 40000]
Output_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\motorola_cleaned.csv"
df.to_csv(Output_path, index=False)

**ONEPLUS**

In [30]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import pandas as pd

# URL of the website
url = "https://www.flipkart.com/search?q=mobile+phone&sid=tyy%2C4io&as=on&as-show=on&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&as-pos=2&as-type=HISTORY&suggestionId=mobile+phone%7CMobiles&requestId=c16475bf-836e-4831-8ef8-1cbd781451ed&p%5B%5D=facets.brand%255B%255D%3DOnePlus"
def initialize_driver():
    driver = webdriver.Chrome()  # Ensure ChromeDriver is in your PATH
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  # Wait for the page to load

# Function to scrape product names, links, and prices
def scrape_product_data(driver):
    product_names = [name.text for name in driver.find_elements(By.CLASS_NAME, 'KzDlHZ')]
    product_links = [link.get_attribute('href') for link in driver.find_elements(By.XPATH, '//a[@class="CGtC98"]')]
    product_prices = [price.text for price in driver.find_elements(By.CLASS_NAME, 'hl05eU')]  # Adjusted class name for prices
    
    # Return product data
    return product_names, product_links, product_prices

# Function to scrape multiple pages
def scrape_multiple_pages(driver, base_url, num_pages):
    all_product_names = []
    all_product_links = []
    all_product_prices = []
    
    for page in range(1, num_pages + 1):
        load_page(driver, f"{base_url}&page={page}")  # Update the URL to include the page number
        product_names, product_links, product_prices = scrape_product_data(driver)
        
        # Ensure the lists are of the same length before extending
        if len(product_names) == len(product_links) == len(product_prices):
            all_product_names.extend(product_names)
            all_product_links.extend(product_links)
            all_product_prices.extend(product_prices)
        else:
            print(f"Warning: Mismatched data on page {page}. Names: {len(product_names)}, Links: {len(product_links)}, Prices: {len(product_prices)}")

        time.sleep(5)  # Wait before loading the next page
    
    return all_product_names, all_product_links, all_product_prices

# Initialize WebDriver and scrape multiple pages
driver = initialize_driver()
all_product_names, all_product_links, all_product_prices = scrape_multiple_pages(driver, url, 4)  # Adjust number of pages as needed

# Close the driver
driver.quit()

# Create a DataFrame to store the results
df = pd.DataFrame({
    'Product_Name': all_product_names,
    'Product_Link': all_product_links,
    'Product_Price': all_product_prices  # Updated to include product prices
})

# Display or save the scraped data
df.head()  # Display the DataFrame
Output_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oneplus.csv"
df.to_csv(Output_path, index=False)
df.to_csv('flipkart_scrape_oneplus.csv', index=False)  # Save the scraped data to a CSV file

In [31]:
df

Unnamed: 0,Product_Name,Product_Link,Product_Price
0,"OnePlus Nord CE5 5G (Marble Mist, 256 GB)",https://www.flipkart.com/oneplus-nord-ce5-5g-m...,"₹26,113\n₹26,9993% off"
1,"OnePlus Nord CE5 5G (Black Infinity, 128 GB)",https://www.flipkart.com/oneplus-nord-ce5-5g-b...,"₹24,113\n₹24,9993% off"
2,"OnePlus Nord CE4 lite 5G (MEGA BLUE, 128 GB)",https://www.flipkart.com/oneplus-nord-ce4-lite...,"₹16,815\n₹20,99919% off"
3,"OnePlus Nord CE4 lite 5G (SUPER SILVER, 128 GB)",https://www.flipkart.com/oneplus-nord-ce4-lite...,"₹16,860\n₹20,99919% off"
4,"OnePlus Nord CE4 lite 5G (ULTRA ORANGE, 128 GB)",https://www.flipkart.com/oneplus-nord-ce4-lite...,"₹16,980\n₹20,99919% off"
...,...,...,...
91,"OnePlus 11R 5G (Galactic Silver, 128 GB)",https://www.flipkart.com/oneplus-11r-5g-galact...,"₹28,999\n₹39,99927% off"
92,"OnePlus 12R (Sunset Dune, 256 GB)",https://www.flipkart.com/oneplus-12r-sunset-du...,"₹33,168\n₹42,99922% off"
93,"OnePlus 9 5G (Arctic Sky, 128 GB)",https://www.flipkart.com/oneplus-9-5g-arctic-s...,"₹19,499\n₹37,29047% off"
94,"OnePlus Nord CE 3 Lite 5G (Pastel Lime, 256 GB)",https://www.flipkart.com/oneplus-nord-ce-3-lit...,"₹17,580\n₹21,99920% off"


In [32]:
df.info

<bound method DataFrame.info of                                        Product_Name  \
0         OnePlus Nord CE5 5G (Marble Mist, 256 GB)   
1      OnePlus Nord CE5 5G (Black Infinity, 128 GB)   
2      OnePlus Nord CE4 lite 5G (MEGA BLUE, 128 GB)   
3   OnePlus Nord CE4 lite 5G (SUPER SILVER, 128 GB)   
4   OnePlus Nord CE4 lite 5G (ULTRA ORANGE, 128 GB)   
..                                              ...   
91         OnePlus 11R 5G (Galactic Silver, 128 GB)   
92                OnePlus 12R (Sunset Dune, 256 GB)   
93                OnePlus 9 5G (Arctic Sky, 128 GB)   
94  OnePlus Nord CE 3 Lite 5G (Pastel Lime, 256 GB)   
95        OnePlus Nord CE5 5G (Marble Mist, 256 GB)   

                                         Product_Link            Product_Price  
0   https://www.flipkart.com/oneplus-nord-ce5-5g-m...   ₹26,113\n₹26,9993% off  
1   https://www.flipkart.com/oneplus-nord-ce5-5g-b...   ₹24,113\n₹24,9993% off  
2   https://www.flipkart.com/oneplus-nord-ce4-lite...  ₹16,815\n

In [33]:
import re

def clean_price(price: str):
    """Convert price like '₹23,499' to integer 23499."""
    match = re.search(r'₹\s?([\d,]+)', str(price))
    return int(match.group(1).replace(',', '')) if match else None

df['Product_Price'] = df['Product_Price'].apply(clean_price)

In [34]:
df = df[(df['Product_Price'] > 20000) & (df['Product_Price'] <= 40000)]
Output_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oneplus_cleaned.csv"
df.to_csv(Output_path, index=False)

**OPPO**

In [25]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import pandas as pd

# URL of the website
url = "https://www.flipkart.com/search?q=mobile+phone&sid=tyy%2C4io&as=on&as-show=on&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&as-pos=1&as-type=HISTORY&suggestionId=mobile+phone%7CMobiles&requestId=9c7ecdfa-25d5-4ca3-8a0b-18f1e30dd1d4&p%5B%5D=facets.brand%255B%255D%3DOPPO"
def initialize_driver():
    driver = webdriver.Chrome()  # Ensure ChromeDriver is in your PATH
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  # Wait for the page to load

# Function to scrape product names, links, and prices
def scrape_product_data(driver):
    product_names = [name.text for name in driver.find_elements(By.CLASS_NAME, 'KzDlHZ')]
    product_links = [link.get_attribute('href') for link in driver.find_elements(By.XPATH, '//a[@class="CGtC98"]')]
    product_prices = [price.text for price in driver.find_elements(By.CLASS_NAME, 'hl05eU')]  # Adjusted class name for prices
    
    # Return product data
    return product_names, product_links, product_prices

# Function to scrape multiple pages
def scrape_multiple_pages(driver, base_url, num_pages):
    all_product_names = []
    all_product_links = []
    all_product_prices = []
    
    for page in range(1, num_pages + 1):
        load_page(driver, f"{base_url}&page={page}")  # Update the URL to include the page number
        product_names, product_links, product_prices = scrape_product_data(driver)
        
        # Ensure the lists are of the same length before extending
        if len(product_names) == len(product_links) == len(product_prices):
            all_product_names.extend(product_names)
            all_product_links.extend(product_links)
            all_product_prices.extend(product_prices)
        else:
            print(f"Warning: Mismatched data on page {page}. Names: {len(product_names)}, Links: {len(product_links)}, Prices: {len(product_prices)}")

        time.sleep(5)  # Wait before loading the next page
    
    return all_product_names, all_product_links, all_product_prices

# Initialize WebDriver and scrape multiple pages
driver = initialize_driver()
all_product_names, all_product_links, all_product_prices = scrape_multiple_pages(driver, url, 4)  # Adjust number of pages as needed

# Close the driver
driver.quit()

# Create a DataFrame to store the results
df = pd.DataFrame({
    'Product_Name': all_product_names,
    'Product_Link': all_product_links,
    'Product_Price': all_product_prices  # Updated to include product prices
})

# Display or save the scraped data
df.head()  # Display the DataFrame
Output_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oppo.csv"
df.to_csv(Output_path, index=False, encoding="utf-8-sig")

In [26]:
df

Unnamed: 0,Product_Name,Product_Link,Product_Price
0,OPPO K13 5G with 7000mAh and 80W SUPERVOOC Cha...,https://www.flipkart.com/oppo-k13-5g-7000mah-8...,"₹19,999\n₹24,99920% off"
1,OPPO K13x 5G 6000mAh and 45W SUPERVOOC Charger...,https://www.flipkart.com/oppo-k13x-5g-6000mah-...,"₹11,999\n₹15,99925% off"
2,OPPO K13x 5G 6000mAh and 45W SUPERVOOC Charger...,https://www.flipkart.com/oppo-k13x-5g-6000mah-...,"₹12,999\n₹16,99923% off"
3,OPPO K13x 5G 6000mAh and 45W SUPERVOOC Charger...,https://www.flipkart.com/oppo-k13x-5g-6000mah-...,"₹12,999\n₹16,99923% off"
4,OPPO K13 5G with 7000mAh and 80W SUPERVOOC Cha...,https://www.flipkart.com/oppo-k13-5g-7000mah-8...,"₹17,999\n₹22,99921% off"
...,...,...,...
91,"OPPO Reno 12 Pro 5G (Space Brown, 512 GB)",https://www.flipkart.com/oppo-reno-12-pro-5g-s...,"₹33,600\n₹55,99939% off"
92,OPPO Reno 12 Pro 5G (Manish Malhotra Limited E...,https://www.flipkart.com/oppo-reno-12-pro-5g-m...,"₹31,990\n₹53,99940% off"
93,"OPPO F25 Pro 5G (Coral Purple, 128 GB)",https://www.flipkart.com/oppo-f25-pro-5g-coral...,"₹23,999\n₹28,99917% off"
94,"OPPO A58 (Glowing Black, 128 GB)",https://www.flipkart.com/oppo-a58-glowing-blac...,"₹12,999\n₹15,49916% off"


In [27]:
df.info

<bound method DataFrame.info of                                          Product_Name  \
0   OPPO K13 5G with 7000mAh and 80W SUPERVOOC Cha...   
1   OPPO K13x 5G 6000mAh and 45W SUPERVOOC Charger...   
2   OPPO K13x 5G 6000mAh and 45W SUPERVOOC Charger...   
3   OPPO K13x 5G 6000mAh and 45W SUPERVOOC Charger...   
4   OPPO K13 5G with 7000mAh and 80W SUPERVOOC Cha...   
..                                                ...   
91          OPPO Reno 12 Pro 5G (Space Brown, 512 GB)   
92  OPPO Reno 12 Pro 5G (Manish Malhotra Limited E...   
93             OPPO F25 Pro 5G (Coral Purple, 128 GB)   
94                   OPPO A58 (Glowing Black, 128 GB)   
95                   OPPO A5x (Midnight Blue, 128 GB)   

                                         Product_Link            Product_Price  
0   https://www.flipkart.com/oppo-k13-5g-7000mah-8...  ₹19,999\n₹24,99920% off  
1   https://www.flipkart.com/oppo-k13x-5g-6000mah-...  ₹11,999\n₹15,99925% off  
2   https://www.flipkart.com/oppo-k13x-5

In [28]:
import re

def clean_price(price: str):
    """Convert price like '₹23,499' to integer 23499."""
    match = re.search(r'₹\s?([\d,]+)', str(price))
    return int(match.group(1).replace(',', '')) if match else None

df['Product_Price'] = df['Product_Price'].apply(clean_price)

In [29]:
df = df[(df['Product_Price'] > 20000) & (df['Product_Price'] <= 40000)]
Output_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oppo_cleaned.csv"
df.to_csv(Output_path, index=False)

*ONEPLUS DROPED*

In [35]:
import pandas as pd

# Load the CSV file containing product links
file_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oneplus_cleaned.csv"
df = pd.read_csv(file_path)

# List of product names to be dropped
products_to_drop = [
    'OnePlus Nord (Gray Onyx, 256 GB)',
    'OnePlus Nord (Gray Onyx, 128 GB)',
    'OnePlus Nord (Gray Onyx, 64 GB)'
]

# Drop rows with the specified product names
df_cleaned = df[~df['Product_Name'].isin(products_to_drop)]

# Save the cleaned DataFrame back to a CSV file
cleaned_file_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oneplus_cleaned_dropped.csv"
df_cleaned.to_csv(cleaned_file_path, index=False)

print(f"Rows with specified product names dropped and cleaned data saved to {cleaned_file_path}")

Rows with specified product names dropped and cleaned data saved to C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oneplus_cleaned_dropped.csv


In [2]:
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, StaleElementReferenceException

# Load the CSV file containing product links
file_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oneplus_cleaned_dropped.csv"
df = pd.read_csv(file_path)

# Initialize the Chrome driver
driver = webdriver.Chrome()

# Function to extract reviews and ratings from a product page
def extract_reviews_and_ratings(driver):
    reviews = []
    ratings = []

    try:
        # Wait for the reviews section to load
        wait = WebDriverWait(driver, 20)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'ZmyHeo')))

        # Extract reviews
        review_elements = driver.find_elements(By.CLASS_NAME, "ZmyHeo")
        for element in review_elements:
            try:
                # Click "Read More" if available
                read_more = element.find_elements(By.CLASS_NAME, "b4x-fr")
                if read_more:
                    driver.execute_script("arguments[0].click();", read_more[0])
                    time.sleep(1)  # Wait for the full review to load
                
                reviews.append(element.text)
            except StaleElementReferenceException:
                continue

        # Extract star ratings
        rating_elements = driver.find_elements(By.CLASS_NAME, "XQDdHH.Ga3i8K")
        for i in range(max(len(reviews), len(rating_elements))):
            # Append ratings or None if not available
            if i < len(rating_elements):
                ratings.append(rating_elements[i].text)
            else:
                ratings.append(None)  # Placeholder for missing ratings

    except TimeoutException:
        print("Timed out waiting for reviews to load")

    return reviews, ratings


# Function to load the page with the correct page number in the URL
def load_page(driver, url):
    driver.get(url)
    try:
        wait = WebDriverWait(driver, 20)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'ZmyHeo')))
    except TimeoutException:
        print(f"Timed out waiting for page {url} to load")

# Scrape reviews and ratings for all product links
all_data = []

num_pages_reviews = 20  # Number of review pages to scrape per product

# Loop through each product link in the DataFrame
for index, product_link in df['Product_Link'].items():
    if pd.isna(product_link) or product_link.strip() == "":
        print(f"Skipping empty product link at index {index + 1}.")
        continue

    print(f"Scraping product {index + 1}/{len(df)}: {product_link}")
    
    # Scrape reviews for the current product
    driver.get(product_link)
    time.sleep(5)  # Wait for the product page to load

    # Click on the 'All Reviews' button if it exists
    try:
        wait = WebDriverWait(driver, 20)
        all_reviews_button = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, '_23J90q.RcXBOT')))
        all_reviews_button.click()
        time.sleep(5)  # Wait for the reviews page to load
    except TimeoutException:
        print(f"Timed out waiting for 'All Reviews' button to load for {product_link}")
    except Exception as e:
        print(f"Could not find 'All Reviews' button for {product_link}: {e}")

     # Scrape reviews and ratings across multiple review pages
    for page in range(1, num_pages_reviews + 1):
        if page > 1:
            # Update the URL to navigate to the next page of reviews
            page_url = f"{driver.current_url}&page={page}"
            load_page(driver, page_url)

        reviews, ratings = extract_reviews_and_ratings(driver)
        
        # Check if reviews are empty, and if so, stop scraping further pages and move to the next product
        if not reviews:
            print(f"No reviews found on page {page} for {product_link}. Moving to next product.")
            break

        # Append reviews and ratings to the all_data list along with the product link
        for review, rating in zip(reviews, ratings):
            all_data.append({
                'Product_Link': product_link,
                'Review': review,
                'Rating': rating
            })

        time.sleep(5)  # Wait before loading the next reviews page

# Close the driver after scraping is complete
driver.quit()

# Convert the collected data into a DataFrame and save it as a CSV file
result_df = pd.DataFrame(all_data)

# Save to CSV
output_file = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oneplus_reviews_ratings.csv"
result_df.to_csv(output_file, index=False)

print(f"Scraping complete. Data saved to {output_file}")

Scraping product 1/63: https://www.flipkart.com/oneplus-nord-ce5-5g-marble-mist-256-gb/p/itm9259708fe4e3c?pid=MOBHEYWZXBFXUBZZ&lid=LSTMOBHEYWZXBFXUBZZS9IZTZ&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&fm=organic&iid=bd31c0d0-7dd2-4e4f-a5e9-d8c001350c52.MOBHEYWZXBFXUBZZ.SEARCH&ppt=None&ppn=None&ssid=j4nuaavceo0000001757344335000&qH=37695f7554f510f0
Timed out waiting for page https://www.flipkart.com/oneplus-nord-ce5-5g-marble-mist-256-gb/product-reviews/itm9259708fe4e3c?pid=MOBHEYWZXBFXUBZZ&lid=LSTMOBHEYWZXBFXUBZZS9IZTZ&marketplace=FLIPKART&page=2&page=3&page=4&page=5&page=6&page=7&page=8&page=9&page=10&page=11 to load
Timed out waiting for reviews to load
No reviews found on page 11 for https://www.flipkart.com/oneplus-nord-ce5-5g-marble-mist-256-gb/p/itm9259708fe4e3c?pid=MOBHEYWZXBFXUBZZ&lid=LSTMOBHEYWZXBFXUBZZS9IZTZ&marketplace=FLIPKART&q=mobile+phone&st

In [3]:
result_df

Unnamed: 0,Product_Link,Review,Rating
0,https://www.flipkart.com/oneplus-nord-ce5-5g-m...,Very good product.\nI am using Nord CE2 since ...,5
1,https://www.flipkart.com/oneplus-nord-ce5-5g-m...,Very good product,5
2,https://www.flipkart.com/oneplus-nord-ce5-5g-m...,OnePlus Quality Good,5
3,https://www.flipkart.com/oneplus-nord-ce5-5g-m...,The screen quality is immersing and the sound ...,5
4,https://www.flipkart.com/oneplus-nord-ce5-5g-m...,Excellent,5
...,...,...,...
7415,https://www.flipkart.com/oneplus-nord-ce5-5g-m...,Overall good 👍,4
7416,https://www.flipkart.com/oneplus-nord-ce5-5g-m...,"Some app not working properly, crashed, batter...",3
7417,https://www.flipkart.com/oneplus-nord-ce5-5g-m...,Nice,5
7418,https://www.flipkart.com/oneplus-nord-ce5-5g-m...,"Just go for it, no hitting issue.",5


*OPPO DROPED*

In [5]:
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, StaleElementReferenceException

# File paths
input_file = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oppo_cleaned.csv"
output_file = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oppo_reviews_ratings.csv"

# Load product links
df = pd.read_csv(input_file)

# Initialize Chrome driver
driver = webdriver.Chrome()

# Function to extract reviews and ratings
def extract_reviews_and_ratings(driver):
    reviews, ratings = [], []
    try:
        wait = WebDriverWait(driver, 10)
        wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "ZmyHeo")))

        review_elements = driver.find_elements(By.CLASS_NAME, "ZmyHeo")
        rating_elements = driver.find_elements(By.CLASS_NAME, "XQDdHH")

        for i, review_element in enumerate(review_elements):
            try:
                # Click "Read More" if available
                read_more = review_element.find_elements(By.CLASS_NAME, "b4x-fr")
                if read_more:
                    driver.execute_script("arguments[0].click();", read_more[0])
                    time.sleep(1)
                reviews.append(review_element.text)
            except StaleElementReferenceException:
                reviews.append("")

            # Match rating with review if available
            if i < len(rating_elements):
                ratings.append(rating_elements[i].text)
            else:
                ratings.append(None)

    except TimeoutException:
        print("⚠️ No reviews found on this page.")

    return reviews, ratings


# Function to safely load a page
def load_page(url):
    try:
        driver.get(url)
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "ZmyHeo"))
        )
        return True
    except TimeoutException:
        print(f"⚠️ Timeout while loading {url}")
        return False


# Scrape data
all_data = []
num_pages_reviews = 20  # reviews pages per product

for idx, product_link in df['Product_Link'].items():
    if pd.isna(product_link) or not product_link.strip():
        print(f"⏩ Skipping empty link at index {idx + 1}")
        continue

    print(f"\n🔎 Scraping product {idx + 1}/{len(df)}: {product_link}")
    driver.get(product_link)
    time.sleep(3)

    # Click "All Reviews" button
    try:
        all_reviews_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CLASS_NAME, "_23J90q"))
        )
        all_reviews_button.click()
        time.sleep(3)
    except Exception:
        print("⚠️ Could not open 'All Reviews' page. Skipping product.")
        continue

    # Loop through review pages
    base_url = driver.current_url.split("&page=")[0]
    for page in range(1, num_pages_reviews + 1):
        page_url = f"{base_url}&page={page}" if "page=" not in base_url else f"{base_url.split('&page=')[0]}&page={page}"
        if not load_page(page_url):
            break

        reviews, ratings = extract_reviews_and_ratings(driver)
        if not reviews:
            print(f"⚠️ No reviews on page {page}, moving to next product.")
            break

        for review, rating in zip(reviews, ratings):
            all_data.append({
                "Product_Link": product_link,
                "Review": review,
                "Rating": rating
            })

        time.sleep(2)  # be gentle with Flipkart’s servers


# Save results
driver.quit()
result_df = pd.DataFrame(all_data)
result_df.to_csv(output_file, index=False, encoding="utf-8-sig")

print(f"\n✅ Scraping complete. Data saved to {output_file}")



🔎 Scraping product 1/37: https://www.flipkart.com/oppo-reno-13-5g-luminous-blue-128-gb/p/itm9cf979582f79d?pid=MOBHCTVGEHCVF3XU&lid=LSTMOBHCTVGEHCVF3XUYN9IAZ&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_12&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=440096b1-0699-499c-b05d-f908bed9c615.MOBHCTVGEHCVF3XU.SEARCH&ppt=None&ppn=None&ssid=vp4s3olaf40000001757344239567&qH=37695f7554f510f0
⚠️ Timeout while loading https://www.flipkart.com/oppo-reno-13-5g-luminous-blue-128-gb/product-reviews/itm9cf979582f79d?pid=MOBHCTVGEHCVF3XU&lid=LSTMOBHCTVGEHCVF3XU0GKWBQ&marketplace=FLIPKART&page=2

🔎 Scraping product 2/37: https://www.flipkart.com/oppo-f27-pro-dusk-pink-256-gb/p/itm5235c3c836cc9?pid=MOBHF3DJ8CREZW7M&lid=LSTMOBHF3DJ8CREZW7MP8G0L1&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_13&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na

In [6]:
result_df

Unnamed: 0,Product_Link,Review,Rating
0,https://www.flipkart.com/oppo-reno-13-5g-lumin...,"Solid phone good touch and feel in hand, very ...",4.4
1,https://www.flipkart.com/oppo-reno-13-5g-lumin...,Exceelent,5
2,https://www.flipkart.com/oppo-reno-13-5g-lumin...,Awesome under 27k. It's a balanced pakage.,5
3,https://www.flipkart.com/oppo-reno-13-5g-lumin...,Excellent Phone My choice,5
4,https://www.flipkart.com/oppo-f27-pro-dusk-pin...,I bought this under 17k. So in this range this...,4.4
...,...,...,...
3441,https://www.flipkart.com/oppo-f25-pro-5g-coral...,Nice,5
3442,https://www.flipkart.com/oppo-f25-pro-5g-coral...,Nice product under 24k .... overall good 👍,5
3443,https://www.flipkart.com/oppo-f25-pro-5g-coral...,Good mobile,5
3444,https://www.flipkart.com/oppo-f25-pro-5g-coral...,Very good product,5


*MOTOROLA DROPPED*

In [2]:
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, StaleElementReferenceException

# File paths
input_file = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\motorola_cleaned.csv"
output_file = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\motorola_reviews_ratings.csv"

# Load product links
df = pd.read_csv(input_file)

# Initialize Chrome driver
driver = webdriver.Chrome()

# Function to extract reviews and ratings
def extract_reviews_and_ratings(driver):
    reviews, ratings = [], []
    try:
        wait = WebDriverWait(driver, 15)
        wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "ZmyHeo")))

        review_elements = driver.find_elements(By.CLASS_NAME, "ZmyHeo")
        rating_elements = driver.find_elements(By.CLASS_NAME, "XQDdHH")

        for i, review_element in enumerate(review_elements):
            try:
                # Expand "Read More" if present
                read_more = review_element.find_elements(By.CLASS_NAME, "b4x-fr")
                if read_more:
                    driver.execute_script("arguments[0].click();", read_more[0])
                    time.sleep(1)
                reviews.append(review_element.text)
            except StaleElementReferenceException:
                reviews.append("")

            # Match rating if available
            if i < len(rating_elements):
                ratings.append(rating_elements[i].text)
            else:
                ratings.append(None)

    except TimeoutException:
        print("⚠️ No reviews found on this page.")

    return reviews, ratings


# Function to safely load a page
def load_page(url):
    try:
        driver.get(url)
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CLASS_NAME, "ZmyHeo"))
        )
        return True
    except TimeoutException:
        print(f"⚠️ Timeout while loading {url}")
        return False


# Scrape data
all_data = []
num_pages_reviews = 5  # reviews pages per product

for idx, product_link in df['Product_Link'].items():
    if pd.isna(product_link) or not product_link.strip():
        print(f"⏩ Skipping empty link at index {idx + 1}")
        continue

    print(f"\n🔎 Scraping product {idx + 1}/{len(df)}: {product_link}")
    driver.get(product_link)
    time.sleep(3)

    # Click "All Reviews" button
    try:
        all_reviews_button = WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable((By.CLASS_NAME, "_23J90q"))
        )
        all_reviews_button.click()
        time.sleep(3)
    except Exception:
        print("⚠️ Could not open 'All Reviews' page. Skipping product.")
        continue

    # Loop through review pages
    base_url = driver.current_url.split("&page=")[0]
    for page in range(1, num_pages_reviews + 1):
        page_url = f"{base_url}&page={page}" if "page=" not in base_url else f"{base_url.split('&page=')[0]}&page={page}"
        if not load_page(page_url):
            break

        reviews, ratings = extract_reviews_and_ratings(driver)
        if not reviews:
            print(f"⚠️ No reviews on page {page}, moving to next product.")
            break

        for review, rating in zip(reviews, ratings):
            all_data.append({
                "Product_Link": product_link,
                "Review": review,
                "Rating": rating
            })

        time.sleep(2)  # Be gentle with Flipkart’s servers


# Save results
driver.quit()
result_df = pd.DataFrame(all_data)
result_df.to_csv(output_file, index=False, encoding="utf-8-sig")

print(f"\n✅ Scraping complete. Data saved to {output_file}")



🔎 Scraping product 1/78: https://www.flipkart.com/motorola-edge-60-fusion-5g-pantone-slipstream-256-gb/p/itm8553dc1ee56ee?pid=MOBH9ARFZHXSRYMA&lid=LSTMOBH9ARFZHXSRYMAFEL0WA&marketplace=FLIPKART&q=motorola+mobile&store=tyy%2F4io&spotlightTagId=default_BestsellerId_tyy%2F4io&srno=s_1_1&otracker=search&otracker1=search&fm=Search&iid=094fe9f8-d5bb-4184-aadb-1fc48f2ba215.MOBH9ARFZHXSRYMA.SEARCH&ppt=sp&ppn=sp&ssid=cre9khnhio0000001757343043826&qH=979dd593b8c05a74

🔎 Scraping product 2/78: https://www.flipkart.com/motorola-edge-50-fusion-hot-pink-256-gb/p/itm93d002e8a2eb1?pid=MOBGXTYZZBUPYFEC&lid=LSTMOBGXTYZZBUPYFECU3Q95H&marketplace=FLIPKART&q=motorola+mobile&store=tyy%2F4io&srno=s_1_2&otracker=search&otracker1=search&fm=organic&iid=094fe9f8-d5bb-4184-aadb-1fc48f2ba215.MOBGXTYZZBUPYFEC.SEARCH&ppt=None&ppn=None&ssid=cre9khnhio0000001757343043826&qH=979dd593b8c05a74

🔎 Scraping product 3/78: https://www.flipkart.com/motorola-edge-50-fusion-forest-blue-256-gb/p/itme13bbe694468e?pid=MOBGXTYZZEW

In [3]:
result_df

Unnamed: 0,Product_Link,Review,Rating
0,https://www.flipkart.com/motorola-edge-60-fusi...,Nice product 🥰🥰🥰,4.5
1,https://www.flipkart.com/motorola-edge-60-fusi...,Everything is good but Motorola should also la...,5
2,https://www.flipkart.com/motorola-edge-60-fusi...,Very good image quality 😁😍😍,5
3,https://www.flipkart.com/motorola-edge-60-fusi...,Phone is good but there is less one gallery ap...,5
4,https://www.flipkart.com/motorola-edge-60-fusi...,Super fully satisfied,4
...,...,...,...
3715,https://www.flipkart.com/motorola-g60-frosted-...,"Pros :\n1) Stock android experience, no ads, n...",4
3716,https://www.flipkart.com/motorola-g60-frosted-...,Best camera.,4
3717,https://www.flipkart.com/motorola-g60-frosted-...,It's the best phone you can get under 20k. inb...,5
3718,https://www.flipkart.com/motorola-g60-frosted-...,Recently I have purchased this mobile because ...,5


**HONOR**

In [4]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import pandas as pd

# URL of the website
url = "https://www.flipkart.com/search?q=mobile+phone&sid=tyy%2C4io&as=on&as-show=on&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&as-pos=1&as-type=HISTORY&suggestionId=mobile+phone%7CMobiles&requestId=9c7ecdfa-25d5-4ca3-8a0b-18f1e30dd1d4&p%5B%5D=facets.brand%255B%255D%3DHonor"
def initialize_driver():
    driver = webdriver.Chrome()  # Ensure ChromeDriver is in your PATH
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  # Wait for the page to load

# Function to scrape product names, links, and prices
def scrape_product_data(driver):
    product_names = [name.text for name in driver.find_elements(By.CLASS_NAME, 'KzDlHZ')]
    product_links = [link.get_attribute('href') for link in driver.find_elements(By.XPATH, '//a[@class="CGtC98"]')]
    product_prices = [price.text for price in driver.find_elements(By.CLASS_NAME, 'hl05eU')]  # Adjusted class name for prices
    
    # Return product data
    return product_names, product_links, product_prices

# Function to scrape multiple pages
def scrape_multiple_pages(driver, base_url, num_pages):
    all_product_names = []
    all_product_links = []
    all_product_prices = []
    
    for page in range(1, num_pages + 1):
        load_page(driver, f"{base_url}&page={page}")  # Update the URL to include the page number
        product_names, product_links, product_prices = scrape_product_data(driver)
        
        # Ensure the lists are of the same length before extending
        if len(product_names) == len(product_links) == len(product_prices):
            all_product_names.extend(product_names)
            all_product_links.extend(product_links)
            all_product_prices.extend(product_prices)
        else:
            print(f"Warning: Mismatched data on page {page}. Names: {len(product_names)}, Links: {len(product_links)}, Prices: {len(product_prices)}")

        time.sleep(5)  # Wait before loading the next page
    
    return all_product_names, all_product_links, all_product_prices

# Initialize WebDriver and scrape multiple pages
driver = initialize_driver()
all_product_names, all_product_links, all_product_prices = scrape_multiple_pages(driver, url, 4)  # Adjust number of pages as needed

# Close the driver
driver.quit()

# Create a DataFrame to store the results
df = pd.DataFrame({
    'Product_Name': all_product_names,
    'Product_Link': all_product_links,
    'Product_Price': all_product_prices  # Updated to include product prices
})

# Display or save the scraped data
df.head()  # Display the DataFrame
Output_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\honor.csv"
df.to_csv(Output_path, index=False)

In [8]:
df = pd.read_csv(r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\honor.csv")

In [9]:
import re

# Function to extract and clean the price (remove rupee symbol and commas)
def extract_clean_price(price_string):
    # Find the rupee symbol followed by the price
    match = re.search(r'₹(\d[\d,]*)', price_string)
    if match:
        # Remove the rupee symbol and commas, and convert to an integer
        return int(match.group(1).replace(',', ''))
    return None

# Apply the function to the 'Price' column
df['Product_Price'] = df['Product_Price'].apply(extract_clean_price)

In [10]:
df = df[df['Product_Price'] <= 40000]
Output_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\honor_cleaned.csv"
df.to_csv(Output_path, index=False)

*HONOR DROPPED*

In [32]:
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, StaleElementReferenceException

# File paths (honor)
input_file = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\honor_cleaned.csv"
output_file = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\honor_reviews_ratings.csv"

# Load product links
df = pd.read_csv(input_file)

# Initialize Chrome driver
driver = webdriver.Chrome()

# Function to extract reviews and ratings
def extract_reviews_and_ratings(driver):
    reviews, ratings = [], []
    try:
        wait = WebDriverWait(driver, 15)
        wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "ZmyHeo")))

        review_elements = driver.find_elements(By.CLASS_NAME, "ZmyHeo")
        rating_elements = driver.find_elements(By.CLASS_NAME, "XQDdHH")

        for i, review_element in enumerate(review_elements):
            try:
                # Expand "Read More" if present
                read_more = review_element.find_elements(By.CLASS_NAME, "b4x-fr")
                if read_more:
                    driver.execute_script("arguments[0].click();", read_more[0])
                    time.sleep(1)
                reviews.append(review_element.text)
            except StaleElementReferenceException:
                reviews.append("")

            # Match rating if available
            if i < len(rating_elements):
                ratings.append(rating_elements[i].text)
            else:
                ratings.append(None)

    except TimeoutException:
        print("⚠️ No reviews found on this page.")

    return reviews, ratings


# Function to safely load a page
def load_page(url):
    try:
        driver.get(url)
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CLASS_NAME, "ZmyHeo"))
        )
        return True
    except TimeoutException:
        print(f"⚠️ Timeout while loading {url}")
        return False


# Scrape data
all_data = []
num_pages_reviews = 5  # reviews pages per product

for idx, product_link in df['Product_Link'].items():
    if pd.isna(product_link) or not product_link.strip():
        print(f"⏩ Skipping empty link at index {idx + 1}")
        continue

    print(f"\n🔎 Scraping product {idx + 1}/{len(df)}: {product_link}")
    driver.get(product_link)
    time.sleep(3)

    # Click "All Reviews" button
    try:
        all_reviews_button = WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable((By.CLASS_NAME, "_23J90q"))
        )
        all_reviews_button.click()
        time.sleep(3)
    except Exception:
        print("⚠️ Could not open 'All Reviews' page. Skipping product.")
        continue

    # Loop through review pages
    base_url = driver.current_url.split("&page=")[0]
    for page in range(1, num_pages_reviews + 1):
        page_url = f"{base_url}&page={page}" if "page=" not in base_url else f"{base_url.split('&page=')[0]}&page={page}"
        if not load_page(page_url):
            break

        reviews, ratings = extract_reviews_and_ratings(driver)
        if not reviews:
            print(f"⚠️ No reviews on page {page}, moving to next product.")
            break

        for review, rating in zip(reviews, ratings):
            all_data.append({
                "Product_Link": product_link,
                "Review": review,
                "Rating": rating
            })

        time.sleep(2)  # Be gentle with Flipkart’s servers


# Save results
driver.quit()
result_df = pd.DataFrame(all_data)
result_df.to_csv(output_file, index=False, encoding="utf-8-sig")

print(f"\n✅ Scraping complete. Data saved to {output_file}")



🔎 Scraping product 1/38: https://www.flipkart.com/honor-200-5g-moonlight-white-512-gb/p/itm6c73c53970f43?pid=MOBH32HGXDBGQNBT&lid=LSTMOBH32HGXDBGQNBTLU7OIM&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=45733c24-1b3d-4299-b434-a9842e7e3ce8.MOBH32HGXDBGQNBT.SEARCH&ppt=None&ppn=None&ssid=pp6fyyauds0000001757426938552&qH=37695f7554f510f0
⚠️ Timeout while loading https://www.flipkart.com/honor-200-5g-moonlight-white-512-gb/product-reviews/itm6c73c53970f43?pid=MOBH32HGXDBGQNBT&lid=LSTMOBH32HGXDBGQNBTLU7OIM&marketplace=FLIPKART&page=2

🔎 Scraping product 2/38: https://www.flipkart.com/honor-200-5g-black-512-gb/p/itm6c73c53970f43?pid=MOBH34RU4S8Q5CBJ&lid=LSTMOBH34RU4S8Q5CBJI5PQRX&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_2&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&f

In [33]:
result_df

Unnamed: 0,Product_Link,Review,Rating
0,https://www.flipkart.com/honor-200-5g-moonlight-white-512-gb/p/itm6c73c53970f43?pid=MOBH32HGXDBGQNBT&lid=LSTMOBH32HGXDBGQNBTLU7OIM&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=45733c24-1b3d-4299-b434-a9842e7e3ce8.MOBH32HGXDBGQNBT.SEARCH&ppt=None&ppn=None&ssid=pp6fyyauds0000001757426938552&qH=37695f7554f510f0,Lovely camera,4.4
1,https://www.flipkart.com/honor-200-5g-moonlight-white-512-gb/p/itm6c73c53970f43?pid=MOBH32HGXDBGQNBT&lid=LSTMOBH32HGXDBGQNBTLU7OIM&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=45733c24-1b3d-4299-b434-a9842e7e3ce8.MOBH32HGXDBGQNBT.SEARCH&ppt=None&ppn=None&ssid=pp6fyyauds0000001757426938552&qH=37695f7554f510f0,Very good product nice quality\n\n\nVery fast delivery\n\nThanku,5
2,https://www.flipkart.com/honor-200-5g-moonlight-white-512-gb/p/itm6c73c53970f43?pid=MOBH32HGXDBGQNBT&lid=LSTMOBH32HGXDBGQNBTLU7OIM&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=45733c24-1b3d-4299-b434-a9842e7e3ce8.MOBH32HGXDBGQNBT.SEARCH&ppt=None&ppn=None&ssid=pp6fyyauds0000001757426938552&qH=37695f7554f510f0,Awesome phone! I recommend it for below 30K budget.,5
3,https://www.flipkart.com/honor-200-5g-moonlight-white-512-gb/p/itm6c73c53970f43?pid=MOBH32HGXDBGQNBT&lid=LSTMOBH32HGXDBGQNBTLU7OIM&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=45733c24-1b3d-4299-b434-a9842e7e3ce8.MOBH32HGXDBGQNBT.SEARCH&ppt=None&ppn=None&ssid=pp6fyyauds0000001757426938552&qH=37695f7554f510f0,This product is very good quality is a good product,5
4,https://www.flipkart.com/honor-200-5g-black-512-gb/p/itm6c73c53970f43?pid=MOBH34RU4S8Q5CBJ&lid=LSTMOBH34RU4S8Q5CBJI5PQRX&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_2&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=45733c24-1b3d-4299-b434-a9842e7e3ce8.MOBH34RU4S8Q5CBJ.SEARCH&ppt=None&ppn=None&ssid=pp6fyyauds0000001757426938552&qH=37695f7554f510f0,Lovely camera,4.4
...,...,...,...
1330,https://www.flipkart.com/honor-9n-sapphire-blue-64-gb/p/itmf9pgsehv6nmss?pid=MOBF6NARU2TTTXBG&lid=LSTMOBF6NARU2TTTXBGDWWCIJ&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_2_40&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=ff6c29a9-1295-4c1e-a7fe-71128fb6681f.MOBF6NARU2TTTXBG.SEARCH&ppt=sp&ppn=sp&ssid=cmjbmuh4ps0000001757426957342&qH=37695f7554f510f0,Nice peice from huwaei...,5
1331,https://www.flipkart.com/honor-9n-sapphire-blue-64-gb/p/itmf9pgsehv6nmss?pid=MOBF6NARU2TTTXBG&lid=LSTMOBF6NARU2TTTXBGDWWCIJ&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_2_40&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=ff6c29a9-1295-4c1e-a7fe-71128fb6681f.MOBF6NARU2TTTXBG.SEARCH&ppt=sp&ppn=sp&ssid=cmjbmuh4ps0000001757426957342&qH=37695f7554f510f0,Front camera..... 4/5 Rear camera...... 3.5/5 battery...... 3.5/5 software.... 3.7/5 display.... 4.6/5 Rear camera should be more better. Some times found software float like freezing for 1 or 2 seconds.... Processing rate should be little more in this price rang... And hybrid sim slot why?... Battery should be around 4000 mah in this price range.... Only single sim 4g are you kidding Honor? Good for mid level gaming because this phone is not for high level gaming...... Expect this all over experience is good... I like this phone Honor,4
1332,https://www.flipkart.com/honor-9n-sapphire-blue-64-gb/p/itmf9pgsehv6nmss?pid=MOBF6NARU2TTTXBG&lid=LSTMOBF6NARU2TTTXBGDWWCIJ&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_2_40&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=ff6c29a9-1295-4c1e-a7fe-71128fb6681f.MOBF6NARU2TTTXBG.SEARCH&ppt=sp&ppn=sp&ssid=cmjbmuh4ps0000001757426957342&qH=37695f7554f510f0,awesome mobile...i like it.\nmust buy,4
1333,https://www.flipkart.com/honor-9n-sapphire-blue-64-gb/p/itmf9pgsehv6nmss?pid=MOBF6NARU2TTTXBG&lid=LSTMOBF6NARU2TTTXBGDWWCIJ&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_2_40&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=ff6c29a9-1295-4c1e-a7fe-71128fb6681f.MOBF6NARU2TTTXBG.SEARCH&ppt=sp&ppn=sp&ssid=cmjbmuh4ps0000001757426957342&qH=37695f7554f510f0,Delivered on next day of booking.... Wow good choice for 14k....,5


**GOOGLE**

In [11]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import pandas as pd

# URL of the website
url = "https://www.flipkart.com/search?q=mobile+phone&sid=tyy%2C4io&as=on&as-show=on&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&as-pos=1&as-type=HISTORY&suggestionId=mobile+phone%7CMobiles&requestId=9c7ecdfa-25d5-4ca3-8a0b-18f1e30dd1d4&p%5B%5D=facets.brand%255B%255D%3DGoogle"
def initialize_driver():
    driver = webdriver.Chrome()  # Ensure ChromeDriver is in your PATH
    driver.maximize_window()
    return driver

def load_page(driver, url):
    driver.get(url)
    time.sleep(5)  # Wait for the page to load

# Function to scrape product names, links, and prices
def scrape_product_data(driver):
    product_names = [name.text for name in driver.find_elements(By.CLASS_NAME, 'KzDlHZ')]
    product_links = [link.get_attribute('href') for link in driver.find_elements(By.XPATH, '//a[@class="CGtC98"]')]
    product_prices = [price.text for price in driver.find_elements(By.CLASS_NAME, 'hl05eU')]  # Adjusted class name for prices
    
    # Return product data
    return product_names, product_links, product_prices

# Function to scrape multiple pages
def scrape_multiple_pages(driver, base_url, num_pages):
    all_product_names = []
    all_product_links = []
    all_product_prices = []
    
    for page in range(1, num_pages + 1):
        load_page(driver, f"{base_url}&page={page}")  # Update the URL to include the page number
        product_names, product_links, product_prices = scrape_product_data(driver)
        
        # Ensure the lists are of the same length before extending
        if len(product_names) == len(product_links) == len(product_prices):
            all_product_names.extend(product_names)
            all_product_links.extend(product_links)
            all_product_prices.extend(product_prices)
        else:
            print(f"Warning: Mismatched data on page {page}. Names: {len(product_names)}, Links: {len(product_links)}, Prices: {len(product_prices)}")

        time.sleep(5)  # Wait before loading the next page
    
    return all_product_names, all_product_links, all_product_prices

# Initialize WebDriver and scrape multiple pages
driver = initialize_driver()
all_product_names, all_product_links, all_product_prices = scrape_multiple_pages(driver, url, 4)  # Adjust number of pages as needed

# Close the driver
driver.quit()

# Create a DataFrame to store the results
df = pd.DataFrame({
    'Product_Name': all_product_names,
    'Product_Link': all_product_links,
    'Product_Price': all_product_prices  # Updated to include product prices
})

# Display or save the scraped data
df.head()  # Display the DataFrame
Output_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\google.csv"
df.to_csv(Output_path, index=False)

In [12]:
import re

# Function to extract and clean the price (remove rupee symbol and commas)
def extract_clean_price(price_string):
    # Find the rupee symbol followed by the price
    match = re.search(r'₹(\d[\d,]*)', price_string)
    if match:
        # Remove the rupee symbol and commas, and convert to an integer
        return int(match.group(1).replace(',', ''))
    return None

# Apply the function to the 'Price' column
df['Product_Price'] = df['Product_Price'].apply(extract_clean_price)

In [13]:
df = df[df['Product_Price'] <= 40000]
Output_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\google_cleaned.csv"
df.to_csv(Output_path, index=False)

*GOOGLE DROPPED*

In [27]:
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, StaleElementReferenceException

# File paths (motorola → google)
input_file = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\google_cleaned.csv"
output_file = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\google_reviews_ratings.csv"

# Load product links
df = pd.read_csv(input_file)

# Initialize Chrome driver
driver = webdriver.Chrome()

# Function to extract reviews and ratings
def extract_reviews_and_ratings(driver):
    reviews, ratings = [], []
    try:
        wait = WebDriverWait(driver, 15)
        wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "ZmyHeo")))

        review_elements = driver.find_elements(By.CLASS_NAME, "ZmyHeo")
        rating_elements = driver.find_elements(By.CLASS_NAME, "XQDdHH")

        for i, review_element in enumerate(review_elements):
            try:
                # Expand "Read More" if present
                read_more = review_element.find_elements(By.CLASS_NAME, "b4x-fr")
                if read_more:
                    driver.execute_script("arguments[0].click();", read_more[0])
                    time.sleep(1)
                reviews.append(review_element.text)
            except StaleElementReferenceException:
                reviews.append("")

            # Match rating if available
            if i < len(rating_elements):
                ratings.append(rating_elements[i].text)
            else:
                ratings.append(None)

    except TimeoutException:
        print("⚠️ No reviews found on this page.")

    return reviews, ratings


# Function to safely load a page
def load_page(url):
    try:
        driver.get(url)
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CLASS_NAME, "ZmyHeo"))
        )
        return True
    except TimeoutException:
        print(f"⚠️ Timeout while loading {url}")
        return False


# Scrape data
all_data = []
num_pages_reviews = 5  # reviews pages per product

for idx, product_link in df['Product_Link'].items():
    if pd.isna(product_link) or not product_link.strip():
        print(f"⏩ Skipping empty link at index {idx + 1}")
        continue

    print(f"\n🔎 Scraping product {idx + 1}/{len(df)}: {product_link}")
    driver.get(product_link)
    time.sleep(3)

    # Click "All Reviews" button
    try:
        all_reviews_button = WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable((By.CLASS_NAME, "_23J90q"))
        )
        all_reviews_button.click()
        time.sleep(3)
    except Exception:
        print("⚠️ Could not open 'All Reviews' page. Skipping product.")
        continue

    # Loop through review pages
    base_url = driver.current_url.split("&page=")[0]
    for page in range(1, num_pages_reviews + 1):
        page_url = f"{base_url}&page={page}" if "page=" not in base_url else f"{base_url.split('&page=')[0]}&page={page}"
        if not load_page(page_url):
            break

        reviews, ratings = extract_reviews_and_ratings(driver)
        if not reviews:
            print(f"⚠️ No reviews on page {page}, moving to next product.")
            break

        for review, rating in zip(reviews, ratings):
            all_data.append({
                "Product_Link": product_link,
                "Review": review,
                "Rating": rating
            })

        time.sleep(2)  # Be gentle with Flipkart’s servers


# Save results
driver.quit()
result_df = pd.DataFrame(all_data)
result_df.to_csv(output_file, index=False, encoding="utf-8-sig")

print(f"\n✅ Scraping complete. Data saved to {output_file}")



🔎 Scraping product 1/17: https://www.flipkart.com/google-pixel-7-lemongrass-128-gb/p/itm45d75002be0e7?pid=MOBGHW44ZSN5EPGU&lid=LSTMOBGHW44ZSN5EPGURSVVHV&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=a19eba61-c14b-4918-b58f-038b60db4cdd.MOBGHW44ZSN5EPGU.SEARCH&ppt=None&ppn=None&ssid=0g3xr6atnk0000001757427391662&qH=37695f7554f510f0

🔎 Scraping product 2/17: https://www.flipkart.com/google-pixel-8a-obsidian-128-gb/p/itm5c791e407a330?pid=MOBGYQ2MNHZWXRNY&lid=LSTMOBGYQ2MNHZWXRNYGSN6LA&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_2&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=a19eba61-c14b-4918-b58f-038b60db4cdd.MOBGYQ2MNHZWXRNY.SEARCH&ppt=None&ppn=None&ssid=0g3xr6atnk0000001757427391662&qH=37695f7554f510f0

🔎 Scraping product 3/17: https://www.flipkart.

In [28]:
result_df

Unnamed: 0,Product_Link,Review,Rating
0,https://www.flipkart.com/google-pixel-7-lemongrass-128-gb/p/itm45d75002be0e7?pid=MOBGHW44ZSN5EPGU&lid=LSTMOBGHW44ZSN5EPGURSVVHV&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=a19eba61-c14b-4918-b58f-038b60db4cdd.MOBGHW44ZSN5EPGU.SEARCH&ppt=None&ppn=None&ssid=0g3xr6atnk0000001757427391662&qH=37695f7554f510f0,"The real smart phone in the market.....\nLets talk about it.....\n\nCamera: In this range two best camera in the market are pixel 7 and s22. Portarit blur is more perfect in s22 but pixel 7 is real smart in image processing. This camera doesn't have manual mode so i was lil concern but when i started i never felt any need. Oh my god the night mode is just awesome.\nA few cons of camera are 1. Wide angle is not that wide and edge detection in portrait is not good. Cinematic blur is a beta feature only.\n\nPerformance: Not the best processor in the market in this price tag but definitely the smartest processor in any price range. Not even iphone is close to its smartness. This is not a gaming phone rather one should buy this phone for AI experience.\n\nDesign: 9/10 because of its unique design. 1 minus for glossy finish, matte would have been more premium. Though built quality is very good n premium.\nThis phone never lagged after 2 weeks of experience. Very few minor bugs. Best android experience.\n\nBattery: In comparison to other flagships this is a great battery. Full day battery for casual users.Heating issue solved after update.\n*Other than this fingerprint sensor was failing frequently but it is decently fast. After software updating 80% issue is now solved.\nOther experience are very good.\n*Best Haptic engine, better than any phone.",4.3
1,https://www.flipkart.com/google-pixel-7-lemongrass-128-gb/p/itm45d75002be0e7?pid=MOBGHW44ZSN5EPGU&lid=LSTMOBGHW44ZSN5EPGURSVVHV&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=a19eba61-c14b-4918-b58f-038b60db4cdd.MOBGHW44ZSN5EPGU.SEARCH&ppt=None&ppn=None&ssid=0g3xr6atnk0000001757427391662&qH=37695f7554f510f0,Night mode and Indoor clicks are awesome,4
2,https://www.flipkart.com/google-pixel-7-lemongrass-128-gb/p/itm45d75002be0e7?pid=MOBGHW44ZSN5EPGU&lid=LSTMOBGHW44ZSN5EPGURSVVHV&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=a19eba61-c14b-4918-b58f-038b60db4cdd.MOBGHW44ZSN5EPGU.SEARCH&ppt=None&ppn=None&ssid=0g3xr6atnk0000001757427391662&qH=37695f7554f510f0,"Perfect phone, one star less because of its weight.",5
3,https://www.flipkart.com/google-pixel-7-lemongrass-128-gb/p/itm45d75002be0e7?pid=MOBGHW44ZSN5EPGU&lid=LSTMOBGHW44ZSN5EPGURSVVHV&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=a19eba61-c14b-4918-b58f-038b60db4cdd.MOBGHW44ZSN5EPGU.SEARCH&ppt=None&ppn=None&ssid=0g3xr6atnk0000001757427391662&qH=37695f7554f510f0,"Writing this review after almost a month of use\nAmazing cameras, Excellent performance, Great display, haven't seen any bugs in OS yet, great design\nWas having a heating issue initially, temprature used to goes upto 43 Celcius. I was worried and was thinking to replace. But after a week of use everything went to normal haven't faced heating problem afterwards. Maybe because of optimization. Same goes for battery performance. Initially I was getting 4 to 4.5 hour of screen on time. But after week or 10 days of usage it started getting improved, now I almost get 5.5 to 6 hours of screen on time.\nNo other complaints so far. Highly recommended.",4
4,https://www.flipkart.com/google-pixel-7-lemongrass-128-gb/p/itm45d75002be0e7?pid=MOBGHW44ZSN5EPGU&lid=LSTMOBGHW44ZSN5EPGURSVVHV&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=a19eba61-c14b-4918-b58f-038b60db4cdd.MOBGHW44ZSN5EPGU.SEARCH&ppt=None&ppn=None&ssid=0g3xr6atnk0000001757427391662&qH=37695f7554f510f0,"No words, simply superb camera and overall, very rich and smart. Just buy, no need to think twice.",5
...,...,...,...
595,https://www.flipkart.com/google-pixel-7a-coral-128-gb/p/itmb4d7b100b1a4d?pid=MOBGT5F26QJYZUZS&lid=LSTMOBGT5F26QJYZUZSUUTWOI&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_3_49&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=62172703-5582-49ef-99fb-4b23982a905c.MOBGT5F26QJYZUZS.SEARCH&ppt=sp&ppn=sp&ssid=9kxu3hoc3k0000001757427419567&qH=37695f7554f510f0,Pros:\n1)Good clean Android UI experience\n2)good camera\n3)Google updates\n\nCons:\n1)phone heats up temperature 40DegC\n2)battery drains fast need to charge frequently\n3)Connectivity is very bad results in call drop no Internet\n\nFinal verdict waste for money,5
596,https://www.flipkart.com/google-pixel-7a-coral-128-gb/p/itmb4d7b100b1a4d?pid=MOBGT5F26QJYZUZS&lid=LSTMOBGT5F26QJYZUZSUUTWOI&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_3_49&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=62172703-5582-49ef-99fb-4b23982a905c.MOBGT5F26QJYZUZS.SEARCH&ppt=sp&ppn=sp&ssid=9kxu3hoc3k0000001757427419567&qH=37695f7554f510f0,Best phone at its price point.\nAwesome camera and flawless software experience. No complaints of heating and battery drain after upgrading to Android 14.\nPremium built quality and haptic feedback. Call quality and network reception is top-notch. Totally loving the coral colour,3
597,https://www.flipkart.com/google-pixel-7a-coral-128-gb/p/itmb4d7b100b1a4d?pid=MOBGT5F26QJYZUZS&lid=LSTMOBGT5F26QJYZUZSUUTWOI&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_3_49&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=62172703-5582-49ef-99fb-4b23982a905c.MOBGT5F26QJYZUZS.SEARCH&ppt=sp&ppn=sp&ssid=9kxu3hoc3k0000001757427419567&qH=37695f7554f510f0,"Pixal 7a (sea,128) is a remarkable device that combines top-notch camera quality with a compact and portable design. Its primary camera captures stunning photos with clarity, vibrant colors, and impressive detail. The phone's low-light performance is exceptional, allowing you to capture memorable moments even in challenging lighting conditions. With its versatile camera modes and settings, including manual mode and AI-enhanced features, this phone caters to a variety of photography styles. Additionally, the front-facing camera delivers excellent selfies with accurate skin tones and sharp details. Overall, pixal 7a is a perfect blend of compactness and excellent camera quality, making it an ideal choice for photography enthusiasts on the go.",5
598,https://www.flipkart.com/google-pixel-7a-coral-128-gb/p/itmb4d7b100b1a4d?pid=MOBGT5F26QJYZUZS&lid=LSTMOBGT5F26QJYZUZSUUTWOI&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_3_49&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=62172703-5582-49ef-99fb-4b23982a905c.MOBGT5F26QJYZUZS.SEARCH&ppt=sp&ppn=sp&ssid=9kxu3hoc3k0000001757427419567&qH=37695f7554f510f0,"Reviewing this phone after one month of usage.\n\nCamera- 9/10 . It captures detailed photos but a dedicated zoom lens is missing so zoomed photos are not clear.\n\nPerformance - 10/10. This phone performs great in every day tasks. Multitasking and ram management is great. I don't do heavy gaming but you can play games like pubg and cod with no issues.\n\nBattery -8/10. After tweaking some settings, I get about 6 hours of SOT every day. Battery charging is not too slow but not fast either. I get no heating issues till now.\n\nDesign - 10/10. I like it's compact design. It's very easy for one hand use. Phone don't feel too big in pocket. Build quality is also great. Plastic on the back mimic like glass and have good quality.\n\nDisplay 9/10. Display colours are vibrant. Movies look good in this display. Although it struggles when you use in direct sunlight. Display is not bright enough.",5


**MERGING**

*MOTOROLA*

In [14]:
import pandas as pd

# File paths
cleaned_file_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\motorola_cleaned.csv"
reviews_ratings_file_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\motorola_reviews_ratings.csv"
output_file_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\motorola_merged.csv"

# Read the CSV files
cleaned_df = pd.read_csv(cleaned_file_path)
reviews_ratings_df = pd.read_csv(reviews_ratings_file_path)

# Merge the DataFrames on the 'Product_Link' column
merged_df = pd.merge(cleaned_df, reviews_ratings_df, on='Product_Link', how='left')

# Save the merged DataFrame to a new CSV file
merged_df.to_csv(output_file_path, index=False)

print(f"Merged file saved to {output_file_path}")

Merged file saved to C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\motorola_merged.csv


In [16]:
df = pd.read_csv(r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\motorola_merged.csv")

In [17]:
df

Unnamed: 0,Product_Name,Product_Link,Product_Price,Review,Rating
0,MOTOROLA Edge 60 Fusion 5G (PANTONE Slipstream...,https://www.flipkart.com/motorola-edge-60-fusi...,22999,Nice product 🥰🥰🥰,4.5
1,MOTOROLA Edge 60 Fusion 5G (PANTONE Slipstream...,https://www.flipkart.com/motorola-edge-60-fusi...,22999,Everything is good but Motorola should also la...,5.0
2,MOTOROLA Edge 60 Fusion 5G (PANTONE Slipstream...,https://www.flipkart.com/motorola-edge-60-fusi...,22999,Very good image quality 😁😍😍,5.0
3,MOTOROLA Edge 60 Fusion 5G (PANTONE Slipstream...,https://www.flipkart.com/motorola-edge-60-fusi...,22999,Phone is good but there is less one gallery ap...,5.0
4,MOTOROLA Edge 60 Fusion 5G (PANTONE Slipstream...,https://www.flipkart.com/motorola-edge-60-fusi...,22999,Super fully satisfied,4.0
...,...,...,...,...,...
3717,"Motorola G60 (Frosted Champagne, 128 GB)",https://www.flipkart.com/motorola-g60-frosted-...,20999,"Pros :\n1) Stock android experience, no ads, n...",4.0
3718,"Motorola G60 (Frosted Champagne, 128 GB)",https://www.flipkart.com/motorola-g60-frosted-...,20999,Best camera.,4.0
3719,"Motorola G60 (Frosted Champagne, 128 GB)",https://www.flipkart.com/motorola-g60-frosted-...,20999,It's the best phone you can get under 20k. inb...,5.0
3720,"Motorola G60 (Frosted Champagne, 128 GB)",https://www.flipkart.com/motorola-g60-frosted-...,20999,Recently I have purchased this mobile because ...,5.0


In [18]:
# Set options to display more characters
pd.set_option('display.max_colwidth', None)  

*ONEPLUS*

In [19]:
import pandas as pd

# File paths
cleaned_file_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oneplus_cleaned.csv"
reviews_ratings_file_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oneplus_reviews_ratings.csv"
output_file_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oneplus_merged.csv"

# Read the CSV files
cleaned_df = pd.read_csv(cleaned_file_path)
reviews_ratings_df = pd.read_csv(reviews_ratings_file_path)

# Merge the DataFrames on the 'Product_Link' column
merged_df = pd.merge(cleaned_df, reviews_ratings_df, on='Product_Link', how='left')

# Save the merged DataFrame to a new CSV file
merged_df.to_csv(output_file_path, index=False)

print(f"Merged file saved to {output_file_path}")

Merged file saved to C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oneplus_merged.csv


In [20]:
df=pd.read_csv(r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oneplus_merged.csv")

In [21]:
df

Unnamed: 0,Product_Name,Product_Link,Product_Price,Review,Rating
0,"OnePlus Nord CE5 5G (Marble Mist, 256 GB)",https://www.flipkart.com/oneplus-nord-ce5-5g-marble-mist-256-gb/p/itm9259708fe4e3c?pid=MOBHEYWZXBFXUBZZ&lid=LSTMOBHEYWZXBFXUBZZS9IZTZ&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&fm=organic&iid=bd31c0d0-7dd2-4e4f-a5e9-d8c001350c52.MOBHEYWZXBFXUBZZ.SEARCH&ppt=None&ppn=None&ssid=j4nuaavceo0000001757344335000&qH=37695f7554f510f0,26113,Very good product.\nI am using Nord CE2 since 2020. Since 5 years its working superbly. Recently display has broken. Then I go for Nord CE5 last week. Nord CE series mobiles are excellent working condition.\nOnly one problem for me only that.... Earphones port is not available in this mobile..... Remaining all excellent.,5.0
1,"OnePlus Nord CE5 5G (Marble Mist, 256 GB)",https://www.flipkart.com/oneplus-nord-ce5-5g-marble-mist-256-gb/p/itm9259708fe4e3c?pid=MOBHEYWZXBFXUBZZ&lid=LSTMOBHEYWZXBFXUBZZS9IZTZ&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&fm=organic&iid=bd31c0d0-7dd2-4e4f-a5e9-d8c001350c52.MOBHEYWZXBFXUBZZ.SEARCH&ppt=None&ppn=None&ssid=j4nuaavceo0000001757344335000&qH=37695f7554f510f0,26113,Very good product,5.0
2,"OnePlus Nord CE5 5G (Marble Mist, 256 GB)",https://www.flipkart.com/oneplus-nord-ce5-5g-marble-mist-256-gb/p/itm9259708fe4e3c?pid=MOBHEYWZXBFXUBZZ&lid=LSTMOBHEYWZXBFXUBZZS9IZTZ&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&fm=organic&iid=bd31c0d0-7dd2-4e4f-a5e9-d8c001350c52.MOBHEYWZXBFXUBZZ.SEARCH&ppt=None&ppn=None&ssid=j4nuaavceo0000001757344335000&qH=37695f7554f510f0,26113,OnePlus Quality Good,5.0
3,"OnePlus Nord CE5 5G (Marble Mist, 256 GB)",https://www.flipkart.com/oneplus-nord-ce5-5g-marble-mist-256-gb/p/itm9259708fe4e3c?pid=MOBHEYWZXBFXUBZZ&lid=LSTMOBHEYWZXBFXUBZZS9IZTZ&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&fm=organic&iid=bd31c0d0-7dd2-4e4f-a5e9-d8c001350c52.MOBHEYWZXBFXUBZZ.SEARCH&ppt=None&ppn=None&ssid=j4nuaavceo0000001757344335000&qH=37695f7554f510f0,26113,The screen quality is immersing and the sound is astonishingly loud and clear.,5.0
4,"OnePlus Nord CE5 5G (Marble Mist, 256 GB)",https://www.flipkart.com/oneplus-nord-ce5-5g-marble-mist-256-gb/p/itm9259708fe4e3c?pid=MOBHEYWZXBFXUBZZ&lid=LSTMOBHEYWZXBFXUBZZS9IZTZ&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&fm=organic&iid=bd31c0d0-7dd2-4e4f-a5e9-d8c001350c52.MOBHEYWZXBFXUBZZ.SEARCH&ppt=None&ppn=None&ssid=j4nuaavceo0000001757344335000&qH=37695f7554f510f0,26113,Excellent,5.0
...,...,...,...,...,...
7417,"OnePlus Nord CE5 5G (Marble Mist, 256 GB)",https://www.flipkart.com/oneplus-nord-ce5-5g-marble-mist-256-gb/p/itm9259708fe4e3c?pid=MOBHE23NDP6XQN7U&lid=LSTMOBHE23NDP6XQN7UDKVMDA&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_4_96&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&fm=search-autosuggest&iid=c900fa79-78b1-41e8-8851-276b3d344482.MOBHE23NDP6XQN7U.SEARCH&ppt=sp&ppn=sp&ssid=0b44d0gncw0000001757344373936&qH=37695f7554f510f0,26500,Overall good 👍,4.0
7418,"OnePlus Nord CE5 5G (Marble Mist, 256 GB)",https://www.flipkart.com/oneplus-nord-ce5-5g-marble-mist-256-gb/p/itm9259708fe4e3c?pid=MOBHE23NDP6XQN7U&lid=LSTMOBHE23NDP6XQN7UDKVMDA&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_4_96&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&fm=search-autosuggest&iid=c900fa79-78b1-41e8-8851-276b3d344482.MOBHE23NDP6XQN7U.SEARCH&ppt=sp&ppn=sp&ssid=0b44d0gncw0000001757344373936&qH=37695f7554f510f0,26500,"Some app not working properly, crashed, battery drained likely 5000 mah, front camera average, bloating apps looks like 10k mobile. I am not gamer and not tested games",3.0
7419,"OnePlus Nord CE5 5G (Marble Mist, 256 GB)",https://www.flipkart.com/oneplus-nord-ce5-5g-marble-mist-256-gb/p/itm9259708fe4e3c?pid=MOBHE23NDP6XQN7U&lid=LSTMOBHE23NDP6XQN7UDKVMDA&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_4_96&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&fm=search-autosuggest&iid=c900fa79-78b1-41e8-8851-276b3d344482.MOBHE23NDP6XQN7U.SEARCH&ppt=sp&ppn=sp&ssid=0b44d0gncw0000001757344373936&qH=37695f7554f510f0,26500,Nice,5.0
7420,"OnePlus Nord CE5 5G (Marble Mist, 256 GB)",https://www.flipkart.com/oneplus-nord-ce5-5g-marble-mist-256-gb/p/itm9259708fe4e3c?pid=MOBHE23NDP6XQN7U&lid=LSTMOBHE23NDP6XQN7UDKVMDA&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_4_96&otracker=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&otracker1=AS_QueryStore_OrganicAutoSuggest_2_12_na_na_na&fm=search-autosuggest&iid=c900fa79-78b1-41e8-8851-276b3d344482.MOBHE23NDP6XQN7U.SEARCH&ppt=sp&ppn=sp&ssid=0b44d0gncw0000001757344373936&qH=37695f7554f510f0,26500,"Just go for it, no hitting issue.",5.0


*OPPO*

In [22]:
import pandas as pd

# File paths
cleaned_file_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oppo_cleaned.csv"
reviews_ratings_file_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oppo_reviews_ratings.csv"
output_file_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oppo_merged.csv"

# Read the CSV files
cleaned_df = pd.read_csv(cleaned_file_path)
reviews_ratings_df = pd.read_csv(reviews_ratings_file_path)

# Merge the DataFrames on the 'Product_Link' column
merged_df = pd.merge(cleaned_df, reviews_ratings_df, on='Product_Link', how='left')

# Save the merged DataFrame to a new CSV file
merged_df.to_csv(output_file_path, index=False)

print(f"Merged file saved to {output_file_path}")

Merged file saved to C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oppo_merged.csv


In [23]:
df=pd.read_csv(r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\oppo_merged.csv")

In [24]:
df

Unnamed: 0,Product_Name,Product_Link,Product_Price,Review,Rating
0,"OPPO RENO 13 5G (Luminous Blue, 128 GB)",https://www.flipkart.com/oppo-reno-13-5g-luminous-blue-128-gb/p/itm9cf979582f79d?pid=MOBHCTVGEHCVF3XU&lid=LSTMOBHCTVGEHCVF3XUYN9IAZ&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_12&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=440096b1-0699-499c-b05d-f908bed9c615.MOBHCTVGEHCVF3XU.SEARCH&ppt=None&ppn=None&ssid=vp4s3olaf40000001757344239567&qH=37695f7554f510f0,26448,"Solid phone good touch and feel in hand, very good cameras, amazing display, stereo speakers, powerful chipset, smooth os, overall smooth and Good experience, only thing is I felt battery backup is less. Feels premium for sure.",4.4
1,"OPPO RENO 13 5G (Luminous Blue, 128 GB)",https://www.flipkart.com/oppo-reno-13-5g-luminous-blue-128-gb/p/itm9cf979582f79d?pid=MOBHCTVGEHCVF3XU&lid=LSTMOBHCTVGEHCVF3XUYN9IAZ&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_12&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=440096b1-0699-499c-b05d-f908bed9c615.MOBHCTVGEHCVF3XU.SEARCH&ppt=None&ppn=None&ssid=vp4s3olaf40000001757344239567&qH=37695f7554f510f0,26448,Exceelent,5.0
2,"OPPO RENO 13 5G (Luminous Blue, 128 GB)",https://www.flipkart.com/oppo-reno-13-5g-luminous-blue-128-gb/p/itm9cf979582f79d?pid=MOBHCTVGEHCVF3XU&lid=LSTMOBHCTVGEHCVF3XUYN9IAZ&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_12&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=440096b1-0699-499c-b05d-f908bed9c615.MOBHCTVGEHCVF3XU.SEARCH&ppt=None&ppn=None&ssid=vp4s3olaf40000001757344239567&qH=37695f7554f510f0,26448,Awesome under 27k. It's a balanced pakage.,5.0
3,"OPPO RENO 13 5G (Luminous Blue, 128 GB)",https://www.flipkart.com/oppo-reno-13-5g-luminous-blue-128-gb/p/itm9cf979582f79d?pid=MOBHCTVGEHCVF3XU&lid=LSTMOBHCTVGEHCVF3XUYN9IAZ&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_12&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=440096b1-0699-499c-b05d-f908bed9c615.MOBHCTVGEHCVF3XU.SEARCH&ppt=None&ppn=None&ssid=vp4s3olaf40000001757344239567&qH=37695f7554f510f0,26448,Excellent Phone My choice,5.0
4,"OPPO F27 Pro+ (Dusk Pink, 256 GB)",https://www.flipkart.com/oppo-f27-pro-dusk-pink-256-gb/p/itm5235c3c836cc9?pid=MOBHF3DJ8CREZW7M&lid=LSTMOBHF3DJ8CREZW7MP8G0L1&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_13&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=440096b1-0699-499c-b05d-f908bed9c615.MOBHF3DJ8CREZW7M.SEARCH&ppt=None&ppn=None&ssid=vp4s3olaf40000001757344239567&qH=37695f7554f510f0,21480,I bought this under 17k. So in this range this phone justifies the price. But if I spent more then 20k it's not a good choice right now. There are too many better options available right now.,4.4
...,...,...,...,...,...
3444,"OPPO F25 Pro 5G (Coral Purple, 128 GB)",https://www.flipkart.com/oppo-f25-pro-5g-coral-purple-128-gb/p/itm9451b9c477991?pid=MOBGZFDWBNU6AM4U&lid=LSTMOBGZFDWBNU6AM4UM8BNNO&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_4_94&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=274484e5-46d1-41e4-adef-22143db0ad88.MOBGZFDWBNU6AM4U.SEARCH&ppt=None&ppn=None&ssid=nl3fv89s280000001757344280028&qH=37695f7554f510f0,23999,Nice,5.0
3445,"OPPO F25 Pro 5G (Coral Purple, 128 GB)",https://www.flipkart.com/oppo-f25-pro-5g-coral-purple-128-gb/p/itm9451b9c477991?pid=MOBGZFDWBNU6AM4U&lid=LSTMOBGZFDWBNU6AM4UM8BNNO&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_4_94&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=274484e5-46d1-41e4-adef-22143db0ad88.MOBGZFDWBNU6AM4U.SEARCH&ppt=None&ppn=None&ssid=nl3fv89s280000001757344280028&qH=37695f7554f510f0,23999,Nice product under 24k .... overall good 👍,5.0
3446,"OPPO F25 Pro 5G (Coral Purple, 128 GB)",https://www.flipkart.com/oppo-f25-pro-5g-coral-purple-128-gb/p/itm9451b9c477991?pid=MOBGZFDWBNU6AM4U&lid=LSTMOBGZFDWBNU6AM4UM8BNNO&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_4_94&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=274484e5-46d1-41e4-adef-22143db0ad88.MOBGZFDWBNU6AM4U.SEARCH&ppt=None&ppn=None&ssid=nl3fv89s280000001757344280028&qH=37695f7554f510f0,23999,Good mobile,5.0
3447,"OPPO F25 Pro 5G (Coral Purple, 128 GB)",https://www.flipkart.com/oppo-f25-pro-5g-coral-purple-128-gb/p/itm9451b9c477991?pid=MOBGZFDWBNU6AM4U&lid=LSTMOBGZFDWBNU6AM4UM8BNNO&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_4_94&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=274484e5-46d1-41e4-adef-22143db0ad88.MOBGZFDWBNU6AM4U.SEARCH&ppt=None&ppn=None&ssid=nl3fv89s280000001757344280028&qH=37695f7554f510f0,23999,Very good product,5.0


In [29]:
import pandas as pd

# File paths
cleaned_file_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\google_cleaned.csv"
reviews_ratings_file_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\google_reviews_ratings.csv"
output_file_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\google_merged.csv"

# Read the CSV files
cleaned_df = pd.read_csv(cleaned_file_path)
reviews_ratings_df = pd.read_csv(reviews_ratings_file_path)

# Merge the DataFrames on the 'Product_Link' column
merged_df = pd.merge(cleaned_df, reviews_ratings_df, on='Product_Link', how='left')

# Save the merged DataFrame to a new CSV file
merged_df.to_csv(output_file_path, index=False)

print(f"Merged file saved to {output_file_path}")

Merged file saved to C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\google_merged.csv


In [30]:
df=pd.read_csv(r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\google_merged.csv")

In [31]:
df

Unnamed: 0,Product_Name,Product_Link,Product_Price,Review,Rating
0,"Google Pixel 7 (Lemongrass, 128 GB)",https://www.flipkart.com/google-pixel-7-lemongrass-128-gb/p/itm45d75002be0e7?pid=MOBGHW44ZSN5EPGU&lid=LSTMOBGHW44ZSN5EPGURSVVHV&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=a19eba61-c14b-4918-b58f-038b60db4cdd.MOBGHW44ZSN5EPGU.SEARCH&ppt=None&ppn=None&ssid=0g3xr6atnk0000001757427391662&qH=37695f7554f510f0,30999,"The real smart phone in the market.....\nLets talk about it.....\n\nCamera: In this range two best camera in the market are pixel 7 and s22. Portarit blur is more perfect in s22 but pixel 7 is real smart in image processing. This camera doesn't have manual mode so i was lil concern but when i started i never felt any need. Oh my god the night mode is just awesome.\nA few cons of camera are 1. Wide angle is not that wide and edge detection in portrait is not good. Cinematic blur is a beta feature only.\n\nPerformance: Not the best processor in the market in this price tag but definitely the smartest processor in any price range. Not even iphone is close to its smartness. This is not a gaming phone rather one should buy this phone for AI experience.\n\nDesign: 9/10 because of its unique design. 1 minus for glossy finish, matte would have been more premium. Though built quality is very good n premium.\nThis phone never lagged after 2 weeks of experience. Very few minor bugs. Best android experience.\n\nBattery: In comparison to other flagships this is a great battery. Full day battery for casual users.Heating issue solved after update.\n*Other than this fingerprint sensor was failing frequently but it is decently fast. After software updating 80% issue is now solved.\nOther experience are very good.\n*Best Haptic engine, better than any phone.",4.3
1,"Google Pixel 7 (Lemongrass, 128 GB)",https://www.flipkart.com/google-pixel-7-lemongrass-128-gb/p/itm45d75002be0e7?pid=MOBGHW44ZSN5EPGU&lid=LSTMOBGHW44ZSN5EPGURSVVHV&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=a19eba61-c14b-4918-b58f-038b60db4cdd.MOBGHW44ZSN5EPGU.SEARCH&ppt=None&ppn=None&ssid=0g3xr6atnk0000001757427391662&qH=37695f7554f510f0,30999,Night mode and Indoor clicks are awesome,4.0
2,"Google Pixel 7 (Lemongrass, 128 GB)",https://www.flipkart.com/google-pixel-7-lemongrass-128-gb/p/itm45d75002be0e7?pid=MOBGHW44ZSN5EPGU&lid=LSTMOBGHW44ZSN5EPGURSVVHV&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=a19eba61-c14b-4918-b58f-038b60db4cdd.MOBGHW44ZSN5EPGU.SEARCH&ppt=None&ppn=None&ssid=0g3xr6atnk0000001757427391662&qH=37695f7554f510f0,30999,"Perfect phone, one star less because of its weight.",5.0
3,"Google Pixel 7 (Lemongrass, 128 GB)",https://www.flipkart.com/google-pixel-7-lemongrass-128-gb/p/itm45d75002be0e7?pid=MOBGHW44ZSN5EPGU&lid=LSTMOBGHW44ZSN5EPGURSVVHV&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=a19eba61-c14b-4918-b58f-038b60db4cdd.MOBGHW44ZSN5EPGU.SEARCH&ppt=None&ppn=None&ssid=0g3xr6atnk0000001757427391662&qH=37695f7554f510f0,30999,"Writing this review after almost a month of use\nAmazing cameras, Excellent performance, Great display, haven't seen any bugs in OS yet, great design\nWas having a heating issue initially, temprature used to goes upto 43 Celcius. I was worried and was thinking to replace. But after a week of use everything went to normal haven't faced heating problem afterwards. Maybe because of optimization. Same goes for battery performance. Initially I was getting 4 to 4.5 hour of screen on time. But after week or 10 days of usage it started getting improved, now I almost get 5.5 to 6 hours of screen on time.\nNo other complaints so far. Highly recommended.",4.0
4,"Google Pixel 7 (Lemongrass, 128 GB)",https://www.flipkart.com/google-pixel-7-lemongrass-128-gb/p/itm45d75002be0e7?pid=MOBGHW44ZSN5EPGU&lid=LSTMOBGHW44ZSN5EPGURSVVHV&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=a19eba61-c14b-4918-b58f-038b60db4cdd.MOBGHW44ZSN5EPGU.SEARCH&ppt=None&ppn=None&ssid=0g3xr6atnk0000001757427391662&qH=37695f7554f510f0,30999,"No words, simply superb camera and overall, very rich and smart. Just buy, no need to think twice.",5.0
...,...,...,...,...,...
600,"Google Pixel 7a (Coral, 128 GB)",https://www.flipkart.com/google-pixel-7a-coral-128-gb/p/itmb4d7b100b1a4d?pid=MOBGT5F26QJYZUZS&lid=LSTMOBGT5F26QJYZUZSUUTWOI&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_3_49&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=62172703-5582-49ef-99fb-4b23982a905c.MOBGT5F26QJYZUZS.SEARCH&ppt=sp&ppn=sp&ssid=9kxu3hoc3k0000001757427419567&qH=37695f7554f510f0,27999,"Reviewing after using it for a week!\n\nPROS\n\nCamera : 5/5 Camera is the best. It's better than iPhones. My iPhone user friends are appreciating it's camera. However, I'm yet to test the video. I bought it because of it's Camera and looks.\n\nDesign: 5/5 Excellent design and looks. iPhones are very common these days and being owner of this unique phone brings sense of pride. People just love my phone when they see it.\n\n\nCONS\n\nBattery : 4/5 Battery backup is awesome but not that great. Lasts enough just like iPhones. Charges 60% in just an hour which is great enough for me because I never charge my phone to 100%. I don't understand why people are complaining about it.\n\nFingerprint: 1/5 Fingerprint doesn't work well after recent updates to Android 14. I hope Google find the fix to this issue soon.\n\nDisplay : Not that great, I guess. Sometimes touch didn't work properly for me. I don't know if I got defective one.",5.0
601,"(Refurbished) Google Pixel 7a (Sea, 128 GB)",https://www.flipkart.com/refurbished-google-pixel-7a-sea-128-gb/p/itm87e2c4abcabd1?pid=MFRHYQAFKYUGWKB6&lid=LSTMFRHYQAFKYUGWKB6PDZI82&marketplace=FLIPKART&bu=REFURBISHED&q=mobile+phone&store=tyy%2F4io&srno=s_3_57&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=62172703-5582-49ef-99fb-4b23982a905c.MFRHYQAFKYUGWKB6.SEARCH&ppt=sp&ppn=sp&ssid=9kxu3hoc3k0000001757427419567&qH=37695f7554f510f0,29499,,
602,"(Refurbished) Google Pixel 7a (Snow, 128 GB)",https://www.flipkart.com/refurbished-google-pixel-7a-snow-128-gb/p/itmf3206d1d0ea50?pid=MFRHYQAFFGTVBTXW&lid=LSTMFRHYQAFFGTVBTXWU4MFOS&marketplace=FLIPKART&bu=REFURBISHED&q=mobile+phone&store=tyy%2F4io&srno=s_3_58&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=62172703-5582-49ef-99fb-4b23982a905c.MFRHYQAFFGTVBTXW.SEARCH&ppt=sp&ppn=sp&ssid=9kxu3hoc3k0000001757427419567&qH=37695f7554f510f0,29499,,
603,"(Refurbished) Google Pixel 7a (Charcoal, 128 GB)",https://www.flipkart.com/refurbished-google-pixel-7a-charcoal-128-gb/p/itmdb58de99c2a4c?pid=MFRHYQAFWGJEDF25&lid=LSTMFRHYQAFWGJEDF25MQ7F1X&marketplace=FLIPKART&bu=REFURBISHED&q=mobile+phone&store=tyy%2F4io&srno=s_3_59&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=62172703-5582-49ef-99fb-4b23982a905c.MFRHYQAFWGJEDF25.SEARCH&ppt=sp&ppn=sp&ssid=9kxu3hoc3k0000001757427419567&qH=37695f7554f510f0,29499,,


In [34]:

import pandas as pd

# File paths
cleaned_file_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\honor_cleaned.csv"
reviews_ratings_file_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\honor_reviews_ratings.csv"
output_file_path = r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\honor_merged.csv"

# Read the CSV files
cleaned_df = pd.read_csv(cleaned_file_path)
reviews_ratings_df = pd.read_csv(reviews_ratings_file_path)

# Merge the DataFrames on the 'Product_Link' column
merged_df = pd.merge(cleaned_df, reviews_ratings_df, on='Product_Link', how='left')

# Save the merged DataFrame to a new CSV file
merged_df.to_csv(output_file_path, index=False)

print(f"Merged file saved to {output_file_path}")

Merged file saved to C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\honor_merged.csv


In [35]:
df=pd.read_csv(r"C:\Users\HP\OneDrive\Documents\Desktop\Flipkart_scapping\honor_merged.csv")

In [36]:
df

Unnamed: 0,Product_Name,Product_Link,Product_Price,Review,Rating
0,"Honor 200 5G (Moonlight White, 512 GB)",https://www.flipkart.com/honor-200-5g-moonlight-white-512-gb/p/itm6c73c53970f43?pid=MOBH32HGXDBGQNBT&lid=LSTMOBH32HGXDBGQNBTLU7OIM&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=45733c24-1b3d-4299-b434-a9842e7e3ce8.MOBH32HGXDBGQNBT.SEARCH&ppt=None&ppn=None&ssid=pp6fyyauds0000001757426938552&qH=37695f7554f510f0,28850,Lovely camera,4.4
1,"Honor 200 5G (Moonlight White, 512 GB)",https://www.flipkart.com/honor-200-5g-moonlight-white-512-gb/p/itm6c73c53970f43?pid=MOBH32HGXDBGQNBT&lid=LSTMOBH32HGXDBGQNBTLU7OIM&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=45733c24-1b3d-4299-b434-a9842e7e3ce8.MOBH32HGXDBGQNBT.SEARCH&ppt=None&ppn=None&ssid=pp6fyyauds0000001757426938552&qH=37695f7554f510f0,28850,Very good product nice quality\n\n\nVery fast delivery\n\nThanku,5.0
2,"Honor 200 5G (Moonlight White, 512 GB)",https://www.flipkart.com/honor-200-5g-moonlight-white-512-gb/p/itm6c73c53970f43?pid=MOBH32HGXDBGQNBT&lid=LSTMOBH32HGXDBGQNBTLU7OIM&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=45733c24-1b3d-4299-b434-a9842e7e3ce8.MOBH32HGXDBGQNBT.SEARCH&ppt=None&ppn=None&ssid=pp6fyyauds0000001757426938552&qH=37695f7554f510f0,28850,Awesome phone! I recommend it for below 30K budget.,5.0
3,"Honor 200 5G (Moonlight White, 512 GB)",https://www.flipkart.com/honor-200-5g-moonlight-white-512-gb/p/itm6c73c53970f43?pid=MOBH32HGXDBGQNBT&lid=LSTMOBH32HGXDBGQNBTLU7OIM&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_1&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=45733c24-1b3d-4299-b434-a9842e7e3ce8.MOBH32HGXDBGQNBT.SEARCH&ppt=None&ppn=None&ssid=pp6fyyauds0000001757426938552&qH=37695f7554f510f0,28850,This product is very good quality is a good product,5.0
4,"Honor 200 5G (Black, 512 GB)",https://www.flipkart.com/honor-200-5g-black-512-gb/p/itm6c73c53970f43?pid=MOBH34RU4S8Q5CBJ&lid=LSTMOBH34RU4S8Q5CBJI5PQRX&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_1_2&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=organic&iid=45733c24-1b3d-4299-b434-a9842e7e3ce8.MOBH34RU4S8Q5CBJ.SEARCH&ppt=None&ppn=None&ssid=pp6fyyauds0000001757426938552&qH=37695f7554f510f0,28899,Lovely camera,4.4
...,...,...,...,...,...
1338,"Honor 9N (Sapphire Blue, 64 GB)",https://www.flipkart.com/honor-9n-sapphire-blue-64-gb/p/itmf9pgsehv6nmss?pid=MOBF6NARU2TTTXBG&lid=LSTMOBF6NARU2TTTXBGDWWCIJ&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_2_40&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=ff6c29a9-1295-4c1e-a7fe-71128fb6681f.MOBF6NARU2TTTXBG.SEARCH&ppt=sp&ppn=sp&ssid=cmjbmuh4ps0000001757426957342&qH=37695f7554f510f0,15999,Nice peice from huwaei...,5.0
1339,"Honor 9N (Sapphire Blue, 64 GB)",https://www.flipkart.com/honor-9n-sapphire-blue-64-gb/p/itmf9pgsehv6nmss?pid=MOBF6NARU2TTTXBG&lid=LSTMOBF6NARU2TTTXBGDWWCIJ&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_2_40&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=ff6c29a9-1295-4c1e-a7fe-71128fb6681f.MOBF6NARU2TTTXBG.SEARCH&ppt=sp&ppn=sp&ssid=cmjbmuh4ps0000001757426957342&qH=37695f7554f510f0,15999,Front camera..... 4/5 Rear camera...... 3.5/5 battery...... 3.5/5 software.... 3.7/5 display.... 4.6/5 Rear camera should be more better. Some times found software float like freezing for 1 or 2 seconds.... Processing rate should be little more in this price rang... And hybrid sim slot why?... Battery should be around 4000 mah in this price range.... Only single sim 4g are you kidding Honor? Good for mid level gaming because this phone is not for high level gaming...... Expect this all over experience is good... I like this phone Honor,4.0
1340,"Honor 9N (Sapphire Blue, 64 GB)",https://www.flipkart.com/honor-9n-sapphire-blue-64-gb/p/itmf9pgsehv6nmss?pid=MOBF6NARU2TTTXBG&lid=LSTMOBF6NARU2TTTXBGDWWCIJ&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_2_40&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=ff6c29a9-1295-4c1e-a7fe-71128fb6681f.MOBF6NARU2TTTXBG.SEARCH&ppt=sp&ppn=sp&ssid=cmjbmuh4ps0000001757426957342&qH=37695f7554f510f0,15999,awesome mobile...i like it.\nmust buy,4.0
1341,"Honor 9N (Sapphire Blue, 64 GB)",https://www.flipkart.com/honor-9n-sapphire-blue-64-gb/p/itmf9pgsehv6nmss?pid=MOBF6NARU2TTTXBG&lid=LSTMOBF6NARU2TTTXBGDWWCIJ&marketplace=FLIPKART&q=mobile+phone&store=tyy%2F4io&srno=s_2_40&otracker=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&otracker1=AS_QueryStore_HistoryAutoSuggest_1_4_na_na_na&fm=search-autosuggest&iid=ff6c29a9-1295-4c1e-a7fe-71128fb6681f.MOBF6NARU2TTTXBG.SEARCH&ppt=sp&ppn=sp&ssid=cmjbmuh4ps0000001757426957342&qH=37695f7554f510f0,15999,Delivered on next day of booking.... Wow good choice for 14k....,5.0


In [37]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1343 entries, 0 to 1342
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Product_Name   1343 non-null   object 
 1   Product_Link   1343 non-null   object 
 2   Product_Price  1343 non-null   int64  
 3   Review         1335 non-null   object 
 4   Rating         1335 non-null   float64
dtypes: float64(1), int64(1), object(3)
memory usage: 52.6+ KB


In [38]:
df.isnull().sum()

Product_Name     0
Product_Link     0
Product_Price    0
Review           8
Rating           8
dtype: int64

In [39]:
# Set options to display more characters
pd.set_option('display.max_colwidth', None)