In [None]:
# Install dependencies in Colab.
# These are IPython shell escapes ("!"), so this cell only runs inside a
# notebook environment, not as a plain Python script.
# Refresh the apt package index before installing.
!apt-get update
# Install the chromium-chromedriver package (-y answers prompts automatically).
!apt-get install -y chromium-chromedriver
# Install the Selenium Python bindings imported below.
!pip install selenium
# Copy the chromedriver binary into /usr/bin.
# NOTE(review): assumes the apt package places chromedriver under
# /usr/lib/chromium-browser — confirm on the current Colab image.
!cp /usr/lib/chromium-browser/chromedriver /usr/bin

# Import required libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import WebDriverException, TimeoutException
from bs4 import BeautifulSoup
import pandas as pd
import time
import csv
import os
import tempfile
from selenium.webdriver.chrome.options import Options
from google.colab import files

def setup_driver():
    """Initialize a headless Chrome WebDriver configured for Google Colab.

    A fresh temporary directory is used as the Chrome user-data-dir for
    every driver. The directory is removed again when driver creation
    fails, and otherwise when the returned driver object is garbage
    collected (or at interpreter exit), so repeated calls do not pile up
    abandoned profile directories.

    Returns:
        The WebDriver instance, or None if Chrome could not be started
        (the error is printed).
    """
    # Local imports: the file does not import these at module level.
    import shutil
    import weakref

    options = Options()
    for flag in (
        '--headless',               # Run in headless mode
        '--no-sandbox',             # Required for Colab
        '--disable-dev-shm-usage',  # Avoid shared memory issues
        '--disable-gpu',            # Disable GPU in headless mode
    ):
        options.add_argument(flag)

    temp_dir = tempfile.mkdtemp()
    options.add_argument(f'--user-data-dir={temp_dir}')

    try:
        driver = webdriver.Chrome(options=options)
    except Exception as e:
        # Nothing will ever use the profile directory; remove it right away.
        shutil.rmtree(temp_dir, ignore_errors=True)
        print(f"Error setting up WebDriver: {e}")
        return None

    # Tie the profile directory's lifetime to the driver object so callers
    # need no extra cleanup beyond the usual driver.quit().
    weakref.finalize(driver, shutil.rmtree, temp_dir, ignore_errors=True)
    return driver

def _extract_price_digits(text):
    """Return the whole-rupee part of the first price found in *text*.

    A number that directly follows a rupee sign is preferred; otherwise the
    first number in the text is used. Thousands separators are removed and
    any fractional part is ignored, so "₹6,158.00" yields "6158".
    Returns an empty string when the text contains no digits.
    """
    import re  # local import: the file does not import re at module level

    match = re.search(r'₹\s*(\d[\d,]*)', text)
    number = match.group(1) if match else None
    if number is None:
        match = re.search(r'\d[\d,]*', text)
        number = match.group(0) if match else ''
    return number.replace(',', '')


def get_price(url, tag, class_name, wait_selector=None):
    """Scrape a price from *url* using the given tag and class.

    Args:
        url: Product page to load.
        tag: HTML tag name of the price element.
        class_name: Value of the class attribute of the price element.
        wait_selector: Optional class name to wait for before parsing the
            page. When omitted, the function sleeps for a fixed 2.5 seconds
            instead.

    Returns:
        The price as a string of digits (whole rupees), or one of the status
        strings "Not Available", "Timeout", "Error" or
        "Error: Driver not initialized".
    """
    driver = setup_driver()
    if driver is None:
        return "Error: Driver not initialized"

    try:
        driver.get(url)
        if wait_selector:
            # Amazon pages get a longer wait than the other sites.
            wait_time = 10 if 'amazon' in url.lower() else 5
            WebDriverWait(driver, wait_time).until(
                EC.presence_of_element_located((By.CLASS_NAME, wait_selector))
            )
        else:
            time.sleep(2.5)

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        price_element = soup.find(tag, class_=class_name)
        if price_element is not None:
            return _extract_price_digits(price_element.text) or "Not Available"

        print(f"Could not find {tag} with class '{class_name}' at {url}")
        if 'amazon' in url.lower():
            # Fallback: look for a rupee-bearing span that wraps the
            # whole-price span.
            for span in soup.find_all('span'):
                if '₹' not in span.text:
                    continue
                whole = span.find('span', class_='a-price-whole')
                if whole is None:
                    continue
                digits = _extract_price_digits(whole.text)
                if digits:
                    return digits
        return "Not Available"

    except TimeoutException:
        print(f"Timeout waiting for element at {url}")
        return "Timeout"
    except Exception as e:
        print(f"Error scraping {url}: {e}")
        return "Error"
    finally:
        # Always shut the browser down, whatever happened above.
        driver.quit()

def get_site_config(url):
    """Return the scraping configuration for the website hosting *url*.

    The site is recognised from the URL's host name only, so a keyword that
    merely appears in the path or query string (for example an Amazon
    listing for a "Flipkart SmartBuy" product) cannot select the wrong
    site. URLs written without a scheme are still recognised.

    Args:
        url: Product URL. Anything that is not a non-blank string yields
            None.

    Returns:
        A dict with the keys 'tag', 'class', 'wait' and 'name', or None when
        the host does not belong to a supported site.
    """
    from urllib.parse import urlparse  # local import: not imported at file level

    if not isinstance(url, str) or not url.strip():
        return None

    candidate = url.strip()
    if '://' not in candidate:
        # Without a scheme urlparse would treat the host as part of the path.
        candidate = '//' + candidate
    try:
        host = (urlparse(candidate).hostname or '').lower()
    except ValueError:
        # Malformed URL (e.g. a broken bracketed host).
        return None

    # Checked in order; the first keyword contained in the host wins.
    site_configs = (
        ('zeptonow', {
            'tag': 'span',
            'class': 'text-[32px] font-medium leading-[30px] text-[#262A33]',
            'wait': None,
            'name': 'Zepto',
        }),
        ('flipkart', {
            'tag': 'div',
            'class': 'Nx9bqj',
            'wait': 'Nx9bqj',
            'name': 'Flipkart',
        }),
        ('amazon', {
            'tag': 'span',
            'class': 'a-price-whole',
            'wait': 'a-price-whole',
            'name': 'Amazon',
        }),
    )
    for keyword, config in site_configs:
        if keyword in host:
            return config
    return None

def load_products_from_excel(file_path):
    """Load product data from an Excel file.

    The sheet must provide the columns 'Product name', 'Zepto link',
    'Flipkart link' and 'Amazon link'.

    Args:
        file_path: Path of the Excel workbook to read.

    Returns:
        A list with one dict per row, shaped as
        {'name': ..., 'links': {'Zepto': ..., 'Flipkart': ..., 'Amazon': ...}},
        where empty cells are None. An empty list is returned (and a message
        printed) when the file is missing or cannot be read.
    """
    try:
        df = pd.read_excel(file_path)
        # Replace NaN with None for missing links. The cast to object is
        # needed because a float-typed column (e.g. a link column that is
        # entirely empty) cannot hold None and would keep its NaN values.
        df = df.astype(object).where(df.notna(), None)

        products = [
            {
                'name': row['Product name'],
                'links': {
                    'Zepto': row['Zepto link'],
                    'Flipkart': row['Flipkart link'],
                    'Amazon': row['Amazon link'],
                },
            }
            for _, row in df.iterrows()
        ]
        return products
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        return []
    except Exception as e:
        # Covers unreadable workbooks and missing columns alike.
        print(f"Error loading Excel file: {e}")
        return []

def main():
    """Upload a product workbook, scrape each product's prices, export a CSV.

    Steps:
      1. Ask the user to upload the workbook through the Colab file picker.
      2. Load the products and scrape every link that is present.
      3. Write the results to 'product_price_comparison.csv' and trigger a
         browser download of that file.
    """
    print("Please upload your 'Crawler.xlsx' file:")
    uploaded = files.upload()

    # Use the file that was actually uploaded. The picker may store it under
    # a different name than expected (e.g. "Crawler (1).xlsx" on a repeated
    # upload); the default name is only a fallback.
    excel_file = 'Crawler.xlsx'
    if uploaded and excel_file not in uploaded:
        excel_names = [
            name for name in uploaded
            if name.lower().endswith(('.xlsx', '.xls'))
        ]
        if excel_names:
            excel_file = excel_names[0]

    products = load_products_from_excel(excel_file)

    if not products:
        print("No products loaded. Exiting.")
        return

    # Collect price data
    price_data = []
    for product in products:
        product_prices = {'Product Name': product['name']}
        print(f"\nScraping prices for: {product['name']}")

        for site_name, url in product['links'].items():
            column = f"{site_name} Price (INR)"

            # Empty cells can arrive as None, NaN or blank text.
            if not isinstance(url, str) or not url.strip():
                product_prices[column] = "Not Available"
                continue

            url = url.strip()
            config = get_site_config(url)
            if not config:
                product_prices[column] = "Invalid URL"
                continue

            price = get_price(url, config['tag'], config['class'], config['wait'])
            product_prices[column] = price
            # Only real prices get the rupee sign; status strings such as
            # "Timeout" are printed as they are.
            if str(price).isdigit():
                print(f"{site_name}: ₹{price}")
            else:
                print(f"{site_name}: {price}")

        price_data.append(product_prices)

    # Save to CSV
    csv_filename = 'product_price_comparison.csv'
    fieldnames = ['Product Name', 'Zepto Price (INR)', 'Flipkart Price (INR)', 'Amazon Price (INR)']

    with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(price_data)

    print(f"\nPrice comparison saved to {csv_filename}")

    files.download(csv_filename)


if __name__ == "__main__":
    main()

Hit:1 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:2 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:4 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:5 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:6 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:7 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:11 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1,381 kB]
Get:12 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 Packages [1,540 kB]
Get:13 http://archive.ubuntu.com/ubuntu 

Saving Crawler.xlsx to Crawler.xlsx

Scraping prices for: Poco C61 | 4GB RAM | 64GB ROM | 8MP Camera | 5000mAh Battery | Diamond Dust Black
Zepto: ₹6158
Flipkart: ₹7530
Amazon: ₹5799

Scraping prices for: vivo Y18i (Gem Green, 4GB RAM, 64GB Storage) without charger
Zepto: ₹7499
Flipkart: ₹8749
Amazon: ₹9989

Scraping prices for: vivo Y18i (Space Black, 64 GB)  (4 GB RAM)
Zepto: ₹7499
Flipkart: ₹8749

Scraping prices for: Poco C75 | 4GB RAM | 64GB ROM| 50MP Camera | 5160 mAh Battery | Aqua Bliss
Zepto: ₹7999
Flipkart: ₹7999
Amazon: ₹5799

Scraping prices for: Poco C75 | 4GB RAM | 64GB ROM| 50MP Camera | 5160 mAh Battery | Silver Stardust
Flipkart: ₹7999
Amazon: ₹5799

Scraping prices for: Redmi 14C 5G | Stargaze Black | 4GB RAM | 64GB ROM
Zepto: ₹9499
Flipkart: ₹9499
Amazon: ₹9499

Scraping prices for: Redmi 13 5G | Black Diamond | 6GB RAM | 128GB ROM
Zepto: ₹12499
Flipkart: ₹12310

Scraping prices for: vivo Y29 5G | 4GB RAM | 128 GB | Diamond Black
Zepto: ₹13999
Flipkart: ₹13999

Scrap

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>