# Extracting Data For 5 Pages

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# Launch a visible Chrome session through the locally installed chromedriver binary.
chromedriver_path = "C:/Users/mypc/Desktop/chromedriver.exe"
service = Service(chromedriver_path)
driver = webdriver.Chrome(service=service)

# Laptop finder listing that the scrape starts from.
url = "https://www.91mobiles.com/laptopfinder.php"

# Open the listing, then pause for a fixed five seconds before touching the page.
driver.get(url)
time.sleep(5)

# Click the "Available In Stores" filter label. The click is unconditional: the
# code does not inspect whether the filter is already active. Any failure is
# reported and the run continues with whatever listing is currently shown.
stores_filter_locator = (By.XPATH, '//label[contains(text(), "Available In Stores")]')
try:
    wait = WebDriverWait(driver, 10)
    available_in_stores_checkbox = wait.until(
        EC.element_to_be_clickable(stores_filter_locator)
    )
    available_in_stores_checkbox.click()
    time.sleep(3)  # fixed pause while the filtered listing refreshes
except Exception as e:
    print("Checkbox not found or already selected:", e)

# Function to extract laptop data from a page
def extract_data_from_page(driver):
    """Collect one record per laptop card on the page currently loaded in ``driver``.

    Each ``finder_snipet_wrap`` element contributes a dict with the keys
    ``Name``, ``Price``, ``Performance``, ``Design``, ``Storage`` and
    ``Battery``.  The four spec fields are taken, in that order, from the
    card's ``filter-list-text`` elements; a missing position becomes ``"N/A"``.
    A card that raises while being read is reported and skipped.

    Args:
        driver: WebDriver (or compatible object) exposing ``find_elements``.

    Returns:
        list[dict]: the records, in page order.
    """
    spec_columns = ('Performance', 'Design', 'Storage', 'Battery')
    records = []

    for card in driver.find_elements(By.CLASS_NAME, 'finder_snipet_wrap'):
        try:
            record = {
                'Name': card.find_element(By.CLASS_NAME, 'name').text,
                'Price': card.find_element(By.CLASS_NAME, 'price').text,
            }
            spec_nodes = card.find_elements(By.CLASS_NAME, 'filter-list-text')
            # Spec lines are positional; pad with "N/A" when the card has fewer.
            for position, column in enumerate(spec_columns):
                if position < len(spec_nodes):
                    record[column] = spec_nodes[position].text
                else:
                    record[column] = "N/A"
        except Exception as e:
            print("Error extracting data:", e)
        else:
            records.append(record)

    return records

# Scrape the first five listing pages, following the "Next" control between
# them. The control is clicked after every page, the fifth included.
all_laptops = []
next_locator = (By.XPATH, '//span[contains(text(), "Next")]')
for i in range(1, 6):
    print(f"Extracting data from page {i}")
    all_laptops += extract_data_from_page(driver)
    try:
        wait = WebDriverWait(driver, 10)
        next_button = wait.until(EC.element_to_be_clickable(next_locator))
        # Click through JavaScript rather than a native WebElement click.
        driver.execute_script("arguments[0].click();", next_button)
        time.sleep(5)  # fixed pause while the following page renders
    except Exception as e:
        # A missing or unclickable "Next" ends pagination early.
        print("Next button not found or error clicking:", e)
        break

# Tabulate the collected records and write them out without the index column.
df = pd.DataFrame(all_laptops)
df.to_csv('laptops_data.csv', index=False)

# Shut the browser down.
driver.quit()


# Extracting Data For All Pages

In [19]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# Configure a headless Chrome session driven by the local chromedriver binary.
chrome_options = webdriver.ChromeOptions()
for chrome_flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
    chrome_options.add_argument(chrome_flag)
chromedriver_path = "C:/Users/mypc/Desktop/chromedriver.exe"
service = Service(chromedriver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)

# Laptop finder listing that the scrape starts from.
url = "https://www.91mobiles.com/laptopfinder.php"

# Open the listing, then pause for a fixed five seconds before touching the page.
driver.get(url)
time.sleep(5)

# Click the "Available In Stores" filter text. The click is unconditional: the
# code does not inspect whether the filter is already active. Any failure is
# reported and the run continues with whatever listing is currently shown.
stores_filter_locator = (By.XPATH, '//span[text()="Available In Stores"]')
try:
    wait = WebDriverWait(driver, 10)
    available_in_stores_checkbox = wait.until(
        EC.element_to_be_clickable(stores_filter_locator)
    )
    available_in_stores_checkbox.click()
    time.sleep(3)  # fixed pause while the filtered listing refreshes
except Exception as e:
    print("Checkbox not found or already selected:", e)

# Function to extract laptop data from a page
def extract_data_from_page(driver):
    """Scrape the laptop cards on the current listing page into a list of dicts.

    Every ``finder_snipet_wrap`` element yields a dict keyed ``Name``,
    ``Price``, ``Performance``, ``Design``, ``Storage`` and ``Battery``.  The
    last four come from the card's ``filter-list-text`` elements by position,
    with ``"N/A"`` standing in for positions the card does not have.  A card
    that raises while being read is reported and left out.

    Args:
        driver: WebDriver (or compatible object) exposing ``find_elements``.

    Returns:
        list[dict]: the scraped rows, in page order.
    """
    def spec_text(nodes, index):
        # Cards may expose fewer than four spec lines.
        return nodes[index].text if len(nodes) > index else "N/A"

    rows = []
    cards = driver.find_elements(By.CLASS_NAME, 'finder_snipet_wrap')

    for card in cards:
        try:
            title = card.find_element(By.CLASS_NAME, 'name').text
            cost = card.find_element(By.CLASS_NAME, 'price').text
            spec_nodes = card.find_elements(By.CLASS_NAME, 'filter-list-text')

            row = {
                'Name': title,
                'Price': cost,
                'Performance': spec_text(spec_nodes, 0),
                'Design': spec_text(spec_nodes, 1),
                'Storage': spec_text(spec_nodes, 2),
                'Battery': spec_text(spec_nodes, 3),
            }
            rows.append(row)
        except Exception as e:
            print("Error extracting data:", e)

    return rows

# Iterate through all pages until "Next" button is not found.
# The crawl is unbounded and can run for a long time, so the save and the
# browser shutdown live in ``finally``: an unexpected error or a manual
# interrupt still writes the rows collected so far and does not leave a
# headless Chrome process running.
all_laptops = []
page_number = 1
try:
    while True:
        print(f"Extracting data from page {page_number}")
        all_laptops.extend(extract_data_from_page(driver))
        try:
            next_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, '//span[contains(text(), "Next")]'))
            )
            # Click through JavaScript rather than a native WebElement click.
            driver.execute_script("arguments[0].click();", next_button)
            time.sleep(5)  # Wait for the next page to load
            page_number += 1
        except Exception as e:
            # No clickable "Next" within the timeout is treated as the last page.
            print("Next button not found or no more pages:", e)
            break
finally:
    try:
        # Convert the data to a DataFrame and save to CSV
        df = pd.DataFrame(all_laptops)
        df.to_csv('laptops_data.csv', index=False)
    finally:
        # Close the driver even if writing the CSV failed
        driver.quit()


Extracting data from page 1
Extracting data from page 2
Extracting data from page 3
Extracting data from page 4
Extracting data from page 5
Extracting data from page 6
Extracting data from page 7
Extracting data from page 8
Extracting data from page 9
Extracting data from page 10
Extracting data from page 11
Extracting data from page 12
Extracting data from page 13
Extracting data from page 14
Extracting data from page 15
Extracting data from page 16
Extracting data from page 17
Extracting data from page 18
Extracting data from page 19
Extracting data from page 20
Extracting data from page 21
Extracting data from page 22
Extracting data from page 23
Extracting data from page 24
Extracting data from page 25
Extracting data from page 26
Extracting data from page 27
Extracting data from page 28
Extracting data from page 29
Extracting data from page 30
Extracting data from page 31
Extracting data from page 32
Extracting data from page 33
Extracting data from page 34
Extracting data from pa

# Extended Code to Extract More Fields

In [29]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# Function to initialize the WebDriver
def init_driver():
    """Build and return a headless Chrome WebDriver with notifications blocked.

    The driver is started from the chromedriver binary at a fixed local path,
    with the ``--headless``, ``--disable-gpu`` and ``--no-sandbox`` flags.

    Returns:
        The ``webdriver.Chrome`` instance.
    """
    chrome_flags = ('--headless', '--disable-gpu', '--no-sandbox')

    options = webdriver.ChromeOptions()
    for flag in chrome_flags:
        options.add_argument(flag)
    # Content-setting value 2 is used here to block notification prompts.
    options.add_experimental_option(
        "prefs", {"profile.default_content_setting_values.notifications": 2}
    )

    return webdriver.Chrome(
        service=Service("C:/Users/mypc/Desktop/chromedriver.exe"),
        options=options,
    )

# Start the browser and open the laptop finder listing.
driver = init_driver()
driver.get("https://www.91mobiles.com/laptopfinder.php")

# Click the "Available In Stores" filter text once it is clickable. The click
# is unconditional: the code does not inspect whether the filter is already
# active. Any failure is reported and the run continues.
stores_filter_locator = (By.XPATH, '//span[text()="Available In Stores"]')
try:
    wait = WebDriverWait(driver, 10)
    available_in_stores_checkbox = wait.until(
        EC.element_to_be_clickable(stores_filter_locator)
    )
    available_in_stores_checkbox.click()
    time.sleep(2)  # fixed pause while the filtered listing reloads
except Exception as e:
    print("Checkbox not found or already selected:", e)

# Function to extract data from the current page
def extract_data_from_page():
    """Scrape every laptop card on the current listing page, including detail-page specs.

    Uses the module-level ``driver``.  For each ``.finder_snipet_wrap`` card the
    name, price and spec score are read from the listing; the title link is then
    clicked via JavaScript and the remaining fields are read from the window
    that opens.  Every window other than the listing is closed again before the
    next card is processed, whether or not the card was scraped successfully.

    Returns:
        list[list[str]]: one row per successfully scraped laptop, laid out as
        ``[name, price, spec_score]`` followed by the spec values in the order
        of ``spec_labels``.  Values that cannot be located are ``"N/A"``.  A
        card that fails is reported and contributes no row.
    """
    # Output column -> label text matched against the first cell of a spec row
    # on the detail page.  "Overall Rating" is not looked up as a table row; it
    # is read from ``span.ratpt`` instead.  The order defines the row layout.
    spec_labels = {
        "Processor": "Processor",
        "Operating System": "Operating System",
        "SSD Capacity": "SSD Capacity",
        "RAM Type": "RAM type",
        "Graphics Processor": "Graphic Processor",
        "Display Size": "Display Size",
        "Display Resolution": "Display Resolution",
        "Capacity": "Capacity",
        "Aspect Ratio": "Aspect Ratio",
        "Overall Rating": "Overall Rating",
        "Battery Cell": "Battery Cell",
        "Battery type": "Battery type",
        "Power Supply": "Power Supply",
        "Weight": "Weight",
        "Touchscreen": "Touchscreen"
    }

    # Remember the listing window so cleanup can never close it by mistake.
    listing_handle = driver.current_window_handle
    data = []
    for laptop in driver.find_elements(By.CSS_SELECTOR, ".finder_snipet_wrap"):
        try:
            title_link = laptop.find_element(By.CSS_SELECTOR, "a.hover_blue_link")
            name = title_link.text
            price = laptop.find_element(By.CSS_SELECTOR, ".price").text.replace('Rs.', '').strip()
            try:
                spec_score = laptop.find_element(By.CSS_SELECTOR, ".rating_box_new_list").text.replace('%', '').strip()
            except Exception:
                spec_score = "N/A"

            handles_before = driver.window_handles
            driver.execute_script("arguments[0].click();", title_link)

            # Wait for the detail window instead of assuming it is already at
            # window_handles[1] immediately after the click.
            WebDriverWait(driver, 10).until(EC.new_window_is_opened(handles_before))
            detail_handle = next(h for h in driver.window_handles if h not in handles_before)
            driver.switch_to.window(detail_handle)

            # Extract specifications and overall rating from the new window
            spec_values = []
            for column, label in spec_labels.items():
                try:
                    if column == "Overall Rating":
                        value = driver.find_element(By.CSS_SELECTOR, "span.ratpt").text.split("/")[0].strip()
                    else:
                        value = driver.find_element(By.XPATH, f"//td[text()='{label}']/following-sibling::td").text.strip()
                except Exception:
                    value = "N/A"
                spec_values.append(value)

            data.append([name, price, spec_score] + spec_values)
        except Exception as e:
            print(f"Error extracting data: {e}")
        finally:
            # Close only windows that are not the listing, then refocus the
            # listing.  An unconditional close here would close the listing
            # itself whenever the failure happened before the detail window
            # was opened or switched to.
            try:
                for handle in driver.window_handles:
                    if handle != listing_handle:
                        driver.switch_to.window(handle)
                        driver.close()
                driver.switch_to.window(listing_handle)
            except Exception as e:
                print(f"Error returning to the listing window: {e}")
    return data

# List to store all the extracted data
all_data = []

# Extract data from the first 3 pages (the loop bound, and the recorded output
# of this cell, cover pages 1-3)
last_page = 3
for page in range(1, last_page + 1):
    print(f"Extracting data from page {page}")
    all_data.extend(extract_data_from_page())
    if page == last_page:
        # Nothing is read after the final page, so do not spend a click and a
        # wait on loading a page that would be discarded.
        break
    try:
        next_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "//span[@class='list-bttnn' and contains(text(), 'Next')]"))
        )
        # Click through JavaScript rather than a native WebElement click.
        driver.execute_script("arguments[0].click();", next_button)
        time.sleep(2)  # Wait for the next page to load
    except Exception as e:
        # No clickable "Next" within the timeout ends pagination early.
        print("Next button not found or no more pages:", e)
        break

# Create a DataFrame and save the data to a CSV file.
# The column order must match the row layout built by extract_data_from_page().
columns = ["Name", "Price", "Spec Score", "Processor", "Operating System", "SSD Capacity", "RAM Type", "Graphics Processor", "Display Size", "Display Resolution", "Capacity", "Aspect Ratio", "Overall Rating", "Battery Cell", "Battery type", "Power Supply", "Weight", "Touchscreen"]
df = pd.DataFrame(all_data, columns=columns)
df.to_csv("laptops_data.csv", index=False)

# Close the browser
driver.quit()

print("Data extraction completed and saved to laptops_data.csv")


Extracting data from page 1
Extracting data from page 2
Extracting data from page 3
Data extraction completed and saved to laptops_data.csv


In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# Function to initialize the WebDriver
def init_driver():
    """Create the headless Chrome WebDriver used by this cell.

    Chrome is started from the chromedriver binary at a fixed local path with
    the ``--headless``, ``--disable-gpu`` and ``--no-sandbox`` flags, plus a
    profile preference that blocks notifications.

    Returns:
        The ``webdriver.Chrome`` instance.
    """
    # Content-setting value 2 is used here to block notification prompts.
    notification_prefs = {"profile.default_content_setting_values.notifications": 2}

    browser_options = webdriver.ChromeOptions()
    browser_options.add_argument('--headless')
    browser_options.add_argument('--disable-gpu')
    browser_options.add_argument('--no-sandbox')
    browser_options.add_experimental_option("prefs", notification_prefs)

    driver_service = Service("C:/Users/mypc/Desktop/chromedriver.exe")
    return webdriver.Chrome(service=driver_service, options=browser_options)

# Start the browser and open the laptop finder listing.
driver = init_driver()
driver.get("https://www.91mobiles.com/laptopfinder.php")

# Click the "Available In Stores" filter text once it is clickable. The click
# is unconditional: the code does not inspect whether the filter is already
# active. Any failure is reported and the run continues.
stores_filter_locator = (By.XPATH, '//span[text()="Available In Stores"]')
try:
    wait = WebDriverWait(driver, 10)
    available_in_stores_checkbox = wait.until(
        EC.element_to_be_clickable(stores_filter_locator)
    )
    available_in_stores_checkbox.click()
    time.sleep(2)  # fixed pause while the filtered listing reloads
except Exception as e:
    print("Checkbox not found or already selected:", e)

# Function to extract data from the current page
def extract_data_from_page():
    """Scrape every laptop card on the current listing page, including detail-page specs.

    Uses the module-level ``driver``.  For each ``.finder_snipet_wrap`` card the
    name, price and spec score are read from the listing; the title link is then
    clicked via JavaScript and the remaining fields are read from the window
    that opens.  Every window other than the listing is closed again before the
    next card is processed, whether or not the card was scraped successfully.

    Returns:
        list[list[str]]: one row per successfully scraped laptop, laid out as
        ``[name, price, spec_score]`` followed by the spec values in the order
        of ``spec_labels``.  Values that cannot be located are ``"N/A"``.  A
        card that fails is reported and contributes no row.
    """
    # Output column -> label text matched against the first cell of a spec row
    # on the detail page.  "Overall Rating" is not looked up as a table row; it
    # is read from ``span.ratpt`` instead.  The order defines the row layout.
    spec_labels = {
        "Processor": "Processor",
        "Clock-speed": "Clock-speed",
        "Operating System": "Operating System",
        "SSD Capacity": "SSD Capacity",
        "RAM Type": "RAM type",
        "Graphics Processor": "Graphic Processor",
        "Display Size": "Display Size",
        "Display Resolution": "Display Resolution",
        "Capacity": "Capacity",
        "Aspect Ratio": "Aspect Ratio",
        "Overall Rating": "Overall Rating",
        "Battery Cell": "Battery Cell",
        "Battery type": "Battery type",
        "Power Supply": "Power Supply",
        "Weight": "Weight",
        "Touchscreen": "Touchscreen"
    }

    # Remember the listing window so cleanup can never close it by mistake.
    listing_handle = driver.current_window_handle
    data = []
    for laptop in driver.find_elements(By.CSS_SELECTOR, ".finder_snipet_wrap"):
        try:
            title_link = laptop.find_element(By.CSS_SELECTOR, "a.hover_blue_link")
            name = title_link.text
            price = laptop.find_element(By.CSS_SELECTOR, ".price").text.replace('Rs.', '').strip()
            try:
                spec_score = laptop.find_element(By.CSS_SELECTOR, ".rating_box_new_list").text.replace('%', '').strip()
            except Exception:
                spec_score = "N/A"

            handles_before = driver.window_handles
            driver.execute_script("arguments[0].click();", title_link)

            # Wait for the detail window instead of assuming it is already at
            # window_handles[1] immediately after the click.
            WebDriverWait(driver, 10).until(EC.new_window_is_opened(handles_before))
            detail_handle = next(h for h in driver.window_handles if h not in handles_before)
            driver.switch_to.window(detail_handle)

            # Extract specifications and overall rating from the new window
            spec_values = []
            for column, label in spec_labels.items():
                try:
                    if column == "Overall Rating":
                        value = driver.find_element(By.CSS_SELECTOR, "span.ratpt").text.split("/")[0].strip()
                    else:
                        value = driver.find_element(By.XPATH, f"//td[text()='{label}']/following-sibling::td").text.strip()
                except Exception:
                    value = "N/A"
                spec_values.append(value)

            data.append([name, price, spec_score] + spec_values)
        except Exception as e:
            print(f"Error extracting data: {e}")
        finally:
            # Close only windows that are not the listing, then refocus the
            # listing.  An unconditional close here would close the listing
            # itself whenever the failure happened before the detail window
            # was opened or switched to.
            try:
                for handle in driver.window_handles:
                    if handle != listing_handle:
                        driver.switch_to.window(handle)
                        driver.close()
                driver.switch_to.window(listing_handle)
            except Exception as e:
                print(f"Error returning to the listing window: {e}")
    return data

# List to store all the extracted data
all_data = []

# Start the timer
start_time = time.time()

# Extract data from the first 3 pages (the loop bound, and the recorded output
# of this cell, cover pages 1-3)
last_page = 3
for page in range(1, last_page + 1):
    print(f"Extracting data from page {page}")
    all_data.extend(extract_data_from_page())
    if page == last_page:
        # Nothing is read after the final page, so do not spend a click and a
        # wait on loading a page that would be discarded.
        break
    try:
        next_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "//span[@class='list-bttnn' and contains(text(), 'Next')]"))
        )
        # Click through JavaScript rather than a native WebElement click.
        driver.execute_script("arguments[0].click();", next_button)
        time.sleep(2)  # Wait for the next page to load
    except Exception as e:
        # No clickable "Next" within the timeout ends pagination early.
        print("Next button not found or no more pages:", e)
        break

# Stop the timer
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Total time taken for data extraction: {elapsed_time} seconds")

# Create a DataFrame and save the data to a CSV file.
# The column order must match the row layout built by extract_data_from_page().
columns = ["Name", "Price", "Spec Score", "Processor", "Clock-speed", "Operating System", "SSD Capacity", "RAM Type", "Graphics Processor", "Display Size", "Display Resolution", "Capacity", "Aspect Ratio", "Overall Rating", "Battery Cell", "Battery type", "Power Supply", "Weight", "Touchscreen"]
df = pd.DataFrame(all_data, columns=columns)
df.to_csv("laptops_data.csv", index=False)

# Close the browser
driver.quit()

print("Data extraction completed and saved to laptops_data.csv")


Extracting data from page 1
Extracting data from page 2
Extracting data from page 3
Total time taken for data extraction: 167.08781242370605 seconds
Data extraction completed and saved to laptops_data.csv


In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# Function to initialize the WebDriver
def init_driver():
    """Build and return a headless Chrome WebDriver.

    Chrome is started from the chromedriver binary at a fixed local path
    (adjust it to where your ChromeDriver is located) with the ``--headless``,
    ``--disable-gpu`` and ``--no-sandbox`` flags.

    Returns:
        The ``webdriver.Chrome`` instance.
    """
    options = webdriver.ChromeOptions()
    for flag in ('--headless', '--disable-gpu', '--no-sandbox'):
        options.add_argument(flag)

    return webdriver.Chrome(
        service=Service("C:/Users/mypc/Desktop/chromedriver.exe"),
        options=options,
    )

# Start the browser and open the laptop finder listing.
driver = init_driver()
driver.get("https://www.91mobiles.com/laptopfinder.php")

# Click the "Available In Stores" filter text once it is clickable. The click
# is unconditional: the code does not inspect whether the filter is already
# active. Any failure is reported and the run continues.
stores_filter_locator = (By.XPATH, '//span[text()="Available In Stores"]')
try:
    wait = WebDriverWait(driver, 10)
    available_in_stores_checkbox = wait.until(
        EC.element_to_be_clickable(stores_filter_locator)
    )
    available_in_stores_checkbox.click()
    time.sleep(2)  # fixed pause while the filtered listing reloads
except Exception as e:
    print("Checkbox not found or already selected:", e)

# Function to extract data from the current page
def _close_detail_tabs(driver, listing_handle):
    """Close every window except ``listing_handle`` and leave the driver focused on it."""
    for handle in driver.window_handles:
        if handle != listing_handle:
            driver.switch_to.window(handle)
            driver.close()
    driver.switch_to.window(listing_handle)


def extract_data_from_page(driver, existing_urls):
    """Scrape the laptop cards on the current listing page, skipping known URLs.

    For each ``.finder_snipet_wrap`` card the name, price, spec score and
    detail-page URL are read from the listing.  Unless the URL has been seen
    before, the detail page is loaded in a separate tab to read the fields in
    ``spec_labels``; the tab is always closed and focus returned to the listing
    afterwards, even when loading or reading the detail page fails.

    Args:
        driver: WebDriver positioned on a listing page.
        existing_urls: collection of detail-page URLs scraped on earlier pages;
            it is only read, never modified.

    Returns:
        tuple[list[list[str]], set[str]]: the rows scraped from this page, each
        laid out as ``[name, price, spec_score]`` followed by the spec values in
        ``spec_labels`` order (``"N/A"`` where a value cannot be located), and
        the set of URLs first encountered on this page.  A URL is recorded as
        soon as a scrape is attempted, so it is returned even if that scrape
        failed.
    """
    # Output column -> label text matched against the first cell of a spec row
    # on the detail page.  "Overall Rating" is not looked up as a table row; it
    # is read from ``span.ratpt`` instead.  The order defines the row layout.
    spec_labels = {
        "Processor": "Processor",
        "Clock-speed": "Clock-speed",
        "Operating System": "Operating System",
        "SSD Capacity": "SSD Capacity",
        "RAM Type": "RAM type",
        "Graphics Processor": "Graphic Processor",
        "Display Size": "Display Size",
        "Display Resolution": "Display Resolution",
        "Capacity": "Capacity",
        "Aspect Ratio": "Aspect Ratio",
        "Overall Rating": "Overall Rating",
        "Battery Cell": "Battery Cell",
        "Battery type": "Battery type",
        "Power Supply": "Power Supply",
        "Weight": "Weight",
        "Touchscreen": "Touchscreen",
        "Colour(s)": "Colour(s)",
        "Display Features": "Display Features"
    }

    listing_handle = driver.current_window_handle
    data = []
    new_urls = set()
    for laptop in driver.find_elements(By.CSS_SELECTOR, ".finder_snipet_wrap"):
        try:
            title_link = laptop.find_element(By.CSS_SELECTOR, "a.hover_blue_link")
            name = title_link.text
            price = laptop.find_element(By.CSS_SELECTOR, ".price").text.replace('Rs.', '').strip()
            try:
                spec_score = laptop.find_element(By.CSS_SELECTOR, ".rating_box_new_list").text.replace('%', '').strip()
            except Exception:
                spec_score = "N/A"

            link = title_link.get_attribute("href")
            if link in existing_urls or link in new_urls:
                continue  # Skip duplicates from earlier pages and from this page
            new_urls.add(link)

            try:
                driver.execute_script("window.open('');")
                # Pick the tab that is not the listing rather than relying on
                # its position in window_handles.
                detail_handle = next(h for h in driver.window_handles if h != listing_handle)
                driver.switch_to.window(detail_handle)
                driver.get(link)

                spec_values = []
                for column, label in spec_labels.items():
                    try:
                        if column == "Overall Rating":
                            value = driver.find_element(By.CSS_SELECTOR, "span.ratpt").text.split("/")[0].strip()
                        else:
                            value = driver.find_element(By.XPATH, f"//td[text()='{label}']/following-sibling::td").text.strip()
                    except Exception:
                        value = "N/A"
                    spec_values.append(value)
            finally:
                # Without this, a failure after window.open would leave the
                # driver focused on the detail tab, and every remaining card on
                # the page would then fail as well.
                _close_detail_tabs(driver, listing_handle)

            data.append([name, price, spec_score] + spec_values)
        except Exception as e:
            print(f"Error extracting data: {e}")
    return data, new_urls

# List to store all the extracted data and URLs to avoid duplicates
all_data = []
all_urls = set()

# Column order must match the row layout built by extract_data_from_page().
columns = ["Name", "Price", "Spec Score", "Processor", "Clock-speed", "Operating System", "SSD Capacity", "RAM Type", "Graphics Processor", "Display Size", "Display Resolution", "Capacity", "Aspect Ratio", "Overall Rating", "Battery Cell", "Battery type", "Power Supply", "Weight", "Touchscreen", "Colour(s)", "Display Features"]

# Start the timer
start_time = time.time()

page_num = 1

# Extract data from all pages until the "Next" button disappears.
# The crawl is unbounded and opens one detail page per laptop, so the save and
# the browser shutdown live in ``finally``: an unexpected error or a manual
# interrupt still writes the rows collected so far and does not leave a
# headless Chrome process running.
try:
    while True:
        print(f"Extracting data from page {page_num}")
        page_data, new_urls = extract_data_from_page(driver, all_urls)
        all_data.extend(page_data)
        all_urls.update(new_urls)
        print(f"Completed extracting data from page {page_num}")
        page_num += 1
        try:
            next_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, "//span[@class='list-bttnn' and contains(text(), 'Next')]"))
            )
            # Click through JavaScript rather than a native WebElement click.
            driver.execute_script("arguments[0].click();", next_button)
            time.sleep(5)  # Wait for the next page to load
        except Exception as e:
            # No clickable "Next" within the timeout is treated as the last page.
            print("Next button not found or no more pages:", e)
            break
finally:
    try:
        # Stop the timer
        end_time = time.time()
        elapsed_time = end_time - start_time
        print(f"Total time taken for data extraction: {elapsed_time} seconds")

        # Create a DataFrame and save the data to a CSV file
        df = pd.DataFrame(all_data, columns=columns)
        df.to_csv("laptops_data.csv", index=False)
    finally:
        # Close the browser even if writing the CSV failed
        driver.quit()

print("Data extraction completed and saved to laptops_data.csv")


Extracting data from page 1
Completed extracting data from page 1
Extracting data from page 2
Completed extracting data from page 2
Extracting data from page 3
Completed extracting data from page 3
Extracting data from page 4
Completed extracting data from page 4
Extracting data from page 5
Completed extracting data from page 5
Extracting data from page 6


# Final Code to Extract Data From All Pages

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# Function to initialize the WebDriver
def init_driver():
    """Create the headless Chrome WebDriver used by this cell.

    Chrome is started from the chromedriver binary at a fixed local path
    (adjust it to where your ChromeDriver is located) with the ``--headless``,
    ``--disable-gpu`` and ``--no-sandbox`` flags.

    Returns:
        The ``webdriver.Chrome`` instance.
    """
    browser_options = webdriver.ChromeOptions()
    browser_options.add_argument('--headless')
    browser_options.add_argument('--disable-gpu')
    browser_options.add_argument('--no-sandbox')

    driver_service = Service("C:/Users/mypc/Desktop/chromedriver.exe")
    return webdriver.Chrome(service=driver_service, options=browser_options)

# Start the headless Chrome session that is passed to every step below.
driver = init_driver()

# Navigate to the website (the laptop finder listing that pagination starts from)
driver.get("https://www.91mobiles.com/laptopfinder.php")

# Confirm checkbox is clicked and get the number of items
def confirm_checkbox_and_get_items(driver):
    """Apply the "Available In Stores" filter and report the item count shown beside it.

    Locating the control, reading its count and clicking it are handled
    separately, so a count that cannot be read no longer prevents the filter
    from being applied.

    Args:
        driver: WebDriver positioned on the laptop finder listing.

    Returns:
        int | None: the count parsed from the label near the filter text, or
        ``None`` when the control could not be located or clicked, or when no
        count could be read.
    """
    import re  # local import: only needed for the count parsing below

    try:
        available_in_stores_checkbox = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, '//span[text()="Available In Stores"]'))
        )
    except Exception as e:
        print("Checkbox not found or already selected:", e)
        return None

    # Read the count first (as before), but tolerate thousands separators and
    # surrounding text instead of requiring the label to be exactly "(<digits>)".
    items_count = None
    try:
        label_element = available_in_stores_checkbox.find_element(By.XPATH, '..//label')
        count_match = re.search(r'\d[\d,]*', label_element.text)
        if count_match:
            items_count = int(count_match.group().replace(',', ''))
    except Exception as e:
        print("Could not read the number of items:", e)

    try:
        # NOTE(review): the located element is a <span>; is_selected() is
        # documented for checkbox/radio/option elements, so this guard is
        # presumably always true here — confirm against the page's markup.
        if not available_in_stores_checkbox.is_selected():
            available_in_stores_checkbox.click()
            time.sleep(2)  # Wait for the page to reload
    except Exception as e:
        print("Checkbox not found or already selected:", e)
        return None

    print(f"Checkbox clicked. Number of items available: {items_count}")
    return items_count

# Apply the "Available In Stores" filter and keep the item count it reports.
# NOTE(review): items_count is not referenced again in this cell — presumably it
# was meant as a sanity check against the number of scraped rows; confirm.
items_count = confirm_checkbox_and_get_items(driver)

# Function to extract data from the current page
def _close_detail_tabs(driver, listing_handle):
    """Close every window except ``listing_handle`` and leave the driver focused on it."""
    for handle in driver.window_handles:
        if handle != listing_handle:
            driver.switch_to.window(handle)
            driver.close()
    driver.switch_to.window(listing_handle)


def extract_data_from_page(driver, existing_urls):
    """Scrape the laptop cards on the current listing page, skipping known URLs.

    For each ``.finder_snipet_wrap`` card the name, price, spec score and
    detail-page URL are read from the listing.  Unless the URL has been seen
    before, the detail page is loaded in a separate tab to read the fields in
    ``spec_labels``; the tab is always closed and focus returned to the listing
    afterwards, even when loading or reading the detail page fails.

    Args:
        driver: WebDriver positioned on a listing page.
        existing_urls: collection of detail-page URLs scraped on earlier pages;
            it is only read, never modified.

    Returns:
        tuple[list[list[str]], set[str]]: the rows scraped from this page, each
        laid out as ``[name, price, spec_score]`` followed by the spec values in
        ``spec_labels`` order (``"N/A"`` where a value cannot be located), and
        the set of URLs first encountered on this page.  A URL is recorded as
        soon as a scrape is attempted, so it is returned even if that scrape
        failed.
    """
    # Output column -> label text matched against the first cell of a spec row
    # on the detail page.  "Overall Rating" is not looked up as a table row; it
    # is read from ``span.ratpt`` instead.  The order defines the row layout.
    spec_labels = {
        "Processor": "Processor",
        "Clock-speed": "Clock-speed",
        "Operating System": "Operating System",
        "SSD Capacity": "SSD Capacity",
        "RAM Type": "RAM type",
        "Graphics Processor": "Graphic Processor",
        "Display Size": "Display Size",
        "Display Resolution": "Display Resolution",
        "Capacity": "Capacity",
        "Aspect Ratio": "Aspect Ratio",
        "Overall Rating": "Overall Rating",
        "Battery Cell": "Battery Cell",
        "Battery type": "Battery type",
        "Power Supply": "Power Supply",
        "Weight": "Weight",
        "Touchscreen": "Touchscreen",
        "Colour(s)": "Colour(s)",
        "Display Features": "Display Features"
    }

    listing_handle = driver.current_window_handle
    data = []
    new_urls = set()
    for laptop in driver.find_elements(By.CSS_SELECTOR, ".finder_snipet_wrap"):
        try:
            title_link = laptop.find_element(By.CSS_SELECTOR, "a.hover_blue_link")
            name = title_link.text
            price = laptop.find_element(By.CSS_SELECTOR, ".price").text.replace('Rs.', '').strip()
            try:
                spec_score = laptop.find_element(By.CSS_SELECTOR, ".rating_box_new_list").text.replace('%', '').strip()
            except Exception:
                spec_score = "N/A"

            link = title_link.get_attribute("href")
            if link in existing_urls or link in new_urls:
                continue  # Skip duplicates from earlier pages and from this page
            new_urls.add(link)

            try:
                driver.execute_script("window.open('');")
                # Pick the tab that is not the listing rather than relying on
                # its position in window_handles.
                detail_handle = next(h for h in driver.window_handles if h != listing_handle)
                driver.switch_to.window(detail_handle)
                driver.get(link)

                spec_values = []
                for column, label in spec_labels.items():
                    try:
                        if column == "Overall Rating":
                            value = driver.find_element(By.CSS_SELECTOR, "span.ratpt").text.split("/")[0].strip()
                        else:
                            value = driver.find_element(By.XPATH, f"//td[text()='{label}']/following-sibling::td").text.strip()
                    except Exception:
                        value = "N/A"
                    spec_values.append(value)
            finally:
                # Without this, a failure after window.open would leave the
                # driver focused on the detail tab, and every remaining card on
                # the page would then fail as well.
                _close_detail_tabs(driver, listing_handle)

            data.append([name, price, spec_score] + spec_values)
        except Exception as e:
            print(f"Error extracting data: {e}")
    return data, new_urls

# List to store all the extracted data and URLs to avoid duplicates
all_data = []
all_urls = set()

# Column order must match the row layout built by extract_data_from_page().
columns = ["Name", "Price", "Spec Score", "Processor", "Clock-speed", "Operating System", "SSD Capacity", "RAM Type", "Graphics Processor", "Display Size", "Display Resolution", "Capacity", "Aspect Ratio", "Overall Rating", "Battery Cell", "Battery type", "Power Supply", "Weight", "Touchscreen", "Colour(s)", "Display Features"]

# Start the timer
start_time = time.time()

page_num = 1

# Extract data from all pages until the "Next" button disappears.
# The crawl is unbounded and opens one detail page per laptop, so the save and
# the browser shutdown live in ``finally``: an unexpected error or a manual
# interrupt still writes the rows collected so far and does not leave a
# headless Chrome process running.
try:
    while True:
        print(f"Extracting data from page {page_num}")
        page_data, new_urls = extract_data_from_page(driver, all_urls)
        all_data.extend(page_data)
        all_urls.update(new_urls)
        print(f"Completed extracting data from page {page_num}")
        page_num += 1
        try:
            next_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, "//span[@class='list-bttnn' and contains(text(), 'Next')]"))
            )
            # Click through JavaScript rather than a native WebElement click.
            driver.execute_script("arguments[0].click();", next_button)
            time.sleep(5)  # Wait for the next page to load
        except Exception as e:
            # No clickable "Next" within the timeout is treated as the last page.
            print("Next button not found or no more pages:", e)
            break
finally:
    try:
        # Stop the timer
        end_time = time.time()
        elapsed_time = end_time - start_time
        print(f"Total time taken for data extraction: {elapsed_time} seconds")

        # Create a DataFrame and save the data to a CSV file
        df = pd.DataFrame(all_data, columns=columns)
        df.to_csv("laptops_data.csv", index=False)
    finally:
        # Close the browser even if writing the CSV failed
        driver.quit()

print("Data extraction completed and saved to laptops_data.csv")
