In [3]:
import time
import pandas as pd
import numpy as np
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
# Ensure the necessary libraries are installed

In [4]:
def get_items(driver, timeout=10):
    """Collect product names and prices from the results page currently loaded.

    Waits until at least one element with class ``pt__link`` is present,
    then walks every element with class ``pt`` and reads its name and price.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session that is already showing a search-results page.
    timeout : float, optional
        Maximum number of seconds to wait for the results to appear.
        Defaults to 10, which is the value this function always used.

    Returns
    -------
    dict
        Maps each product name (the ``title`` attribute of the product's
        ``pt__link`` element) to the text of its ``pt__cost__retail-price``
        element. Empty when the results did not load within ``timeout``.
        Because names are dictionary keys, two products with the same name
        collapse into one entry (the later one wins).
    """
    products = {}
    try:
        # Block until the first product link is in the DOM.
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.CLASS_NAME, "pt__link"))
        )
    except TimeoutException:
        print("Search results did not load in time.")
        return products

    product_elements = driver.find_elements(By.CLASS_NAME, "pt")

    for indx, product in enumerate(product_elements):
        try:
            product_name = product.find_element(By.CLASS_NAME, "pt__link").get_attribute("title")
            product_price = product.find_element(By.CLASS_NAME, "pt__cost__retail-price").text
        except NoSuchElementException as e:
            # A tile without a link or a retail price is reported and skipped
            # so one malformed tile does not abort the whole page.
            print(f"Error retrieving product details for index {indx}: {e}")
        else:
            products[product_name] = product_price

    return products

In [5]:
def reject_cookies(driver, timeout=7):
    """Dismiss the cookie banner by clicking its "reject all" button, if shown.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session with the target page loaded.
    timeout : float, optional
        Maximum number of seconds to wait for the button. Defaults to 7,
        which is the value this function always used.

    Returns
    -------
    None
        If the button never becomes clickable, a message is printed and the
        function returns normally so the caller can carry on.
    """
    try:
        # Wait until the button is clickable, not merely present in the DOM:
        # a banner that is still rendering can reject the click.
        reject_button = WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable((By.ID, "onetrust-reject-all-handler"))
        )
        reject_button.click()
    except (TimeoutException, NoSuchElementException):
        print("Cookie banner not found or already rejected.")

In [6]:
def element_exists(driver, by, value, timeout=5):
    """Report whether an element shows up in the DOM within ``timeout`` seconds.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session to query.
    by, value
        Locator strategy and locator value, combined into the ``(by, value)``
        pair handed to ``EC.presence_of_element_located``.
    timeout : float, optional
        Seconds to wait before giving up (default 5).

    Returns
    -------
    bool
        ``True`` if the wait completed, ``False`` if it raised
        ``TimeoutException``.
    """
    found = False
    try:
        waiter = WebDriverWait(driver, timeout)
        is_present = EC.presence_of_element_located((by, value))
        waiter.until(is_present)
        found = True
    except TimeoutException:
        # Timing out is the expected "not there" signal, not an error.
        pass
    return found

Try locating the search box by its element ID.

In [14]:
# Scrape every results page for one search term and save the name/price
# pairs to a CSV file named after the query.
url = "https://www.sainsburys.co.uk"
query = "pasta sauce"

KEYSTROKE_DELAY = 0.2  # seconds to pause between typed characters
PAGE_LOAD_DELAY = 4    # seconds to pause after clicking "next" before scraping

all_items = {}

driver = webdriver.Chrome()
try:
    driver.get(url)
    reject_cookies(driver)

    search = driver.find_element(By.ID, "term")
    search.clear()  # Clear the search field if needed

    for char in query:
        search.send_keys(char)  # Type each character into the search field
        time.sleep(KEYSTROKE_DELAY)
    search.send_keys(Keys.RETURN)  # Press Enter to submit the search

    if element_exists(driver, By.CLASS_NAME, "ln-c-pagination__list"):
        # The "last" pagination link's text is read as the total page count.
        num_pages = int(driver.find_element(By.CSS_SELECTOR, "[rel='last'].ln-c-pagination__link").text)
        for page in range(1, num_pages + 1):
            all_items.update(get_items(driver))
            if page == num_pages:
                # Nothing follows the last page, so skip the click and the wait.
                break
            next_button = driver.find_element(By.CSS_SELECTOR, "[rel='next'].ln-c-pagination__link")
            next_button.click()
            time.sleep(PAGE_LOAD_DELAY)
    else:
        # NOTE(review): this substring test also matches titles containing
        # "10", "20", ... or a query with a "0" in it - confirm the title
        # format and tighten the check if needed.
        if "0" in driver.find_element(By.CSS_SELECTOR, "[data-testid='search-results-title']").text:
            print("There are not results for this search")
        else:
            all_items.update(get_items(driver))
finally:
    # Always close the browser, even if scraping failed part-way through,
    # so repeated runs do not pile up Chrome processes.
    driver.quit()

all_items_df = pd.DataFrame(list(all_items.items()), columns=['Product Name', 'Price'])

# f-string so the file is named after the query rather than the literal
# text "{query}".
all_items_df.to_csv(f'sainsburys_{query}.csv', index=False)


In [16]:
# Show the scraped products; the bare expression renders the DataFrame as the cell output.
all_items_df

Unnamed: 0,Product Name,Price
0,Homepride Tomato & Herb Pasta Bake Sauce 485g,£2.50
1,Homepride Tomato & Bacon Pasta Bake Sauce 485g,£2.50
2,Homepride No Added Sugar Tomato & Herb Pasta B...,£2.50
3,Sainsbury's Tomato & Herb Bolognese Pasta Sauc...,69p
4,Heinz Sundried Cherry Tomato & Basil Pasta Sau...,£2.00
...,...,...
160,Heinz Spaghetti In Tomato Sauce 4x400g,£3.00
161,Heinz Spaghetti In a Rich Tomato Sauce with Ri...,£2.00
162,Heinz Baked Beans No Added Sugar in a Rich Tom...,£3.00
163,Colman's Tuna Pasta Bake Recipe Mix 44g,90p
