## Using similar logic to our Sainsbury's scraper, we extract the product title, price, link and category (a new label) for each result of our search query

In [10]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException

In [11]:
def reject_cookies(driver):
    """Click the cookie banner's "reject all" button if it becomes clickable.

    Waits up to 7 seconds for the element with id
    ``onetrust-reject-all-handler`` and clicks it. If the wait times out, a
    message is printed and the function returns normally so the caller can
    carry on.

    Args:
        driver: Selenium WebDriver with the target page already loaded.

    Returns:
        None.
    """
    reject_button = (By.ID, "onetrust-reject-all-handler")
    try:
        # Wait for "clickable" (visible and enabled) rather than mere presence
        # in the DOM: clicking a button that is present but hidden raises an
        # interaction error that the handler below does not catch.
        WebDriverWait(driver, 7).until(
            EC.element_to_be_clickable(reject_button)
        ).click()
    except (TimeoutException, NoSuchElementException):
        print("Cookie banner not found or already rejected.")

In [12]:
def _read_child(element, xpath, attribute=None):
    """Return a child's text (or the named attribute), or None if the lookup fails."""
    try:
        child = element.find_element(By.XPATH, xpath)
        return child.get_attribute(attribute) if attribute else child.text
    except Exception:
        # Best effort: any failure while locating or reading the child is
        # treated as "value not available" rather than aborting the caller.
        return None


def extract_morrisons_product(element, category):
    """Build a product record from one product wrapper element.

    Args:
        element: Selenium element wrapping a single product card.
        category: Category label to store on the record.

    Returns:
        A dict with keys ``category``, ``title``, ``price`` and ``link``, or
        None when the title or price is missing or blank. ``link`` is None
        when the product link cannot be read.
    """
    # Strip before testing for emptiness so a whitespace-only title or price
    # is treated as missing instead of producing a record with an empty string.
    title = (_read_child(element, './/h3[@data-test="fop-title"]') or "").strip()
    price = (_read_child(element, './/span[@data-test="fop-price"]') or "").strip()
    link = _read_child(element, './/a[@data-test="fop-product-link"]', attribute="href")

    if not (title and price):
        return None

    return {
        "category": category,
        "title": title,
        "price": price,
        "link": link,
    }

In [13]:
def scroll_to_absolute_bottom(driver):
    """Scroll down until the page stops growing, report counts, then return to the top.

    Repeatedly jumps to the bottom of the document, pausing 2 seconds each
    time, until ``document.body.scrollHeight`` is unchanged between two
    consecutive measurements. It then prints how many elements match the two
    wrapper selectors (reported as "skeleton" and "visible product" wrappers)
    and scrolls back to the top of the page.

    Args:
        driver: Selenium WebDriver with the results page loaded.
    """
    read_height = "return document.body.scrollHeight"

    known_height = driver.execute_script(read_height)
    still_growing = True
    while still_growing:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)  # give the page time to append more content
        measured_height = driver.execute_script(read_height)
        still_growing = measured_height != known_height
        known_height = measured_height

    placeholders = driver.find_elements(By.CSS_SELECTOR, "div.sc-filq44-0.epZQps")
    loaded_cards = driver.find_elements(By.CSS_SELECTOR, "div.sc-filq44-0.iAbOJh")
    skeleton_count, wrapper_count = len(placeholders), len(loaded_cards)
    print(f"After scrolling to bottom: skeleton wrappers = {skeleton_count}, visible product wrappers = {wrapper_count}, total = {skeleton_count + wrapper_count}")

    # Go back to the top so the caller can start from the beginning of the page.
    driver.execute_script("window.scrollTo(0, 0);")
    time.sleep(2)

In [14]:
def get_category_from_element(element):
    """Return the category header text found inside ``element``, if any.

    Looks for a breadcrumb-text span nested in a div whose class contains
    ``outer-header-container``.

    Args:
        element: Selenium element to search within.

    Returns:
        The stripped header text, or None when no header is found, the lookup
        fails for any reason, or the text is empty.
    """
    try:
        label = element.find_element(
            By.XPATH,
            './/div[contains(@class,"outer-header-container")]//span[@data-test="breadcrumb-text"]'
        ).text.strip()
    except Exception:
        return None
    # An empty header is reported the same way as a missing one.
    return label or None

In [15]:
def scroll_and_extract_products_with_categories(driver, scroll_step=875, pause=2):
    """Scroll down the page step by step, collecting every product once.

    On each pass, every element matching the product-wrapper selector is
    examined: a category header found on a wrapper (other than one containing
    "Top results") becomes the current category, and the wrapper's product is
    recorded with that category unless it was already collected. The page is
    then scrolled down by ``scroll_step`` pixels; the loop ends when the
    scroll position no longer changes.

    NOTE(review): a wrapper without its own header inherits the most recent
    header seen. If a section's header is not among the matched wrappers when
    its products are scanned, those products keep the previous section's
    label, so spot-check the categories in the output.

    Args:
        driver: Selenium WebDriver positioned at the top of the results page.
        scroll_step: Pixels to scroll per pass. The first scroll is half a
            step (437.5 px with the default).
        pause: Seconds to wait after each scroll before reading the position.

    Returns:
        List of product dicts (``category``, ``title``, ``price``, ``link``)
        in the order they were first seen. Wrappers that yield no product are
        skipped, so the list never contains None.
    """
    all_products = []
    seen_keys = set()  # set membership keeps de-duplication O(1) per product
    current_category = "Top results"

    driver.execute_script("window.scrollBy(0, arguments[0]);", scroll_step / 2)

    while True:
        product_elements = driver.find_elements(By.CSS_SELECTOR, "div.sc-filq44-0.iAbOJh")

        for element in product_elements:
            maybe_header = get_category_from_element(element)
            if maybe_header is not None and "Top results" not in maybe_header:
                current_category = maybe_header

            product = extract_morrisons_product(element, current_category)
            if product is None:
                # Wrapper without a usable title/price: nothing to record.
                continue

            # Reuse the link already read by the extractor instead of querying
            # the element again (an unguarded second lookup would raise on a
            # wrapper without a product link). Link-less products fall back to
            # (title, price) so they are neither merged together nor re-added
            # on every pass.
            key = product.get("link") or (product.get("title"), product.get("price"))
            if key in seen_keys:
                continue
            seen_keys.add(key)
            all_products.append(product)

        print(f"extracted {len(all_products)} unique products so far.")

        prev_scroll = driver.execute_script("return window.pageYOffset")
        driver.execute_script("window.scrollBy(0, arguments[0]);", scroll_step)
        time.sleep(pause)
        curr_scroll = driver.execute_script("return window.pageYOffset")

        # The position stops changing once the bottom of the page is reached.
        if curr_scroll == prev_scroll:
            break

    print(f"Total unique products extracted: {len(all_products)}")
    return all_products

In [17]:
# Search results page to scrape.
url = "https://groceries.morrisons.com/search?q=orange"

chrome_options = Options()
# Fixed viewport size. NOTE(review): the scroll distances used by the
# extraction step were presumably tuned against this window height - confirm
# before changing either.
chrome_options.add_argument("--window-size=1434,710")
driver = webdriver.Chrome(options=chrome_options)

try:
    driver.get(url)
    reject_cookies(driver)
    time.sleep(2)

    # First pass forces the whole result list to load, second pass reads it.
    scroll_to_absolute_bottom(driver)
    all_products = scroll_and_extract_products_with_categories(driver)
finally:
    # Always close the browser, even if scraping fails part-way, so re-running
    # the cell does not leave stray Chrome processes behind.
    driver.quit()

# Skip any entries for which no product could be extracted before building
# the table.
morrisons_df = pd.DataFrame(
    [product for product in all_products if product is not None],
    columns=["category", "title", "price", "link"],
)

print(f"Products in DataFrame: {len(morrisons_df)}")

After scrolling to bottom: skeleton wrappers = 341, visible product wrappers = 4, total = 345
extracted 12 unique products so far.
extracted 24 unique products so far.
extracted 36 unique products so far.
extracted 48 unique products so far.
extracted 60 unique products so far.
extracted 72 unique products so far.
extracted 84 unique products so far.
extracted 96 unique products so far.
extracted 108 unique products so far.
extracted 120 unique products so far.
extracted 132 unique products so far.
extracted 144 unique products so far.
extracted 156 unique products so far.
extracted 168 unique products so far.
extracted 180 unique products so far.
extracted 192 unique products so far.
extracted 204 unique products so far.
extracted 216 unique products so far.
extracted 228 unique products so far.
extracted 240 unique products so far.
extracted 252 unique products so far.
extracted 264 unique products so far.
extracted 276 unique products so far.
extracted 288 unique products so far.
ex

In [None]:
# Save the scraped results to CSV, leaving out the DataFrame's row index.
morrisons_df.to_csv(
    "morrisons_oranges.csv",
    index=False,
)

Unnamed: 0,category,title,price,link
0,Top results,Morrisons Satsumas 600g,£1.00,https://groceries.morrisons.com/products/morri...
1,Top results,Morrisons Oranges,£1.35,https://groceries.morrisons.com/products/morri...
2,Top results,Morrisons Loose Orange,£0.30,https://groceries.morrisons.com/products/morri...
3,Top results,Morrisons Large Oranges,£2.20,https://groceries.morrisons.com/products/morri...
4,Top results,Morrisons Tangerines 600g,£1.50,https://groceries.morrisons.com/products/morri...
...,...,...,...,...
340,"Beer, Wines & Spirits",Haliborange Omega 3 Softies 60's Orange,£10.00,https://groceries.morrisons.com/products/halib...
341,"Beer, Wines & Spirits",Haliborange Disney Toy Story Omega 3 Orange 60s,£12.50,https://groceries.morrisons.com/products/halib...
342,"Beer, Wines & Spirits",Strepsils Orange Vitamin C,£5.80,https://groceries.morrisons.com/products/strep...
343,"Beer, Wines & Spirits","£100,000 Orange Doubler Scratch Card",£1.00,https://groceries.morrisons.com/products/100-0...


In [None]:
# Write the same scraped results to products.csv, again without the row index.
morrisons_df.to_csv(
    "products.csv",
    index=False,
)