## Using similar logic to our Sainsbury's scraper, we extract the product, price, link and category (a new label) for our search query

In [1]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException

In [2]:
def reject_cookies(driver):
    """Dismiss the cookie banner by clicking its "reject all" button.

    Waits up to 7 seconds for the element with id
    ``onetrust-reject-all-handler`` to become clickable and clicks it.
    If the button does not become clickable in time, a message is printed
    and the function returns normally so the caller can carry on.

    Args:
        driver: Selenium WebDriver already navigated to the target page.
    """
    try:
        # Wait for clickability, not just DOM presence: an element that is
        # present but hidden or disabled would make .click() raise an
        # interaction error that this handler does not catch.
        WebDriverWait(driver, 7).until(
            EC.element_to_be_clickable((By.ID, "onetrust-reject-all-handler"))
        ).click()
    except (TimeoutException, NoSuchElementException):
        print("Cookie banner not found or already rejected.")

In [3]:
def extract_morrisons_product(element, category):
    """Build a product record from one product wrapper element.

    Each field is looked up independently; a failed lookup yields ``None``
    for that field instead of raising.

    Args:
        element: Object exposing ``find_element`` (a Selenium WebElement
            in this notebook) that wraps a single product tile.
        category: Label stored under the ``"category"`` key of the record.

    Returns:
        A dict with keys ``category``, ``title``, ``price`` and ``link``
        when both a title and a price were found; ``link`` may be ``None``.
        Returns ``None`` when the title or the price is missing or empty.
    """
    def _read(xpath, getter):
        # Any failure (missing node, stale element, ...) maps to None.
        try:
            return getter(element.find_element(By.XPATH, xpath))
        except Exception:
            return None

    title = _read('.//h3[@data-test="fop-title"]', lambda node: node.text)
    link = _read(
        './/a[@data-test="fop-product-link"]',
        lambda node: node.get_attribute("href"),
    )
    price = _read('.//span[@data-test="fop-price"]', lambda node: node.text)

    # A record is only useful with both a title and a price.
    if not (title and price):
        return None

    record = {}
    record["category"] = category
    record["title"] = title.strip()
    record["price"] = price.strip()
    record["link"] = link
    return record

In [4]:
def scroll_to_absolute_bottom(driver):
    """Scroll to the bottom until the page height stops growing, then return to the top.

    After each jump to the bottom the function sleeps 2 seconds and
    re-reads ``document.body.scrollHeight``; it stops once two consecutive
    readings match. It then prints how many elements match the two wrapper
    selectors (labelled "skeleton" and "visible product" in the message),
    scrolls back to the top and sleeps another 2 seconds.

    Args:
        driver: Selenium WebDriver showing the page to scroll.
    """
    height_query = "return document.body.scrollHeight"

    previous_height = driver.execute_script(height_query)
    settled = False
    while not settled:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)
        current_height = driver.execute_script(height_query)
        # Done when the height did not change since the previous reading.
        settled = current_height == previous_height
        previous_height = current_height

    skeleton_nodes = driver.find_elements(By.CSS_SELECTOR, "div.sc-filq44-0.epZQps")
    wrapper_nodes = driver.find_elements(By.CSS_SELECTOR, "div.sc-filq44-0.iAbOJh")
    skeleton_count, wrapper_count = len(skeleton_nodes), len(wrapper_nodes)
    print(f"After scrolling to bottom: skeleton wrappers = {skeleton_count}, visible product wrappers = {wrapper_count}, total = {skeleton_count + wrapper_count}")

    # Return to the top of the page.
    driver.execute_script("window.scrollTo(0, 0);")
    time.sleep(2)

In [5]:
def get_category_from_element(element):
    """Return the category header text carried by a wrapper element, if any.

    Looks for a ``breadcrumb-text`` span inside a container whose class
    contains ``outer-header-container``.

    Args:
        element: Object exposing ``find_element`` (a Selenium WebElement
            in this notebook).

    Returns:
        The stripped header text, or ``None`` when the header is absent,
        empty, or the lookup fails for any reason.
    """
    header_xpath = (
        './/div[contains(@class,"outer-header-container")]'
        '//span[@data-test="breadcrumb-text"]'
    )
    try:
        label = element.find_element(By.XPATH, header_xpath).text.strip()
    except Exception:
        return None
    # An empty header counts as "no header".
    return label if label else None

In [6]:
def scroll_and_extract_products_with_categories(driver, scroll_step=875, pause=2):
    """Scroll down the page in fixed steps and collect each product once.

    On every pass the currently matching product wrappers are read. A
    wrapper that carries a category header (other than one containing
    "Top results") switches the running category, which is then attached
    to that product and the ones after it. Scrolling stops when a scroll
    step no longer changes ``window.pageYOffset``.

    Wrappers from which no product could be extracted are skipped rather
    than aborting the scrape or adding empty rows.

    Args:
        driver: Selenium WebDriver showing the results page.
        scroll_step: Vertical distance passed to ``window.scrollBy`` on
            each pass. The initial offset is half of this value.
        pause: Seconds to sleep after each scroll before reading again.

    Returns:
        List of product dicts as produced by ``extract_morrisons_product``,
        in first-seen order, with duplicates removed.
    """
    all_products = []
    seen_keys = set()  # set gives O(1) duplicate checks

    # Start half a step down (437.5 for the default step of 875).
    driver.execute_script(f"window.scrollBy(0, {scroll_step / 2});")

    current_category = "Top results"

    while True:
        product_elements = driver.find_elements(By.CSS_SELECTOR, "div.sc-filq44-0.iAbOJh")

        for element in product_elements:
            maybe_header = get_category_from_element(element)
            if maybe_header is not None and "Top results" not in maybe_header:
                current_category = maybe_header

            product = extract_morrisons_product(element, current_category)
            if product is None:
                # No title/price (placeholder, stale node, ...): nothing to record.
                continue

            # Deduplicate on the link already extracted above; fall back to
            # the title when a product has no link.
            key = product["link"] or product["title"]
            if key in seen_keys:
                continue
            seen_keys.add(key)
            all_products.append(product)

        print(f"extracted {len(all_products)} unique products so far.")

        prev_scroll = driver.execute_script("return window.pageYOffset")
        driver.execute_script(f"window.scrollBy(0, {scroll_step});")
        time.sleep(pause)
        curr_scroll = driver.execute_script("return window.pageYOffset")

        # The offset stops changing once the bottom has been reached.
        if curr_scroll == prev_scroll:
            break

    print(f"Total unique products extracted: {len(all_products)}")
    return all_products

In [7]:
# Search results page to scrape.
url = "https://groceries.morrisons.com/search?q=period"

chrome_options = Options()
# Fixed window size: zoom out to get the length of the page, then divide by
# the number of scrolls.
# NOTE(review): presumably the scroll offsets used during extraction were
# tuned against this viewport; confirm before changing it.
chrome_options.add_argument("--window-size=1434,710")
driver = webdriver.Chrome(options=chrome_options)

try:
    driver.get(url)
    reject_cookies(driver)
    time.sleep(2)

    # First force the whole result list to load, then walk it from the top.
    scroll_to_absolute_bottom(driver)

    all_products = scroll_and_extract_products_with_categories(driver)
finally:
    # Always close the browser so a failed scrape does not leave a Chrome
    # process running behind the notebook.
    driver.quit()


morrisons_df = pd.DataFrame(all_products, columns=["category", "title", "price", "link"])

print(f"Products in DataFrame: {len(morrisons_df)}")

After scrolling to bottom: skeleton wrappers = 125, visible product wrappers = 4, total = 129
extracted 12 unique products so far.
extracted 24 unique products so far.
extracted 36 unique products so far.
extracted 48 unique products so far.
extracted 60 unique products so far.
extracted 72 unique products so far.
extracted 84 unique products so far.
extracted 96 unique products so far.
extracted 108 unique products so far.
extracted 120 unique products so far.
extracted 129 unique products so far.
extracted 129 unique products so far.
Total unique products extracted: 129
Products in DataFrame: 129


In [10]:
# Show the scraped products table as this cell's output.
morrisons_df

Unnamed: 0,category,title,price,link
0,Top results,Wuka Teen Stretch Period Pants Size 2 Age 12-16,£15.00,https://groceries.morrisons.com/products/wuka-...
1,Top results,Always Ultra Day Normal (Size 1) Sanitary Towe...,£3.40,https://groceries.morrisons.com/products/alway...
2,Top results,Tampax Pearl Super Tampons with Applicator 18 ...,£3.40,https://groceries.morrisons.com/products/tampa...
3,Top results,Bodyform Cour-V Ultra Night Sanitary Towels Wings,£2.50,https://groceries.morrisons.com/products/bodyf...
4,Top results,Morrisons Super Ultra Towels with Wings,£0.42,https://groceries.morrisons.com/products/morri...
...,...,...,...,...
124,Donate to Charity,Donate £3 To Support A Food Bank With Morrisons,£3.00,https://groceries.morrisons.com/products/donat...
125,Donate to Charity,Donate £1 To Support A Food Bank With Morrisons,£1.00,https://groceries.morrisons.com/products/donat...
126,Donate to Charity,Donate £10 To Support A Food Bank With Morrisons,£10.00,https://groceries.morrisons.com/products/donat...
127,Big Packs,Always Dailies Large Profresh Panty Liners,£3.40,https://groceries.morrisons.com/products/alway...


In [8]:
# Save the scraped products to CSV without the index column.
# NOTE(review): the file name says "oranges" but the search URL in this
# notebook queries "period"; confirm the intended output name.
morrisons_df.to_csv("morrisons_oranges.csv", index=False)

In [9]:
# Save the same DataFrame to products.csv (index column omitted).
# NOTE(review): this is a second export of the same data; confirm both
# output files are needed.
morrisons_df.to_csv("products.csv", index=False)