In [4]:
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import numpy as np
import csv
from datetime import datetime
from typing import Union

In [2]:
# Function to load more phones
def load_more_phones(alternative_xpath: bool) -> None:
    """Click the listing's 'Show More' button and pause briefly afterwards.

    Args:
        alternative_xpath: When truthy, the button is looked up directly under
            ``div[5]`` of the listing container; otherwise under
            ``div[5]/a[1]``. The scraping loop switches to the alternative
            path after a few clicks because the button's location changes.

    Raises:
        TimeoutException: If the button does not become clickable within
            2 seconds (raised by ``WebDriverWait.until``).
    """
    if alternative_xpath:
        button_xpath = '//*[@id="productListingContainer"]/div[5]/button'
    else:
        button_xpath = '//*[@id="productListingContainer"]/div[5]/a[1]/button'

    clickable = EC.element_to_be_clickable((By.XPATH, button_xpath))
    WebDriverWait(driver, 2).until(clickable).click()

    # Fixed one-second pause as a safety margin after the click.
    time.sleep(1)

def move_to_phone(number: int) -> None:
    """Move the mouse pointer onto the link of product tile ``number``.

    The tile is addressed as ``article[number]`` inside ``div[4]`` of the
    listing container. Moving there first ensures the hover field of the tile
    is not missed by the subsequent reads.

    Raises:
        NoSuchElementException: Propagated from ``driver.find_element`` when
            the tile link is not in the DOM; the scraping loop catches it.
    """
    tile_link_xpath = f'//*[@id="productListingContainer"]/div[4]/article[{number}]/a'
    tile_link = driver.find_element(By.XPATH, tile_link_xpath)

    pointer = ActionChains(driver)
    pointer.move_to_element(tile_link)
    pointer.perform()

def get_phone_name(number: int) -> Union[str, float]:
    """Read the phone name from product tile ``number``.

    The value is the text of ``div[5]/p[1]/span`` inside the tile's
    ``article`` element.

    Returns:
        The element text, or ``np.nan`` if the element does not appear in the
        DOM within 2 seconds.
    """
    name_locator = (
        By.XPATH,
        f'//*[@id="productListingContainer"]/div[4]/article[{number}]/div[5]/p[1]/span',
    )
    try:
        return WebDriverWait(driver, 2).until(EC.presence_of_element_located(name_locator)).text
    except TimeoutException:
        return np.nan

def get_brand(number: int) -> Union[str, float]:
    """Read the brand from product tile ``number``.

    The value is the text of ``div[5]/p[1]/strong`` inside the tile's
    ``article`` element.

    Returns:
        The element text, or ``np.nan`` if the element does not appear in the
        DOM within 2 seconds.
    """
    brand_locator = (
        By.XPATH,
        f'//*[@id="productListingContainer"]/div[4]/article[{number}]/div[5]/p[1]/strong',
    )
    try:
        return WebDriverWait(driver, 2).until(EC.presence_of_element_located(brand_locator)).text
    except TimeoutException:
        return np.nan

def get_specs(number: int) -> Union[str, float]:
    """Read the specification line from product tile ``number``.

    The value is the text of ``div[5]/p[2]`` inside the tile's ``article``
    element.

    Returns:
        The element text, or ``np.nan`` if the element does not appear in the
        DOM within 2 seconds.
    """
    specs_locator = (
        By.XPATH,
        f'//*[@id="productListingContainer"]/div[4]/article[{number}]/div[5]/p[2]',
    )
    try:
        return WebDriverWait(driver, 2).until(EC.presence_of_element_located(specs_locator)).text
    except TimeoutException:
        return np.nan


def get_price(number: int) -> Union[str, float]:
    """Read the displayed price from product tile ``number``.

    The value is the raw text of ``div[4]/span/span`` inside the tile's
    ``article`` element; it is returned as shown on the page, not converted
    to a number.

    Returns:
        The element text, or ``np.nan`` if the element does not appear in the
        DOM within 2 seconds.
    """
    price_locator = (
        By.XPATH,
        f'//*[@id="productListingContainer"]/div[4]/article[{number}]/div[4]/span/span',
    )
    try:
        return WebDriverWait(driver, 2).until(EC.presence_of_element_located(price_locator)).text
    except TimeoutException:
        return np.nan

def get_rating(number: int) -> Union[str, float]:
    """Read the rating description from product tile ``number``.

    Unlike the other field readers, the value comes from the ``aria-label``
    attribute (not the visible text) of ``div[5]/div/span`` inside the tile's
    ``article`` element.

    Returns:
        The attribute value, or ``np.nan`` if the element does not appear in
        the DOM within 2 seconds.
    """
    # NOTE(review): get_attribute presumably yields None when the span has no
    # aria-label; callers would then receive None rather than NaN - confirm.
    rating_locator = (
        By.XPATH,
        f'//*[@id="productListingContainer"]/div[4]/article[{number}]/div[5]/div/span',
    )
    try:
        rating_span = WebDriverWait(driver, 2).until(EC.presence_of_element_located(rating_locator))
        return rating_span.get_attribute('aria-label')
    except TimeoutException:
        return np.nan

def get_delivery(number: int) -> Union[str, float]:
    """Read the delivery information of product tile ``number`` from its popover.

    The function clicks the availability element of the tile
    (``div[3]/span/span``), waits for a popover ``div`` to appear, takes the
    third line of the popover text as the delivery information and finally
    closes the popover again.

    The popover is closed on every path that managed to open it, including the
    case where its text has fewer than three lines. Leaving it open would let
    the unspecific ``popover-`` lookup of the next tile match the stale one.

    Args:
        number: 1-based index of the product tile (``article[number]``).

    Returns:
        The third line of the popover text, or ``np.nan`` when the trigger
        element is missing, cannot be clicked, the popover does not show up
        within 2 seconds, or its text has fewer than three lines.
    """
    # Imported locally so the function does not depend on the notebook's
    # import cell, which only pulls in TimeoutException / NoSuchElementException.
    from selenium.common.exceptions import WebDriverException

    try:
        hover_element = WebDriverWait(driver, 2).until(
            EC.presence_of_element_located((By.XPATH, f'//*[@id="productListingContainer"]/div[4]/article[{number}]/div[3]/span/span'))
        )
    except TimeoutException:
        return np.nan

    try:
        # Open the popover. A click fails with interaction errors (intercepted,
        # not interactable, stale element), not with NoSuchElementException, so
        # the common Selenium base class is caught here.
        hover_element.click()
    except WebDriverException:
        return np.nan

    # Give the popover a moment to render its content.
    time.sleep(1)

    try:
        # The popover id changes per product, hence the partial id match.
        div_element = WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.XPATH, "//div[contains(@id, 'popover-')]")))
    except TimeoutException:
        return np.nan

    popover_lines = div_element.text.split('\n')
    # Do not return early on a short popover: it still has to be closed below.
    delivery = popover_lines[2] if len(popover_lines) > 2 else np.nan

    try:
        # The popover must be closed before the next tile is processed.
        close = WebDriverWait(driver, 2).until(EC.presence_of_element_located((By.XPATH, "//div[contains(@id, 'popover')]/button")))
        close.click()
    except Exception:
        # Best effort only; `Exception` (instead of a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        print("Could not close hover field")

    return delivery

def get_total_nr_of_phones() -> int:
    """Return the product count announced in the listing heading.

    The count is taken from the first whitespace-separated token of the
    ``h2`` inside ``div[1]`` of the listing container. All non-digit
    characters are dropped from that token, so any thousands separator
    (typographic or ASCII apostrophe, dot, ...) is tolerated, and splitting on
    arbitrary whitespace also copes with a non-breaking space after the number.

    Returns:
        The parsed count.

    Raises:
        TimeoutException: If the heading is not present within 3 seconds.
        ValueError: If the heading does not start with a number.
    """
    total_phones_element = WebDriverWait(driver, 3).until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="productListingContainer"]/div[1]/h2'))
    )
    heading_text = total_phones_element.text

    tokens = heading_text.split()
    # isdecimal() only accepts characters that int() can parse as well.
    digits = "".join(ch for ch in tokens[0] if ch.isdecimal()) if tokens else ""
    if not digits:
        raise ValueError(f"Could not read the number of phones from heading {heading_text!r}")
    return int(digits)

def save_scraped_data(path: str, data: list[list], col_names: list[str]) -> None:
    """Write the scraped rows to a UTF-16 encoded CSV file.

    Missing parent directories of ``path`` are created first, so a long
    scraping run is not lost to a ``FileNotFoundError`` just because the
    output folder does not exist yet. An existing file is overwritten.

    Args:
        path: Destination file path.
        data: Rows to write; each row is a sequence of cell values.
        col_names: Header row written before the data rows.
    """
    import os  # local import: only needed for the directory handling below

    parent_dir = os.path.dirname(path)
    if parent_dir:  # empty for a bare file name, which needs no directory
        os.makedirs(parent_dir, exist_ok=True)

    # newline='' lets the csv module control line endings itself.
    with open(path, mode='w', newline='', encoding='utf-16') as file:
        writer = csv.writer(file)
        writer.writerow(col_names)
        writer.writerows(data)

In [5]:
# Configure and start the Chrome session. The resulting module-level `driver`
# is the browser handle that all scraping helpers in this notebook use.
chrome_options = webdriver.ChromeOptions()

# Define the desired user agent that the browser sends with its requests
custom_user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
chrome_options.add_argument(f"user-agent={custom_user_agent}")
# Headless mode is currently disabled; enable the next line to run Chrome without a visible window
#chrome_options.add_argument("--headless")

# Set the path to the chromedriver
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere
service = Service("/usr/bin/chromedriver")

# Set up the WebDriver using the Service object
driver = webdriver.Chrome(service=service, options=chrome_options)
driver.maximize_window()

# Open the webpage
driver.get('https://www.digitec.ch/de/s1/producttype/smartphone-24')
# Fixed pause after navigation; presumably gives the listing time to render
# before the first element lookups - the helpers also use explicit waits
time.sleep(2)

In [6]:
# Scrape every product tile of the listing into `data_to_save`.
#
# Changes compared with the previous version of this cell:
#   * The loop variable is no longer incremented by hand. The manual `i += 1`
#     made the end-of-page check fire one iteration early, so the last phone
#     was never scraped, and it could scrape a wrong index after a missing tile.
#   * A tile that cannot be found is simply skipped, but the 'show more'
#     button is still clicked at a batch boundary so later tiles get loaded.
#   * The timestamp is taken once per row, so the printed and the stored value
#     are identical.
#   * The browser is closed in a `finally` block, i.e. also when the run
#     fails or is interrupted. Rows collected so far stay in `data_to_save`.

# According to the original notes, 36 tiles are present after the page load
# and every click on 'show more' adds 60 further tiles.
INITIAL_BATCH_SIZE = 36
LOAD_MORE_BATCH_SIZE = 60

counter = 1  # number of the next 'show more' click
total_phones = get_total_nr_of_phones()
data_to_save = []

try:
    for i in range(1, total_phones + 1):
        try:
            # Hover the tile first so its hover field is not missed.
            move_to_phone(i)
        except NoSuchElementException:
            print(i, "Could not find phone")
        else:
            name = get_phone_name(i)
            brand = get_brand(i)
            specs = get_specs(i)
            price = get_price(i)
            rating = get_rating(i)
            # Read last: this opens (and closes) a popover on the tile.
            delivery_info = get_delivery(i)
            scraped_at = datetime.now()

            print(i, brand, name, rating, specs, price, delivery_info, scraped_at)
            data_to_save.append([i, brand, name, rating, specs, price, delivery_info, scraped_at])

        if i == total_phones:
            print(f"Scraped {len(data_to_save)} of {total_phones} phones, reached the end of the page")
            break

        # Tile i is the last one of the currently loaded batch: load the next batch.
        if i >= INITIAL_BATCH_SIZE and (i - INITIAL_BATCH_SIZE) % LOAD_MORE_BATCH_SIZE == 0:
            try:
                # The XPath of the 'show more' button changes after 3 clicks.
                load_more_phones(alternative_xpath=counter > 3)
            except TimeoutException:
                print("An error occurred loading more phones")
            counter += 1
finally:
    driver.quit()

1 Google Pixel 7a 218 Bewertungen 4.3 von 5 Sternen 128 GB, Charcoal, 6.10", SIM + eSIM, 64 Mpx, 5G 419.– übermorgen geliefert 2023-12-17 09:58:43.682329
2 Samsung Galaxy A54 5G 389 Bewertungen 4.5 von 5 Sternen 256 GB, Awesome Graphite, 6.40", Hybrid Dual SIM, 50 Mpx, 5G 389.– übermorgen geliefert 2023-12-17 09:58:45.708408
3 Samsung Galaxy S22 610 Bewertungen 4.3 von 5 Sternen 128 GB, Phantom Black, 6.10", Dual SIM, 50 Mpx, 5G 599.– übermorgen geliefert 2023-12-17 09:58:47.674149
4 Apple iPhone 14 803 Bewertungen 4.7 von 5 Sternen 128 GB, Midnight, 6.10", SIM + eSIM, 12 Mpx, 5G 679.– übermorgen geliefert 2023-12-17 09:58:49.691626
5 Samsung Galaxy S23 Ultra 340 Bewertungen 4.6 von 5 Sternen 256 GB, Phantom Black, 6.80", SIM + eSIM, 200 Mpx, 5G 1099.– übermorgen geliefert 2023-12-17 09:58:51.864473
6 Google Pixel 8 143 Bewertungen 4.1 von 5 Sternen 128 GB, Obsidian, 6.20", SIM + eSIM, 50 Mpx, 5G 649.– übermorgen geliefert 2023-12-17 09:58:53.962529
7 Google Pixel 7a 218 Bewertungen 4.

KeyboardInterrupt: 

In [27]:
# Persist the collected rows; the file name carries today's date (ISO format).
today = datetime.now().date()
save_scraped_data(
    path=f'data/scraped_phones_{today}_2.csv',
    data=data_to_save,
    col_names=["nr", "brand", "name", "ratings", "specs", "price", "delivery_information", "scraped_at"],
)