In [1]:
import pandas as pd

In [2]:
# Load the scraped IKEA furniture table from the CSV in the working directory
# and display its column labels as the cell output.
df = pd.read_csv("IKEA_SA_Furniture_Web_Scrapings_sss.csv")
df.columns

Index(['Unnamed: 0', 'item_id', 'name', 'category', 'price', 'old_price',
       'sellable_online', 'link', 'other_colors', 'short_description',
       'designer', 'depth', 'height', 'width'],
      dtype='object')

In [5]:
# Number of distinct values in the raw `name` column.
df["name"].nunique()

607

In [6]:
# Work on a copy so that later edits to `df_altered` leave the loaded `df` untouched.
df_altered = df.copy()

In [None]:
# Keep only the text before the first space of each name, so that a value like
# "NORDVIKEN / NORDVIKEN" becomes "NORDVIKEN".
# NOTE(review): the original remark here ended with "-> No, as that are different
# items", which suggests combined names may denote distinct products that this
# truncation merges — TODO confirm whether truncating is actually intended.
df_altered["name"] = df["name"].str.split(" ").str[0]


In [8]:
# Number of distinct values in `name` of the altered frame (compare with the raw count).
df_altered["name"].nunique()

479

In [12]:
# Array of the distinct names in the altered frame; used as the list of search terms.
unique_names = df_altered["name"].unique() 

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time
import requests
import os

def fetch_images_selenium(names, folder_path="ikea_images"):
    """
    Fetches the first image for each IKEA item name using Selenium.

    For every name the IKEA US search page is opened in a Chrome session, the
    first element matching the product-image CSS selector is located, and the
    image behind its ``src`` attribute is downloaded to
    ``<folder_path>/<name>.jpg``.

    Errors for a single name (element not found, network failure, ...) are
    printed and the loop continues with the next name. The browser is always
    closed, even when the run is aborted (e.g. by ``KeyboardInterrupt``).

    Args:
        names (list): List of product names to search for.
        folder_path (str): Folder where images will be saved. Created if it
            does not exist yet.

    Returns:
        None
    """
    # Local import keeps this cell self-contained.
    from urllib.parse import quote_plus

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(folder_path, exist_ok=True)

    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

    try:
        for name in names:
            try:
                print(f"Fetching image for: {name}")
                # URL-encode the search term so spaces, "/", "&", ... do not
                # corrupt the query string.
                query = quote_plus(str(name))
                driver.get(f"https://www.ikea.com/us/en/search/?q={query}")
                time.sleep(3)  # time to load

                # First product image on the result page. Note: this also
                # returns an image when the exact product was not found,
                # because the page may still list other products.
                img_tag = driver.find_element(
                    By.CSS_SELECTOR,
                    "img.image.plp-product__image.plp-product__image--alt",
                )
                img_url = img_tag.get_attribute("src")
                if not img_url:
                    print(f"No image URL found for {name}")
                    continue

                # Download the image; the timeout keeps a stalled connection
                # from hanging the whole run.
                response = requests.get(img_url, stream=True, timeout=30)
                try:
                    if response.status_code == 200:
                        # Path separators in a name would otherwise point
                        # into a (non-existent) sub-directory.
                        safe_name = str(name).replace("/", "_").replace("\\", "_")
                        img_path = os.path.join(folder_path, f"{safe_name}.jpg")
                        with open(img_path, "wb") as img_file:
                            for chunk in response.iter_content(1024):
                                img_file.write(chunk)
                        print(f"Image saved: {img_path}")
                    else:
                        print(f"Failed to download image for {name}")
                finally:
                    # Streamed responses hold their connection until closed.
                    response.close()
            except Exception as e:
                print(f"Error fetching image for {name}: {e}")
    finally:
        # Runs on normal completion and on interruption, so no orphaned
        # Chrome / chromedriver processes are left behind.
        driver.quit()

# Smoke test: run the fetcher on two names before launching it on the full list.
product_names = ["STIG", "FREKVENS"]
fetch_images_selenium(product_names)


In [None]:
#fetch_images_selenium(unique_names.tolist(), folder_path="ikea_images")

Fetching image for: FREKVENS
Image saved: ikea_images/FREKVENS.jpg
Fetching image for: NORDVIKEN
Image saved: ikea_images/NORDVIKEN.jpg
Fetching image for: STIG
Image saved: ikea_images/STIG.jpg
Fetching image for: NORBERG
Image saved: ikea_images/NORBERG.jpg
Fetching image for: INGOLF
Image saved: ikea_images/INGOLF.jpg
Fetching image for: FRANKLIN
Image saved: ikea_images/FRANKLIN.jpg
Fetching image for: DALFRED
Image saved: ikea_images/DALFRED.jpg
Fetching image for: EKEDALEN
Image saved: ikea_images/EKEDALEN.jpg
Fetching image for: NORRARYD
Image saved: ikea_images/NORRARYD.jpg
Fetching image for: TOMMARYD


KeyboardInterrupt: 