# Image Collection: part 2

In [1]:
# import libraries for image collection 
import os 
import pandas as pd 
import numpy as np 
import requests
import time

## Approach:

+ Data Collection: from Web 1: Ebay
+ Data Collection: from Web 2: Mediamarkt
+ Data Collection: from Web 3: other

## Data Collection: Web 1

Websites are mostly loaded through JavaScript. Therefore, Selenium would be an appropriate choice for extraction.

In [2]:
# Import selenium dependencies for setting up 
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options

# Import selenium dependencies part 2 for extracting content from web
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC 

In [3]:
# Webpage whose product images are extracted; passed to extract_images() below
weblink = "https://www.mobileshop.eu/android-os/"

In [4]:
# Load the webpage 
def extract_images(weblink: str) -> list:
    """Collect product image URLs from a JavaScript-rendered web page.

    Opens the page in a Chrome session driven by Selenium, scrolls to the
    bottom of the document, waits a fixed five seconds, and then reads the
    image URL of every element matching ``div.product-module figure img``.

    Args:
        weblink: URL of the page to load.

    Returns:
        A list of image URL strings. For each image the ``data-src``
        attribute is preferred and ``src`` is used as the fallback; images
        that expose neither attribute are skipped.

    Raises:
        selenium.common.exceptions.WebDriverException: propagated from
            Selenium if the page cannot be loaded or queried. The browser is
            closed before the exception leaves this function.
    """
    # Set up options
    options = Options()
    options.add_argument("--disable-popup-blocking")
    options.add_argument("--disable-notifications")

    # Set up driver including options
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    try:
        driver.get(weblink)

        # Scroll to the bottom of the page, then pause before reading the DOM
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        # Extract images from website
        product_images = driver.find_elements(By.CSS_SELECTOR, "div.product-module figure img")
        candidate_urls = (
            img.get_attribute("data-src") or img.get_attribute("src")
            for img in product_images
        )
        # Drop images without a usable URL so callers never receive None entries
        image_urls = [url for url in candidate_urls if url]
    finally:
        # Always quit the driver, even on failure, so no browser process is leaked
        driver.quit()

    return image_urls

    

In [5]:
# Extract the product image URLs from `weblink` (opens a Chrome session via Selenium)
image_urls1 = extract_images(weblink=weblink)
# Bare last expression so the notebook displays the collected URLs
image_urls1

['https://images.mobileshop.eu/1747292971/product-medium/oppo-find-n5-5g-dual-sim-512gb-16gb-ram-white.jpg',
 'https://images.mobileshop.eu/1747292925/product-medium/oppo-find-n5-5g-dual-sim-512gb-16gb-ram-black.jpg',
 'https://images.mobileshop.eu/1753347897/product-medium/samsung-galaxy-z-fold7-5g-dual-sim-256gb-12gb-ram-sm-f966-blue.jpg',
 'https://images.mobileshop.eu/1753347889/product-medium/samsung-galaxy-z-fold7-5g-dual-sim-256gb-12gb-ram-sm-f966-jet-black.jpg',
 'https://images.mobileshop.eu/1738831771/product-medium/samsung-galaxy-s25-ultra-5g-dual-sim-1-tb-12gb-ram-titanium-black.jpg',
 'https://images.mobileshop.eu/1738831856/product-medium/samsung-galaxy-s25-ultra-5g-dual-sim-1-tb-12gb-ram-titanium-white-silver.jpg',
 'https://images.mobileshop.eu/1738831801/product-medium/samsung-galaxy-s25-ultra-5g-dual-sim-1-tb-12gb-ram-titanium-grey.jpg',
 'https://images.mobileshop.eu/1721802790/product-medium/samsung-galaxy-z-fold6-5g-dual-sim-256gb-12gb-ram-silver.jpg',
 'https://im

In [6]:
# Save the downloaded images on the local device
image_folder = r"C:\Development\Projects\MachineLearning\Mobile-Image_Classifier-System\data\external_data\image1"

# Create the folder (and any missing parents); exist_ok makes re-running this cell safe
os.makedirs(image_folder, exist_ok=True)

# Store the images into image folder
for index, image_url in enumerate(image_urls1):
    # NOTE(review): files are always named *.png although the scraped URLs shown
    # above end in .jpg — naming kept unchanged in case later steps rely on it; confirm.
    file_name = f"image_{index + 1}.png"
    filepath = os.path.join(image_folder, file_name)

    # Send http-request to web to get access to the image data.
    # A timeout prevents one stalled connection from hanging the whole cell, and
    # catching request errors lets the remaining images still be downloaded.
    try:
        response = requests.get(image_url, timeout=30)
    except requests.RequestException as error:
        print(f"Failed to download image {image_url}. Error = {error}")
        continue

    if response.status_code == 200:
        # Download image + Store into the matching file path
        with open(filepath, "wb") as item:
            item.write(response.content)

        print(f"Image stored successfully in path {filepath}")
    else:
        print(f"Failed to download image. Status code = {response.status_code}")



Image stored successfully in path C:\Development\Projects\MachineLearning\Mobile-Image_Classifier-System\data\external_data\image1\image_1.png
Image stored successfully in path C:\Development\Projects\MachineLearning\Mobile-Image_Classifier-System\data\external_data\image1\image_2.png
Image stored successfully in path C:\Development\Projects\MachineLearning\Mobile-Image_Classifier-System\data\external_data\image1\image_3.png
Image stored successfully in path C:\Development\Projects\MachineLearning\Mobile-Image_Classifier-System\data\external_data\image1\image_4.png
Image stored successfully in path C:\Development\Projects\MachineLearning\Mobile-Image_Classifier-System\data\external_data\image1\image_5.png
Image stored successfully in path C:\Development\Projects\MachineLearning\Mobile-Image_Classifier-System\data\external_data\image1\image_6.png
Image stored successfully in path C:\Development\Projects\MachineLearning\Mobile-Image_Classifier-System\data\external_data\image1\image_7.png