In [1]:
import os
import time
import requests
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import re

def create_folder(base_path, folder_name):
    """Ensure a sub-folder of *base_path* exists and return its path.

    Args:
        base_path: Directory under which the folder is created.
        folder_name: Desired folder name; it is passed through
            ``clean_filename`` before being joined to *base_path*.

    Returns:
        The full path of the (now existing) folder.

    Raises:
        OSError: If the directory cannot be created, e.g. the path already
            exists as a regular file or permissions are insufficient.
    """
    folder_path = os.path.join(base_path, clean_filename(folder_name))
    # exist_ok=True replaces the separate os.path.exists() check, which could
    # race with another process creating the same directory in between.
    os.makedirs(folder_path, exist_ok=True)
    return folder_path

def clean_filename(filename):
    r"""Return *filename* with the characters ``< > : " / \ | ? *`` removed.

    All other characters are kept in their original order.
    """
    # Split on every forbidden character, then glue the surviving pieces
    # back together with nothing in between.
    kept_pieces = re.split(r'[<>:"/\\|?*]', filename)
    return ''.join(kept_pieces)

def download_image(url, folder_path, filename, timeout=30):
    """Download *url* and save it as ``<filename>.jpg`` inside *folder_path*.

    Failures are reported on stdout rather than raised, so one bad image
    does not abort the caller's loop.

    Args:
        url: Address of the image to fetch.
        folder_path: Existing directory in which the file is written.
        filename: File name without extension; ``.jpg`` is always appended.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        True if the server answered with HTTP 200 and the file was written,
        False otherwise.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            # Previously this case returned False silently, which made
            # missing images impossible to diagnose from the log.
            print(f"Failed to download {url}: HTTP {response.status_code}")
            return False

        file_path = os.path.join(folder_path, f"{filename}.jpg")
        with open(file_path, 'wb') as f:
            f.write(response.content)
        print(f"Downloaded: {filename}")
        return True
    except Exception as e:
        # Deliberately broad: network and file-system errors are both
        # treated as "this image could not be saved".
        print(f"Error downloading {url}: {str(e)}")
    return False

def scrape_category_images(driver, category_url, category_folder, max_scrolls=100):
    """Open a category page, scroll it to the bottom and download its images.

    Images are saved into *category_folder* as
    ``<folder basename>_<element index>.jpg`` via ``download_image``.
    Errors are printed and swallowed so that one failing page or image does
    not stop the overall scrape.

    Args:
        driver: Selenium-compatible WebDriver used to load the page.
        category_url: Absolute URL of the category page.
        category_folder: Existing directory that receives the images.
        max_scrolls: Upper bound on scroll-to-bottom attempts. It prevents
            an endless loop on pages whose height keeps growing.
    """
    try:
        driver.get(category_url)
        time.sleep(5)  # fixed pause after navigation before measuring the page

        # Scroll until the page height stops changing (or the cap is hit) so
        # that content appended on scroll is present in the DOM.
        last_height = driver.execute_script("return document.body.scrollHeight")
        for _ in range(max_scrolls):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height
        else:
            print(f"Stopped scrolling {category_url} after {max_scrolls} scrolls")

        # Find all product images using multiple selectors
        product_elements = driver.find_elements(By.CSS_SELECTOR, '.product-item-photo img, .product-image-photo, .category-image img')

        folder_label = os.path.basename(category_folder)
        seen_urls = set()  # avoid saving the same image URL more than once

        for idx, element in enumerate(product_elements, 1):
            try:
                img_url = element.get_attribute('src')
                # Skip elements without a usable source, placeholder images
                # and URLs that were already handled on this page.
                if not img_url or img_url.endswith('blank.gif') or img_url in seen_urls:
                    continue
                seen_urls.add(img_url)
                download_image(img_url, category_folder, f"{folder_label}_{idx}")
            except Exception as e:
                print(f"Error processing image {idx}: {str(e)}")

    except Exception as e:
        print(f"Error scraping {category_url}: {str(e)}")

def main():
    """Scrape every configured category and subcategory into a folder tree.

    For each entry of the category table a folder is created under
    ``base_path``, the category's main page is scraped into it, and each
    subcategory is scraped into its own nested folder. The browser is
    always closed at the end, whether or not an error occurred.
    """
    base_path = r"E:\Datasets\royaloakindia"
    base_url = 'https://www.royaloakindia.com'

    # Complete category and subcategory structure
    categories = {
        'Sofa': {
            'main': '/living/sofas.html',
            'subcategories': {
                'Fabric_Sofa': '/living/sofas/fabric-sofa.html',
                'Wooden_Sofas': '/living/sofas/wooden-sofas.html',
                'Leatherette_Sofas': '/living/sofas/leatherette-sofas.html',
                'Leather_Sofas': '/living/sofas/leather-sofas.html',
                'Sofa_Beds': '/living/sofas/sofa-cum-bed.html',
                'Sofa_Sets': '/living/sofas/sofa-sets.html',
                'Corner_Sofas': '/living/sofas/l-shaped-sofa.html',
                'Single_Seater': '/living/sofas/single-seater.html',
                'Two_Seater': '/living/sofas/two-seater.html',
                'Three_Seater': '/living/sofas/three-seater.html'
            }
        },
        'Recliner': {
            'main': '/living/recliners.html',
            'subcategories': {
                'Fabric_Recliners': '/living/recliners/fabric-recliners.html',
                'Leatherette_Recliners': '/living/recliners/leatherette-recliners.html',
                'Leather_Recliners': '/living/recliners/leather-recliners.html',
                'Single_Seater_Recliners': '/living/recliners/single-seater-recliners.html',
                'Two_Seater_Recliners': '/living/recliners/two-seater-recliners.html',
                'Three_Seater_Recliners': '/living/recliners/three-seater-recliners.html',
                'Recliner_Sets': '/living/recliners/recliner-sets.html',
                'Home_Theatre_Recliners': '/living/recliners/home-theatre-recliners.html'
            }
        },
        'Dining': {
            'main': '/dining/dining-tables-sets.html',
            'subcategories': {
                'Four_Seater': '/dining/dining-tables-sets/4-seater-dining-sets.html',
                'Six_Seater': '/dining/dining-tables-sets/6-seater-dining-sets.html',
                'Eight_Seater': '/dining/dining-tables-sets/8-seater-dining-sets.html'
            }
        },
        'Bed': {
            'main': '/bedroom/beds.html',
            'subcategories': {
                'Queen_Without_Storage': '/bedroom/beds/queen-bed-without-storage.html',
                'King_Without_Storage': '/bedroom/beds/king-bed-without-storage.html',
                'Queen_With_Storage': '/bedroom/beds/queen-bed-with-storage.html',
                'King_With_Storage': '/bedroom/beds/king-bed-with-storage.html',
                'Bedroom_Sets': '/bedroom/beds/bedroom-sets.html',
                'Bunk_Bed': '/bedroom/beds/bunk-bed.html',
                'Kids_Bed': '/bedroom/beds/kids-bed.html',
                'Single_Bed': '/bedroom/beds/single-bed.html',
                'Wooden_Bed': '/bedroom/beds/wooden-bed.html',
                'Engineered_Wood_Bed': '/bedroom/beds/engineered-wood-bed.html',
                'Folding_Beds': '/bedroom/beds/folding-beds.html'
            }
        },
        'Office': {
            'main': '/study-office.html',
            'subcategories': {
                'Study_Tables': '/study-office/tables/study-tables.html',
                'Computer_Tables': '/study-office/tables/computer-tables.html',
                'Boss_Table': '/study-office/tables/boss-tables.html',
                'Conference_Tables': '/study-office/tables/conference-tables.html'
            }
        },
        'Chairs': {
            'main': '/study-office/chairs.html',
            'subcategories': {
                'Office_Chairs': '/study-office/chairs/office-chairs.html',
                'Computer_Chairs': '/study-office/chairs/computer-chairs.html',
                'Visitor_Chairs': '/study-office/chairs/visitor-chairs.html',
                'Study_Chairs': '/study-office/chairs/study-chairs.html',
                'Gaming_Chairs': '/study-office/chairs/gaming-chairs.html'
            }
        },
        'Center_Table': {
            'main': '/living/center-table.html',
            'subcategories': {
                'Wooden_Center_Tables': '/living/center-table/wooden-center-tables.html',
                'Glass_Center_Tables': '/living/center-table/glass-center-tables.html',
                'Marble_Center_Tables': '/living/center-table/marble-center-tables.html',
                'Side_End_Tables': '/living/center-table/side-end-tables.html',
                'Console_Tables': '/living/center-table/console-tables.html',
                'Teapoy': '/living/center-table/teapoy.html'
            }
        },
        'Mattress': {
            'main': '/bedroom/mattresses.html',
            'subcategories': {
                'Queen_Size': '/bedroom/mattresses/queen-size-mattresses.html',
                'King_Size': '/bedroom/mattresses/king-size-mattresses.html',
                'Single_Mattresses': '/bedroom/mattresses/single-mattresses.html',
                'Pillows': '/bedroom/mattresses/pillows.html'
            }
        },
        'Wardrobes': {
            'main': '/bedroom/wardrobes.html',
            'subcategories': {
                'Two_Door': '/bedroom/wardrobes/2-door-wardrobes.html',
                'Three_Door': '/bedroom/wardrobes/3-door-wardrobes.html',
                'Four_Door': '/bedroom/wardrobes/4-door-wardrobes.html',
                'Five_Door': '/bedroom/wardrobes/5-door-wardrobes.html'
            }
        },
        'Home_Decor': {
            'main': '/decor.html',
            'subcategories': {
                'Wall_Decor': '/decor/wall-decor.html',
                'Furniture_Accents': '/decor/furniture-accents.html',
                'Spiritual': '/decor/spiritual.html',
                'Table_Decor': '/decor/table-decor.html',
                'Home_Garden': '/decor/home-garden.html',
                'Lighting': '/decor/lighting.html',
                'Kitchen': '/decor/kitchen.html'
            }
        },
        'Outdoor': {
            'main': '/outdoor.html',
            'subcategories': {
                'Outdoor_Set': '/outdoor/outdoor-set.html',
                'Outdoor_Chairs': '/outdoor/outdoor-chairs.html',
                'Patio_Set': '/outdoor/patio-set.html',
                'Swings': '/outdoor/swings.html',
                'Outdoor_Tables': '/outdoor/outdoor-tables.html',
                'Patio_Tables': '/outdoor/patio-tables.html',
                'Balcony_Furniture': '/outdoor/balcony-furniture.html',
                'Garden_Chairs': '/outdoor/garden-chairs.html'
            }
        },
        'Bookshelves': {
            'main': '/smart-storage/book-shelves.html',
            'subcategories': {
                'Engineered_Wood': '/smart-storage/book-shelves/engineered-wood.html',
                'Solid_Wood': '/smart-storage/book-shelves/solid-wood.html'
            }
        },
        'Furnishings': {
            'main': '/furnishing.html',
            'subcategories': {
                'Mats': '/furnishing/mats.html',
                'Curtains': '/furnishing/curtains.html',
                'Flooring': '/furnishing/flooring.html',
                'Cushions_Covers': '/furnishing/cushions-covers.html',
                'Bed_Linen': '/furnishing/bed-linen.html',
                'Table_Linen': '/furnishing/table-linen.html'
            }
        },
        'Giftings': {
            'main': '/decor-gifting.html',
            'subcategories': {
                'Candles': '/decor-gifting/candles.html',
                'Wall_Paintings': '/decor-gifting/wall-paintings.html',
                'Artificial_Flowers': '/decor-gifting/artificial-flowers.html',
                'Buddha_Idols': '/decor-gifting/buddha-idols.html',
                'Religious_Idols': '/decor-gifting/religious-idols.html',
                'Showpieces': '/decor-gifting/showpieces.html'
            }
        }
    }

    # Start the browser only once all static data is ready, and keep every
    # later driver call inside the try so the finally clause always closes
    # the browser (maximize_window used to run outside it and could leak
    # the Chrome process on failure).
    driver = uc.Chrome()
    try:
        driver.maximize_window()

        for category_name, category_data in categories.items():
            print(f"\nProcessing main category {category_name}...")
            category_folder = create_folder(base_path, category_name)

            # Scrape main category
            main_url = base_url + category_data['main']
            scrape_category_images(driver, main_url, category_folder)

            # Process subcategories (an entry without the key simply has none)
            for subcat_name, subcat_path in category_data.get('subcategories', {}).items():
                print(f"Processing subcategory {subcat_name}...")
                subcat_folder = create_folder(category_folder, subcat_name)
                subcat_url = base_url + subcat_path
                scrape_category_images(driver, subcat_url, subcat_folder)

            time.sleep(2)  # Add delay between categories

    except Exception as e:
        # Top-level boundary: report the failure and fall through to cleanup.
        print(f"Error in main execution: {str(e)}")

    finally:
        driver.quit()

# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()


Processing main category Sofa...
Downloaded: Sofa_1
Downloaded: Sofa_2
Downloaded: Sofa_3
Downloaded: Sofa_4
Downloaded: Sofa_5
Downloaded: Sofa_6
Downloaded: Sofa_7
Downloaded: Sofa_8
Downloaded: Sofa_9
Downloaded: Sofa_10
Downloaded: Sofa_11
Downloaded: Sofa_12
Downloaded: Sofa_13
Downloaded: Sofa_14
Downloaded: Sofa_15
Downloaded: Sofa_16
Downloaded: Sofa_17
Downloaded: Sofa_18
Downloaded: Sofa_19
Downloaded: Sofa_20
Downloaded: Sofa_21
Downloaded: Sofa_22
Downloaded: Sofa_23
Downloaded: Sofa_24
Downloaded: Sofa_25
Downloaded: Sofa_26
Downloaded: Sofa_27
Downloaded: Sofa_28
Downloaded: Sofa_29
Downloaded: Sofa_30
Downloaded: Sofa_31
Downloaded: Sofa_32
Downloaded: Sofa_33
Downloaded: Sofa_34
Downloaded: Sofa_35
Downloaded: Sofa_36
Processing subcategory Fabric_Sofa...
Downloaded: Fabric_Sofa_1
Downloaded: Fabric_Sofa_2
Downloaded: Fabric_Sofa_3
Downloaded: Fabric_Sofa_4
Downloaded: Fabric_Sofa_5
Downloaded: Fabric_Sofa_6
Downloaded: Fabric_Sofa_7
Downloaded: Fabric_Sofa_8
Downloade