# 1. Import packages

In [33]:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
import time
import pandas as pd

In [34]:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import json
import requests
import re
from urllib.parse import urljoin

# 2. Create functions

## Get links

In [35]:
# Use the urljoin function from Python's urllib.parse module to join the base URL with the relative URLs found in the href attributes.

def get_product_links(base_url, url):     
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")

    product_links = set () # Initiate an empty set to store product links. A set is an unordered collection of unique elements, compared to the list
    
    for link in soup.find_all('a'):
        href = link.get('href')
        if href and "/fr-fr/products/" in href:  # Check if href contains "/fr-fr/products/"
            full_url = urljoin(base_url, href)
            product_links.add(full_url) # Add the full URL to the set
            
    return product_links

In [36]:
# Test
base_url = "https://www.fossil.com"
url = "https://www.fossil.com/fr-fr/sacs/sacs-a-main-pour-femme/sacs-porte-croise/?&page=4"
test_link = get_product_links(base_url, url)
test_link

{'https://www.fossil.com/fr-fr/products/bague-montre-raquel-a-deux-aiguilles--en-acier-inoxydable--dore/ES5343.html',
 'https://www.fossil.com/fr-fr/products/besace-jolie/ZB1434200.html',
 'https://www.fossil.com/fr-fr/products/mini-sac-porte-croise-baguette-jolie/ZB1906001.html',
 'https://www.fossil.com/fr-fr/products/mini-sac-porte-croise-baguette-jolie/ZB1906200.html',
 'https://www.fossil.com/fr-fr/products/petit-sac-porte-croise-a-rabat-harwell/ZB1853001.html',
 'https://www.fossil.com/fr-fr/products/petit-sac-porte-croise-a-rabat-harwell/ZB1853200.html',
 'https://www.fossil.com/fr-fr/products/pochette-porte-croise-pm-disney-fossil/SL10052216.html',
 'https://www.fossil.com/fr-fr/products/sac-baguette-jolie/ZB1969001.html',
 'https://www.fossil.com/fr-fr/products/sac-porte-croise-avec-rabat-pm-lennox/ZB1926001.html',
 'https://www.fossil.com/fr-fr/products/sac-porte-croise-jolie/ZB11009194.html',
 'https://www.fossil.com/fr-fr/products/sac-porte-croise-jolie/ZB1970200.html',
 'h

## Get product info

In [41]:
def get_product_info(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")

    script = soup.find('script', string=re.compile('dataLayer')) # Using "string" instead of "text"
    json_text = re.search(r'dataLayer\s*=\s*(\[.*?\]);', script.string, flags=re.DOTALL | re.MULTILINE).group(1)

    data = json.loads(json_text)

    # Extract product information
    product_info = data[1]['ecommerce']['detail']['products'][0]
    name = product_info['name']
    id = product_info['id']
    price = product_info['price']
    gender = product_info['gender']

    # Convert the dictionary into dataframe
    #product_info = pd.DataFrame([product_info])

    return product_info

In [42]:
url = "https://www.fossil.com/fr-fr/products/sac-a-dos-parker/ZB1836200.html"
test_info = get_product_info(url)
test_info

{'name': 'Sac à dos Parker',
 'id': 'ZB1836200',
 'brand': 'Fossil',
 'gender': 'Femmes',
 'state': 'online',
 'badge': [],
 'price': '369.00',
 'msrp': '369.00',
 'category': 'Sacs à main pour femme Sacs à dos',
 'variant': ['ZB1836200'],
 'priceStatus': 'full',
 'productType': 'Leathers',
 'silhouette': 'Sac à dos',
 'family': 'Leather Bags',
 'inventory': '224'}

In [43]:
def get_all_product_info(base_url, url):
    # List to store product information
    product_info_list = []

    # Get product information for each product
    
    product_links = get_product_links(base_url, url)
    for link in product_links:
        product_info = get_product_info(link)
        product_info_list.append(product_info)

    # Convert list of dictionaries to DataFrame
    df = pd.DataFrame(product_info_list)

    return df

# 3. Test

In [44]:
# Test
base_url = "https://www.fossil.com"
url = "https://www.fossil.com/fr-fr/sacs/sacs-a-main-pour-femme/sacs-porte-croise/?&page=4"
test_info_full = get_all_product_info(base_url, url)
test_info_full

Unnamed: 0,name,id,brand,gender,state,badge,price,msrp,category,variant,priceStatus,productType,silhouette,family,inventory
0,Pochette porté croisé PM Disney Fossil,SL10052216,Fossil,Femmes,online,[Collaboration],139.0,139.0,Sacs à main pour femme Sacs porté croisé,[SL10052216],full,Leathers,Pochette porté croisé PM,Small Leathers,196
1,Sac baguette Jolie,ZB1969001,Fossil,Femmes,online,[],209.0,209.0,Sacs à main pour femme Sacs double porté et sa...,[ZB1969001],full,Leathers,Baguette,Leather Bags,170
2,Sac porté croisé Jolie,ZB11009194,Fossil,Femmes,online,[Nouveautés],279.0,279.0,Sacs à main pour femme Sacs porté croisé,[ZB11009194],full,Leathers,Sac porté croisé,Leather Bags,38
3,Sac porté croisé Jolie,ZB1970200,Fossil,Femmes,online,[],279.0,279.0,Sacs à main pour femme Sacs porté croisé,[ZB1970200],full,Leathers,Sac porté croisé,Leather Bags,143
4,Sac porté croisé Jolie,ZB7716001,Fossil,Femmes,online,[],209.0,209.0,Sacs à main pour femme Sacs porté croisé,[ZB7716001],full,Leathers,Sac porté croisé,Leather Bags,458
5,Sac porté croisé avec rabat PM Lennox,ZB1926001,Fossil,Femmes,online,[],209.0,209.0,Sacs à main pour femme Sacs porté croisé,[ZB1926001],full,Leathers,Sac porté croisé avec rabat PM,Leather Bags,72
6,Mini sac porté croisé baguette Jolie,ZB1906001,Fossil,Femmes,online,[],169.0,169.0,Sacs à main pour femme Sacs double porté et sa...,[ZB1906001],full,Leathers,Mini sac porté croisé baguette,Leather Bags,329
7,Mini sac porté croisé baguette Jolie,ZB1906200,Fossil,Femmes,online,[],169.0,169.0,Sacs à main pour femme Sacs double porté et sa...,[ZB1906200],full,Leathers,Mini sac porté croisé baguette,Leather Bags,171
8,Besace Jolie,ZB1434200,Fossil,Femmes,online,[],279.0,279.0,Sacs à main pour femme Sacs double porté et sa...,[ZB1434200],full,Leathers,Besace,Leather Bags,626
9,Sac seau porté croisé PM Jessie,ZB11005249,Fossil,Femmes,online,[Nouveautés],299.0,299.0,Sacs à main pour femme Sacs porté croisé,[ZB11005249],full,Leathers,Sac seau porté croisé PM,Leather Bags,5
