In [10]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np


In [14]:
import re

In [27]:
# Function to extract Product Title
def get_title(soup):
    """Return the product title with surrounding whitespace removed.

    Looks up the first ``<h1>`` whose class attribute is
    ``'-fs20 -pts -pbxs'`` and strips its text.

    Args:
        soup: Parsed product page exposing ``find(name, attrs=...)``.

    Returns:
        The stripped title text, or ``""`` when any step of the lookup
        raises ``AttributeError`` (for example when ``find`` returns None).
    """
    title_selector = {'class':'-fs20 -pts -pbxs'}

    try:
        # '   title   ' -> 'title'
        return soup.find("h1", attrs=title_selector).text.strip()
    except AttributeError:
        # No heading found (or nothing to read text from): report an empty title.
        return ""

# Function to extract Product Price
def get_price(soup):
    """Return the product's current price text with whitespace stripped.

    Reads the ``.string`` of the first ``<span>`` whose class attribute is
    ``'-b -ubpt -tal -fs24 -prxs'``.

    Args:
        soup: Parsed product page exposing ``find(name, attrs=...)``.

    Returns:
        The stripped price text, or ``""`` when the lookup raises
        ``AttributeError`` (for example when no tag is found or its
        ``.string`` is None).
    """
    price_selector = {'class':'-b -ubpt -tal -fs24 -prxs'}

    try:
        price_tag = soup.find("span", attrs=price_selector)
        return price_tag.string.strip()
    except AttributeError:
        return ""


def get_rating(soup):
    """Return the first number found in the rating element, as a string.

    Looks up the first ``<div>`` with class ``"stars _m _al"``, takes its
    stripped text and extracts the first integer or decimal number. A
    decimal comma is normalised to a dot (``"4,5"`` -> ``"4.5"``).

    Args:
        soup: Parsed product page exposing ``find(name, class_=...)``.

    Returns:
        The number as a string, or ``"No Rating"`` when the element is
        missing, its text contains no number, or the lookup raises
        ``AttributeError``.
    """
    try:
        stars = soup.find("div", class_="stars _m _al")

        # A missing element is treated as empty text so the regex simply finds nothing.
        text = ""
        if stars:
            text = stars.get_text(strip=True)

        # First run of digits, optionally followed by '.' or ',' and more digits.
        found = re.search(r"(\d+(?:[\.,]\d+)?)", text)
        if not found:
            return "No Rating"
        return found.group().replace(",", ".")

    except AttributeError:
        return "No Rating"


# Function to extract discount
def get_discount(soup):
    """Return the discount badge text with whitespace stripped.

    Reads the ``.string`` of the first ``<span>`` whose class attribute is
    ``'bdg _dsct _dyn -mls'``.

    Args:
        soup: Parsed product page exposing ``find(name, attrs=...)``.

    Returns:
        The stripped badge text, or ``""`` when the lookup raises
        ``AttributeError`` (for example when no badge is found).
    """
    badge_selector = {'class':'bdg _dsct _dyn -mls'}

    try:
        badge = soup.find("span", attrs=badge_selector)
        return badge.string.strip()
    except AttributeError:
        return ""


# Function to extract old price
def get_old_price(soup):
    """Return the pre-discount price text with whitespace stripped.

    Reads the ``.string`` of the first ``<span>`` whose class attribute is
    ``'-tal -gy5 -lthr -fs16 -pvxs -ubpt'``.

    Args:
        soup: Parsed product page exposing ``find(name, attrs=...)``.

    Returns:
        The stripped price text, or ``""`` when the lookup raises
        ``AttributeError`` (for example when no such tag is found).
    """
    old_price_selector = {'class':'-tal -gy5 -lthr -fs16 -pvxs -ubpt'}

    try:
        old_price_tag = soup.find("span", attrs=old_price_selector)
        return old_price_tag.string.strip()
    except AttributeError:
        return ""

# Function to extract Number of reviews
def get_review_count(soup):
    """Return the first run of digits in the reviews link, as a string.

    Looks up the first ``<a>`` with class ``"-plxs _more"`` and searches
    its stripped text for digits.

    Args:
        soup: Parsed product page exposing ``find(name, class_=...)``.

    Returns:
        The matched digits as a string, or ``"0"`` when the link is
        missing, its text contains no digits, or the lookup raises
        ``AttributeError``.
    """
    try:
        reviews_link = soup.find("a", class_="-plxs _more")

        # A missing link is treated as empty text so the search finds nothing.
        link_text = ""
        if reviews_link:
            link_text = reviews_link.text.strip()

        # Extract only the number
        digits = re.search(r"\d+", link_text)
        if digits:
            return digits.group()
        return "0"

    except AttributeError:
        return "0"





In [28]:

if __name__ == '__main__':
    # Scrape the Jumia "beauty" search listing, visit every product page it
    # links to, collect the product fields and write them to a CSV file.

    # Imported here so this cell brings its own dependency into scope.
    from urllib.parse import urljoin

    # Headers for request
    headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36" ,
    "accept-language": "en-US,en;q=0.9"
    }

    # Site root against which the product hrefs are resolved
    BASE_URL = "https://www.jumia.ma/"

    # The webpage URL
    URL="https://www.jumia.ma/catalog/?q=beauty"

    # Seconds to wait for each response; without a timeout a stalled
    # connection would hang the cell indefinitely.
    REQUEST_TIMEOUT = 30

    # HTTP Request
    webpage = requests.get(URL, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of parsing an error page and
    # silently producing an empty CSV.
    webpage.raise_for_status()

    # Soup -> object that stores the content of the page
    soup = BeautifulSoup(webpage.content, "html.parser")

    # Fetch links of products
    links = soup.find_all("a", attrs={'class':'core'})

    # storing links extracted from Tag Objects; anchors without an href are
    # skipped because link.get('href') returns None for them.
    links_list = [link.get('href') for link in links if link.get('href')]

    d = {"title":[], "price":[], "rating":[], "discount":[], "old_price":[], "review_count":[]}

    # extracting product details from each link
    for link in links_list:
        # urljoin copes with hrefs with or without a leading slash (and with
        # absolute URLs), avoiding the '//' that plain concatenation produces.
        product_url = urljoin(BASE_URL, link)

        try:
            new_webpage = requests.get(product_url, headers=headers, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # One unreachable product page should not discard the whole scrape.
            print(f"Skipping {product_url}: {exc}")
            continue

        new_soup = BeautifulSoup(new_webpage.content, "html.parser")

        d['title'].append(get_title(new_soup))
        d['price'].append(get_price(new_soup))
        d['rating'].append(get_rating(new_soup))
        d['discount'].append(get_discount(new_soup))
        d['old_price'].append(get_old_price(new_soup))
        d['review_count'].append(get_review_count(new_soup))


    jumia_df = pd.DataFrame.from_dict(d)
    # Series.replace returns a new Series unless inplace=True, so the result
    # must be assigned back; otherwise empty titles never become NaN and the
    # dropna below removes nothing.
    jumia_df['title'] = jumia_df['title'].replace('', np.nan)
    jumia_df=jumia_df.dropna(subset=['title'])

    jumia_df.to_csv("jumia_beauty_Products.csv", header=True, index=False)


In [29]:
# Display the DataFrame built in the previous cell as this cell's output.
jumia_df

Unnamed: 0,title,price,rating,discount,old_price,review_count
0,"Kiss Beauty Eyeliner, stylo Eyeliner et tampon...",39.00 Dhs,4.3,61%,99.00 Dhs,59
1,Beauty of Joseon Soulagement du soleil : Riz +...,224.00 Dhs,4.3,34%,341.21 Dhs,3
2,Beauty of Joseon Soulagement du soleil : Riz +...,285.00 Dhs,5.0,24%,375.00 Dhs,3
3,Beauty of Joseon Crème solaire réparatrice riz...,305.00 Dhs,0.0,13%,349.00 Dhs,0
4,Beauty of Joseon ALL DAY SUN DUO RELIEF SUN MA...,380.00 Dhs,4.3,32%,560.00 Dhs,3
5,Kiss Beauty Pack de 3 mask et baume et gommage...,58.00 Dhs,4.3,15%,68.00 Dhs,3
6,"Beauty of Joseon Sérum Éclat, Propolis + Niaci...",257.00 Dhs,4.5,33%,385.91 Dhs,2
7,Aichun Beauty Pack 2 Sérums pour visage Exfoli...,79.00 Dhs,4.2,11%,89.00 Dhs,5
8,Beauty & Wild Acide Hyaluronique + Collagène M...,299.00 Dhs,3.7,25%,399.00 Dhs,3
9,"Beauty of Joseon Sérum Revive, Ginseng et muci...",249.00 Dhs,0.0,,,0
