# Scraping Jumia Ecommerce Site: DSE HACKATHON

## Import libraries

In [232]:
import requests as req
from bs4 import BeautifulSoup
import pandas as pd
import time

## Define function to fetch website html data

In [233]:
def fetch_html_data(web_address, timeout=30):
    """Download a web page and return the HTTP response.

    Parameters
    ----------
    web_address : str
        URL of the page to fetch.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up; forwarded to
        ``requests.get``. Defaults to 30 so a stalled connection cannot hang
        the notebook indefinitely.

    Returns
    -------
    requests.Response or None
        The response when the request succeeds with a non-error status.
        ``None`` when the request fails or the server answers with a
        4xx/5xx status; the error is printed rather than raised.
    """
    try:
        print(f"Fetching data from {web_address}...")
        res = req.get(web_address, timeout=timeout)
        # Treat HTTP error pages as failures instead of parsing them as if
        # they were the product listing.
        res.raise_for_status()
        return res
    except (req.exceptions.RequestException, TypeError) as e:
        print('Stopped:', e)
    return None

## Define function to convert html data to BeautifulSoup object

In [234]:
def convert_web_data_to_beautiful_soup_obj(web_data):
    """Parse the text of a fetched page into a BeautifulSoup object.

    Reads ``web_data.text`` and parses it with the "html.parser" backend.
    Progress messages are printed along the way.

    Returns the BeautifulSoup object, or ``None`` if any exception was raised
    while parsing (the exception is printed, not re-raised).
    """
    try:
        print("\nCreating BeautifulSoup object...")
        parsed_page = BeautifulSoup(web_data.text, "html.parser")
        print("Success! Object created!")
    except Exception as parse_error:
        # Best-effort by design: report the problem and hand back None.
        print("Stopped:", parse_error)
        return None
    return parsed_page

### Declare website source urls

In [235]:
# Root product-listing page that the scrape starts from.
webroot_fetch_url = "https://www.jumia.co.ke/all-products/"

### Fetch root url web data & convert to BeautifulSoup Object

In [236]:
# Download the root listing page, then parse it. Both helpers print the error
# and return None on failure instead of raising, so `soup` is None if either
# step went wrong.
response = fetch_html_data(webroot_fetch_url)
soup = convert_web_data_to_beautiful_soup_obj(response)

Fetching data from https://www.jumia.co.ke/all-products/...

Creating BeautifulSoup object...
Success! Object created!


In [237]:
# Show only the start of the document: the full page is very large and
# printing all of it bloats the notebook output.
HTML_PREVIEW_CHARS = 2000
print(soup.prettify()[:HTML_PREVIEW_CHARS])

<!DOCTYPE html>
<html dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Shop &amp; Buy Online | Jumia Kenya
  </title>
  <meta content="product" property="og:type"/>
  <meta content="Jumia Kenya" property="og:site_name"/>
  <meta content=" Shop &amp; Buy Online | Jumia Kenya" property="og:title"/>
  <meta content="/all-products/" property="og:url"/>
  <meta content="https://ke.jumia.is/cms/icons/jumialogo-x-4.png" property="og:image"/>
  <meta content="en_KE" property="og:locale"/>
  <meta content=" Shop &amp; Buy Online | Jumia Kenya" name="title"/>
  <meta content="noindex,follow" name="robots"/>
  <link href="android-app://com.jumia.android/JUMIA/KE/c/all-products?utm_source=google&amp;utm_medium=organic&amp;adjust_tracker=j1hd8h&amp;adjust_campaign=GOOGLE_SEARCH&amp;adjust_adgroup=https%3A%2F%2Fwww.jumia.co.ke%2Fall-products%2F" rel="alternate"/>
  <link href="https://www.jumia.co.ke/all-products/" rel="canonical"/>
  <link href="https://www.jumia.co.ke/all-produc

### Get first page products

In [238]:
# def get_product_details(soup):
#     details_dict = {"name": soup.find_all("h3", class_="name").text.strip(),
#                     "new_price": soup.find_all("div", class_="prc").text.strip(),
#                     "old_price": soup.find_all("div").find_all("div", class_="old").text.strip(),
#                     "discount(percent)": soup.find_all("div").find_all("div", class_="bdg _dsct _sm").text.strip(),
#                     "votes_count": soup.find_all("div").find_all("div", class_="stars _s").text.strip()}
#     return details_dict
# 


In [239]:
# product_details = get_product_details(soup)
# print(product_details)

In [240]:
# Collect every product card on the page. Passing a multi-class string to
# `class_` matches the class attribute's exact string value, so only cards
# whose class attribute is exactly "prd _fb col c-prd" are returned.
product_details_soup = soup.find_all("article", class_="prd _fb col c-prd")

# Report how many cards were found and preview just the first one, rather
# than dumping the markup of every card into the cell output.
print(f"Found {len(product_details_soup)} product cards")
print(product_details_soup[:1])

[<article class="prd _fb col c-prd"><a class="core" data-gtm-brand="NIVEA" data-gtm-category="Health &amp; Beauty/Beauty &amp; Personal Care/Personal Care/Skin Care/Face/Cleansers" data-gtm-dimension23="" data-gtm-dimension26="2214" data-gtm-dimension27="4.5" data-gtm-dimension28="1" data-gtm-dimension37="0" data-gtm-dimension43="BDEAL|BLF|BLF_02|Boost|COL_216|COL_224|COL_260|COL_295|COL_297|COL_302|COL_317|CP_24|CP_26|CP_MT117|CP_UN17|CP_UN19|CP_UN238|CP_UN247|CP_UN250|CP_UN255|CP_UN261|CP_UN34|CP_UN5|CP_UN9|CP_UN94|DOW_100|INV_CLEAR|JA23_01|JMALL|TBOOST|WCS|XMAS|XMAS22|XMAS22_01" data-gtm-dimension44="0" data-gtm-id="NI534ST01U2JENAFAMZ" data-gtm-list="" data-gtm-name="Perfect &amp; Radiant Even Tone Day And Night Cream For Women - 50ml" data-gtm-position="1" data-gtm-price="6.39" data-track-onclick="eecProduct" data-track-onview="eecProduct" href="/nivea-perfect-radiant-even-tone-day-and-night-cream-for-women-50ml-68528030.html"><div class="img-c"><img alt="" class="img" data-lazy="

In [241]:
def _text_or_none(card, tag_name, css_class):
    """Return the stripped text of the first matching tag in `card`, or None if absent."""
    node = card.find(tag_name, class_=css_class)
    return node.text.strip() if node else None


product_details_clean = []

for detail in product_details_soup:
    # Every field goes through the same helper, so each tag is looked up once
    # and a card missing any field (including name or price) yields None for
    # that field instead of aborting the whole scrape with AttributeError.
    details_dict = {"name": _text_or_none(detail, "h3", "name"),
                    "new_price": _text_or_none(detail, "div", "prc"),
                    "old_price": _text_or_none(detail, "div", "old"),
                    "discount(percent)": _text_or_none(detail, "div", "bdg _dsct _sm"),
                    "rating": _text_or_none(detail, "div", "stars _s"),
                    # The "rev" block's text includes the rating text followed by
                    # the vote count in parentheses, e.g. "4.5 out of 5(2214)".
                    "votes": _text_or_none(detail, "div", "rev")}
    product_details_clean.append(details_dict)

product_details_clean


[{'name': 'NIVEA Perfect & Radiant Even Tone Day And Night Cream For Women - 50ml',
  'new_price': 'KSh 999',
  'old_price': 'KSh 1,560',
  'discount(percent)': '36%',
  'rating': '4.5 out of 5',
  'votes': '4.5 out of 5(2214)'},
 {'name': 'NIVEA Radiant & Beauty Advanced Care Lotion For Women - 400ml (Pack Of 2)',
  'new_price': 'KSh 949',
  'old_price': 'KSh 1,460',
  'discount(percent)': '35%',
  'rating': '4.7 out of 5',
  'votes': '4.7 out of 5(601)'},
 {'name': 'NIVEA Nourishing Cocoa Body Lotion With Cocoa Butter 400ml (Pack Of 2)',
  'new_price': 'KSh 1,174',
  'old_price': 'KSh 1,302',
  'discount(percent)': '10%',
  'rating': '4.7 out of 5',
  'votes': '4.7 out of 5(1114)'},
 {'name': 'NIVEA Pearl & Beauty Anti-Perspirant Rollon, 48h - 50ml (Pack Of 2)',
  'new_price': 'KSh 728',
  'old_price': 'KSh 1,040',
  'discount(percent)': '30%',
  'rating': '4.6 out of 5',
  'votes': '4.6 out of 5(826)'},
 {'name': 'NIVEA MEN Deep Antibacterial Anti-Perspirant Rollon,48h - 50ml (Pack 

### Add all the products' text into a list

In [242]:
# first_page_items_list = [x.text for x in first_page_items]
# first_page_items_list