## Import Modules

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from time import sleep
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd

## Set path for Webdriver

In [2]:
# open the browser
browser = webdriver.Chrome()

In [3]:
products = []
input_file = r"E:\projects\mobaily\pn.xlsx"
output_file = r"E:\projects\mobaily\output_data.xlsx"
df = pd.read_excel(input_file)

In [4]:
# send the input to the webpage
def search(pn):
    try:
        search_bar = browser.find_element(By.CSS_SELECTOR, "input[placeholder='Search']")
        search_bar.clear()
        search_bar.send_keys(pn)
        search_bar.send_keys(Keys.ENTER)
    except NoSuchElementException:
        print(f"Search bar not found for PN: {pn}")

In [5]:
def search_result(x): # get the page of search result 
    y=str(x)
    try:
        search_result_element = WebDriverWait(browser, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR,'#content > div.products-category > div.products-list.row.nopadding-xs > div:nth-child('+y+')' ))
        )
        search_result_element.click()
        return browser.current_url
    except (TimeoutException, NoSuchElementException):
        print("Search result not found or took too long to load")
        return None

In [6]:
def extract_table_data():
    soup = BeautifulSoup(browser.page_source, 'html.parser')
    container = soup.find('div', {'class': 'product-box-desc'})
    
    data = []
    if container:
        rows = container.find_all('div', {'class': 'inner-box-desc'})
        for row in rows:
            brand = row.find('div', {'class': 'brand'})
            value_br = row.find('span', {'itemprop': 'name'})
            if brand and value_br:
                label = brand.get_text(strip=True)
                value_br = value_br.get_text(strip=True)
                data.append((label, value_br))
    
    return data

## Scrape Products from mobaily



In [7]:

for index, row in df.iterrows():
       browser.get('https://mobaili.com/')
       browser.maximize_window()
       pn = row['PN']
       num_of_scraped_pages=2 # num of pages you want to scrape
       for i in range(0,2):
            print('Scraping page', i+1)
            for x in range(1,13): # num of items appear in seacrch result
                search(pn)
                u=search_result(x)
                #print(u) 
                price = browser.find_element(By.XPATH, "//span[@itemprop='price']")
                title = browser.find_element(By.XPATH, "//h1[@itemprop='name']") 
                table_data = extract_table_data()
                if table_data:
                    for label, value in table_data:
                            products.append({
                               'pn':pn,
                            'price':price.text,
                            'title':title.text,
                            'Label': label,
                                'Brand': value,
                                'URL': u
                            })  
       if(i!=(num_of_scraped_pages-1)):                 
           next_button = browser.find_element(By.XPATH, "//a[text()='Next']") # to navigate to next page 
           next_button.click()

Scraping page 1
Scraping page 2
Scraping page 1
Scraping page 2
Scraping page 1
Scraping page 2


In [8]:
len(products)

66

In [9]:
products[:5]

[{'pn': 'honor',
  'price': '3,599 LE',
  'title': 'Honor 10 Lite Dual Sim , 64GB, 3GB RAM, 4G LTE, Sky Blue',
  'Label': 'BrandsHonor',
  'Brand': 'Honor',
  'URL': 'https://mobaili.com/index.php?route=product/product&product_id=558&search=honor'},
 {'pn': 'honor',
  'price': '3,499 LE',
  'title': 'Honor 10 Lite Dual Sim - 64GB, 3GB RAM, 4G LTE, Sky Blue',
  'Label': 'BrandsHonor',
  'Brand': 'Honor',
  'URL': 'https://mobaili.com/index.php?route=product/product&product_id=1222&search=honor'},
 {'pn': 'honor',
  'price': '2,879 LE',
  'title': 'Honor 10X Lite Dual SIM Mobile - 6.67 Inches, 128 GB, 4 GB RAM, 4G LTE - Emerald Green',
  'Label': 'BrandsHonor',
  'Brand': 'Honor',
  'URL': 'https://mobaili.com/index.php?route=product/product&product_id=1627&search=honor'},
 {'pn': 'honor',
  'price': '2,879 LE',
  'title': 'Honor 10X Lite Dual SIM Mobile - 6.67 Inches, 128 GB, 4 GB RAM, 4G LTE - Icelandic Frost',
  'Label': 'BrandsHonor',
  'Brand': 'Honor',
  'URL': 'https://mobaili.com

In [10]:
browser.quit()

In [11]:
output_df = pd.DataFrame(products)
output_df.to_excel(output_file, index=False)