In [80]:
# Requierd libararies and modules

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException, StaleElementReferenceException, ElementClickInterceptedException, ElementNotInteractableException
import pandas as pd
import time
import re

In [81]:
#launching selenium chrome driver

service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service) 

In [None]:
rows = []

driver.get('https://www.noon.com/egypt-en/reviews/N70106183V/?o=b8e38d273a63a3af')  #starting page

#translating the reviews into the English language only
try:
    translate_button = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.XPATH, "//button[span[text()='Translate all reviews']]"))
    )
    translate_button.click()
except TimeoutException:
    pass


#fetching and parsing the required data
max_pages = 195
current_page = 2
while current_page < max_pages:

    try:
        review_cards = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, 'ReviewListSection_inView__vMMSX'))
        )
    except TimeoutException:
        break

    for card in review_cards:
        try:
            #parsing rating stars
            rating_container = card.find_element(By.CSS_SELECTOR, "div.RatingStars_container__Maoqq")
            filled_stars = 0
            for star_img in rating_container.find_elements(By.CSS_SELECTOR, "img[alt='starFilledV2']"):
                try:
                    color = star_img.get_attribute("color")
                    if color != "grey3":
                        filled_stars += 1
                except StaleElementReferenceException:
                    continue

            rating = filled_stars
            
            
            #parsing text and cleaning all numbers, punctuations, and unnecessary words
            text = card.find_element(By.CLASS_NAME, 'TranslatedReviewTitle_reviewTitle__I0NET').text.strip()
            text = text.lower()
            
            phone_words = r'\b(?:phone|iphone|samsung|pixel|android|apple|camera|pro|max|battery|mobile)\b'
            clean_text = re.sub(phone_words, '', text)
            clean_text = re.sub(r'[^a-zA-Z\s]', '', clean_text)
            clean_text = ' '.join(clean_text.split())
            
            
            rows.append([rating, clean_text]) #saving data
        except NoSuchElementException:
            continue
        
    #navigating to the next pages to load more reviews    
    try:
        next_button = WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'li a[rel="next"]'))
        )
        driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
        # Check if the button can be clicked
        try:
            next_button.click()
        except ElementClickInterceptedException:
            driver.execute_script("arguments[0].click();", next_button) 

    except (TimeoutException, ElementClickInterceptedException):
        break  #no more pages 
    
    current_page+=1 #extra safety to avoid infinite looping
    if current_page >= max_pages:
        break

driver.quit()

df = pd.DataFrame(rows, columns=['Rating', 'Text'])

In [86]:
df.head(15)

Unnamed: 0,Rating,Text
0,5,blew my expectations away truly nextgen gaming...
1,3,good for nothing
2,5,fantastic deal on fast delivery great price an...
3,5,excellent product just nice for one hands i ha...
4,5,super duper i use thanks noon
5,2,im very desapointed this
6,5,this comment is for those who are afraid to bu...
7,4,very satisfied
8,1,one of the worst phones ever
9,1,review for i


In [84]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2895 entries, 0 to 2894
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Rating  2895 non-null   int64 
 1   Text    2895 non-null   object
dtypes: int64(1), object(1)
memory usage: 45.4+ KB


In [85]:
df.to_csv('Scraped_data.csv') #saving the data into a CSV file