# <strong>Data Collection (Scraping Google Maps Reviews)</strong>

### <strong> Version original </strong>

In [9]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import time
import pandas as pd
from datetime import datetime

# 🔹 CONFIGURATION : Chemin vers ChromeDriver
chrome_driver_path = "C:/chromedriver/chromedriver/chromedriver.exe"

# 🔹 Liste des banques à scraper
bank_urls = [
    "https://www.google.com/maps/place/BANK+OF+AFRICA,+Agence+Abdelmoumen,+Casablanca./@33.5265726,-7.7316491,13z/data=!4m13!1m3!2m2!1sBMCE+Bank+of+Africa!6e2!3m8!1s0xda7d2b1401cddab:0xa09ec03c31179e50!8m2!3d33.5677444!4d-7.6264518!9m1!1b1!15sChNCTUNFIEJhbmsgb2YgQWZyaWNhkgEEYmFua-ABAA!16s%2Fg%2F1tklrxp2!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMwNC4wIKXMDSoJLDEwMjExNDUzSAFQAw%3D%3D",
    "https://www.google.com/maps/place/%D8%A8%D9%86%D9%83+%D8%A3%D9%81%D8%B1%D9%8A%D9%82%D9%8A%D8%A7%E2%80%AD/@33.5265726,-7.7316491,13z/data=!4m13!1m3!2m2!1sBMCE+Bank+of+Africa!6e2!3m8!1s0xda62cb2e1b355d9:0x92b1eb404df1b850!8m2!3d33.5563893!4d-7.6848036!9m1!1b1!15sChNCTUNFIEJhbmsgb2YgQWZyaWNhkgEEYmFua-ABAA!16s%2Fg%2F11crv4pc1x!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMwNC4wIKXMDSoJLDEwMjExNDUzSAFQAw%3D%3D"
]

# 🔹 Lancer Chrome en mode headless
service = Service(chrome_driver_path)
options = webdriver.ChromeOptions()
options.add_argument("--headless")  
options.add_argument("--disable-gpu")  # Option utile pour éviter certains bugs graphiques
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
driver = webdriver.Chrome(service=service, options=options)

# 🔹 Liste pour stocker toutes les données
all_data = []

# 🔹 Fonction pour scroller la page
def scroll_page():
    for _ in range(10):  # Nombre de scrolls
        driver.execute_script("window.scrollBy(0, 400);")
        time.sleep(2)

# 🔹 Fonction pour convertir "il y a X ans" en une année précise
def convert_relative_date(relative_date):
    now = datetime.now()
    try:
        # Split the relative date string and extract the number at index 2
        parts = relative_date.split()
        if len(parts) >= 3 :  # Handle years
            if parts[3] == "un":
                parts[3] = "1"
            years_ago = int(parts[3])  # Extract the number (e.g., "5" in "il y a 5 ans")
            return str(now.year - years_ago)  # Calculate the year (e.g., 2025 - 5 = 2020)
        elif "mois" in relative_date:  # Handle months
            return str(now.year)  # Use the current year for months
        elif "jour" in relative_date:  # Handle days
            return str(now.year)  # Use the current year for days
        else:
            return "N/A"  # Default if the format is not recognized
    except (IndexError, ValueError):
        return "N/A"  # Handle unexpected formats or errors

# 🔹 Traiter chaque banque
for index, url in enumerate(bank_urls, start=1):
    print(f"🔍 Scraping de l'URL {index}: {url}")
    driver.get(url)
    time.sleep(5)  # Laisser la page se charger

    scroll_page()  # Scroller pour charger les avis

    # 🔹 Extraire les avis et dates avant de cliquer sur "Présentation"
    try:
        review_elements = driver.find_elements(By.CSS_SELECTOR, "span.wiI7pd")
        date_elements = driver.find_elements(By.CSS_SELECTOR, "span.rsqaWe")

        print(f"🔍 Nombre d'avis trouvés : {len(review_elements)}")

        # 🔹 Stocker temporairement les avis et dates
        reviews = [review.text.strip() for review in review_elements]
        dates = [convert_relative_date(date.text.strip()) for date in date_elements]

    except Exception as e:
        print(f"⚠️ Erreur lors de l'extraction des avis : {e}")
        continue

    # 🔹 Cliquer sur le div "Présentation" pour révéler l'adresse et le rating
    try:
        presentation_div = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "div.LRkQ2"))
        )
        presentation_div.click()
        time.sleep(2)  # Attendre que l'adresse et le rating soient chargés
    except TimeoutException:
        print("⚠️ Impossible de cliquer sur le div de présentation")
        continue

    # 🔹 Récupérer le nom de la banque après avoir cliqué sur "Présentation"
    try:
        bank_name_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "h1.DUwDvf.lfPIob"))
        )
        bank_name = bank_name_element.text.strip()
    except TimeoutException:
        bank_name = f"Banque {index}"

    print(f"📌 Nom de la banque détecté : {bank_name}")

    # 🔹 Récupérer l'adresse de la banque
    try:
        address_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "div.Io6YTe.fontBodyMedium.kR99db.fdkmkc"))
        )
        address = address_element.text.strip()
    except TimeoutException:
        address = "Non trouvée"

    print(f"📌 Adresse de la banque détectée : {address}")

    # 🔹 Récupérer le rating après avoir cliqué sur "Présentation"
    try:
        rating_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "div.F7nice span[aria-hidden='true']"))
        )
        rating = rating_element.text.strip()
    except TimeoutException:
        rating = "N/A"

    print(f"📌 Note de la banque détectée : {rating}")

    # 🔹 Stocker les données dans une liste
    for i in range(len(reviews)):
        try:
            all_data.append({
                "Nom de la Banque": bank_name,
                "Adresse": address,
                "Avis": reviews[i],
                "Note": rating,  # Use the same rating for all reviews
                "Date": dates[i] if i < len(dates) else "N/A"  # Only the year (e.g., 2020)
            })
        except Exception as e:
            print(f"⚠️ Erreur lors du traitement d'un avis : {e}")

# 🔹 Fermer le navigateur
driver.quit()

# 🔹 Enregistrer toutes les données dans un seul fichier CSV
df = pd.DataFrame(all_data)
csv_filename = "avis_banques.csv"
df.to_csv(csv_filename, index=False, encoding="utf-8")

print(f"✅ Toutes les données sont enregistrées dans {csv_filename} ({len(all_data)} avis).")
print("🚀 Scraping terminé pour toutes les banques !")

🔍 Scraping de l'URL 1: https://www.google.com/maps/place/BANK+OF+AFRICA,+Agence+Abdelmoumen,+Casablanca./@33.5265726,-7.7316491,13z/data=!4m13!1m3!2m2!1sBMCE+Bank+of+Africa!6e2!3m8!1s0xda7d2b1401cddab:0xa09ec03c31179e50!8m2!3d33.5677444!4d-7.6264518!9m1!1b1!15sChNCTUNFIEJhbmsgb2YgQWZyaWNhkgEEYmFua-ABAA!16s%2Fg%2F1tklrxp2!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMwNC4wIKXMDSoJLDEwMjExNDUzSAFQAw%3D%3D
🔍 Nombre d'avis trouvés : 8
📌 Nom de la banque détecté : BANK OF AFRICA, Agence Abdelmoumen, Casablanca.
📌 Adresse de la banque détectée : H99F+3CV, Rue Pasquier, Casablanca 20250
📌 Note de la banque détectée : 2,1
🔍 Scraping de l'URL 2: https://www.google.com/maps/place/%D8%A8%D9%86%D9%83+%D8%A3%D9%81%D8%B1%D9%8A%D9%82%D9%8A%D8%A7%E2%80%AD/@33.5265726,-7.7316491,13z/data=!4m13!1m3!2m2!1sBMCE+Bank+of+Africa!6e2!3m8!1s0xda62cb2e1b355d9:0x92b1eb404df1b850!8m2!3d33.5563893!4d-7.6848036!9m1!1b1!15sChNCTUNFIEJhbmsgb2YgQWZyaWNhkgEEYmFua-ABAA!16s%2Fg%2F11crv4pc1x!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMwNC4wIKX

### <Strong>Imitation</String>

In [10]:
import pandas as pd
df = pd.read_csv("avis_banques.csv")
df

Unnamed: 0,Nom de la Banque,Adresse,Avis,Note,Date
0,"BANK OF AFRICA, Agence Abdelmoumen, Casablanca.","H99F+3CV, Rue Pasquier, Casablanca 20250",Catastrophe !\nJ'ai d'abord demandé à ce qu'on...,21,2020
1,"BANK OF AFRICA, Agence Abdelmoumen, Casablanca.","H99F+3CV, Rue Pasquier, Casablanca 20250","Je suis un client de la BMCE, et très étonné d...",21,2019
2,"BANK OF AFRICA, Agence Abdelmoumen, Casablanca.","H99F+3CV, Rue Pasquier, Casablanca 20250",Le client doit s adapter avec les horaires de ...,21,2024
3,"BANK OF AFRICA, Agence Abdelmoumen, Casablanca.","H99F+3CV, Rue Pasquier, Casablanca 20250",Pas mal comme banque,21,2021
4,"BANK OF AFRICA, Agence Abdelmoumen, Casablanca.","H99F+3CV, Rue Pasquier, Casablanca 20250",ils ne decroche pas le telephone et ne repond ...,21,2022
5,"BANK OF AFRICA, Agence Abdelmoumen, Casablanca.","H99F+3CV, Rue Pasquier, Casablanca 20250",Je trouve le coin agréable,21,2021
6,"BANK OF AFRICA, Agence Abdelmoumen, Casablanca.","H99F+3CV, Rue Pasquier, Casablanca 20250",Service zbaaaaala,21,2022
7,"BANK OF AFRICA, Agence Abdelmoumen, Casablanca.","H99F+3CV, Rue Pasquier, Casablanca 20250","Si je pouvais donner moins d’une étoile, je di...",21,2023
8,بنك أفريقيا,"H848+H34, Casablanca",Meilleur banque,30,2018
9,بنك أفريقيا,"H848+H34, Casablanca",Fidélité Casablanca,30,2018


In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import time
import pandas as pd
from datetime import datetime

# 🔹 CONFIGURATION : Chemin vers ChromeDriver
chrome_driver_path = "C:/chromedriver/chromedriver/chromedriver.exe"

# 🔹 Liste des banques à scraper
bank_urls = [
    "https://www.google.com/maps/place/%D8%A8%D9%86%D9%83+%D8%A3%D9%81%D8%B1%D9%8A%D9%82%D9%8A%D8%A7%E2%80%AD/@33.5265726,-7.7316491,13z/data=!4m13!1m3!2m2!1sBMCE+Bank+of+Africa!6e2!3m8!1s0xda62cb2e1b355d9:0x92b1eb404df1b850!8m2!3d33.5563893!4d-7.6848036!9m1!1b1!15sChNCTUNFIEJhbmsgb2YgQWZyaWNhkgEEYmFua-ABAA!16s%2Fg%2F11crv4pc1x!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMwNC4wIKXMDSoJLDEwMjExNDUzSAFQAw%3D%3D"
]

# 🔹 Lancer Chrome en mode headless
service = Service(chrome_driver_path)
options = webdriver.ChromeOptions()
options.add_argument("--headless")  
options.add_argument("--disable-gpu")  # Option utile pour éviter certains bugs graphiques
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
driver = webdriver.Chrome(service=service, options=options)

# 🔹 Liste pour stocker toutes les données
all_data = []

# 🔹 Fonction pour scroller la page
def scroll_page():
    for _ in range(10):  # Nombre de scrolls
        driver.execute_script("window.scrollBy(0, 400);")
        time.sleep(2)

# 🔹 Fonction pour convertir "il y a X ans" en une année précise
def convert_relative_date(relative_date):
    now = datetime.now()
    try:
        # Split the relative date string and extract the number at index 2
        parts = relative_date.split()
        if len(parts) >= 3 :  # Handle years
            if parts[3] == "un":
                parts[3] = "1"
            years_ago = int(parts[3])  # Extract the number (e.g., "5" in "il y a 5 ans")
            return str(now.year - years_ago)  # Calculate the year (e.g., 2025 - 5 = 2020)
        elif "mois" in relative_date:  # Handle months
            return str(now.year)  # Use the current year for months
        elif "jour" in relative_date:  # Handle days
            return str(now.year)  # Use the current year for days
        else:
            return "N/A"  # Default if the format is not recognized
    except (IndexError, ValueError):
        return "N/A"  # Handle unexpected formats or errors

# 🔹 Traiter chaque banque
for index, url in enumerate(bank_urls, start=1):
    print(f"🔍 Scraping de l'URL {index}: {url}")
    driver.get(url)
    time.sleep(5)  # Laisser la page se charger

    scroll_page()  # Scroller pour charger les avis

    # 🔹 Extraire les avis et dates avant de cliquer sur "Présentation"
    try:
        review_elements = driver.find_elements(By.CSS_SELECTOR, "span.wiI7pd")
        date_elements = driver.find_elements(By.CSS_SELECTOR, "span.rsqaWe")

        print(f"🔍 Nombre d'avis trouvés : {len(review_elements)}")

        # 🔹 Stocker temporairement les avis et dates
        reviews = [review.text.strip() for review in review_elements]
        dates = [convert_relative_date(date.text.strip()) for date in date_elements]

    except Exception as e:
        print(f"⚠️ Erreur lors de l'extraction des avis : {e}")
        continue

    # 🔹 Cliquer sur le div "Présentation" pour révéler l'adresse et le rating
    try:
        presentation_div = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "div.LRkQ2"))
        )
        presentation_div.click()
        time.sleep(2)  # Attendre que l'adresse et le rating soient chargés
    except TimeoutException:
        print("⚠️ Impossible de cliquer sur le div de présentation")
        continue

    # 🔹 Récupérer le nom de la banque après avoir cliqué sur "Présentation"
    try:
        bank_name_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "h1.DUwDvf.lfPIob"))
        )
        bank_name = bank_name_element.text.strip()
    except TimeoutException:
        bank_name = f"Banque {index}"

    print(f"📌 Nom de la banque détecté : {bank_name}")

    # 🔹 Récupérer l'adresse de la banque
    try:
        address_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "div.Io6YTe.fontBodyMedium.kR99db.fdkmkc"))
        )
        address = address_element.text.strip()
    except TimeoutException:
        address = "Non trouvée"

    print(f"📌 Adresse de la banque détectée : {address}")

    # 🔹 Récupérer le rating après avoir cliqué sur "Présentation"
    try:
        rating_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "div.F7nice span[aria-hidden='true']"))
        )
        rating = rating_element.text.strip()
    except TimeoutException:
        rating = "N/A"

    print(f"📌 Note de la banque détectée : {rating}")

    # 🔹 Stocker les données dans une liste
    for i in range(len(reviews)):
        try:
            all_data.append({
                "Bank name": bank_name,
                "Branch name": bank_name+" "+address,
                "Location": address,
                "Review text": reviews[i],
                "Rating": rating,
                "Review date": dates[i] if i < len(dates) else "N/A"  # Only the year (e.g., 2020)
            })
        except Exception as e:
            print(f"⚠️ Erreur lors du traitement d'un avis : {e}")

# 🔹 Fermer le navigateur
driver.quit()

# 🔹 Enregistrer toutes les données dans un seul fichier CSV
df = pd.DataFrame(all_data)
csv_filename = "avis_banques.csv"
df.to_csv(csv_filename, index=False, encoding="utf-8")

print(f"✅ Toutes les données sont enregistrées dans {csv_filename} ({len(all_data)} avis).")
print("🚀 Scraping terminé pour toutes les banques !")

🔍 Scraping de l'URL 1: https://www.google.com/maps/place/BANK+OF+AFRICA,+Agence+Abdelmoumen,+Casablanca./@33.5265726,-7.7316491,13z/data=!4m13!1m3!2m2!1sBMCE+Bank+of+Africa!6e2!3m8!1s0xda7d2b1401cddab:0xa09ec03c31179e50!8m2!3d33.5677444!4d-7.6264518!9m1!1b1!15sChNCTUNFIEJhbmsgb2YgQWZyaWNhkgEEYmFua-ABAA!16s%2Fg%2F1tklrxp2!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMwNC4wIKXMDSoJLDEwMjExNDUzSAFQAw%3D%3D
🔍 Nombre d'avis trouvés : 8
📌 Nom de la banque détecté : BANK OF AFRICA, Agence Abdelmoumen, Casablanca.
📌 Adresse de la banque détectée : H99F+3CV, Rue Pasquier, Casablanca 20250
📌 Note de la banque détectée : 2,1
🔍 Scraping de l'URL 2: https://www.google.com/maps/place/%D8%A8%D9%86%D9%83+%D8%A3%D9%81%D8%B1%D9%8A%D9%82%D9%8A%D8%A7%E2%80%AD/@33.5265726,-7.7316491,13z/data=!4m13!1m3!2m2!1sBMCE+Bank+of+Africa!6e2!3m8!1s0xda62cb2e1b355d9:0x92b1eb404df1b850!8m2!3d33.5563893!4d-7.6848036!9m1!1b1!15sChNCTUNFIEJhbmsgb2YgQWZyaWNhkgEEYmFua-ABAA!16s%2Fg%2F11crv4pc1x!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMwNC4wIKX

In [18]:
import pandas as pd
df = pd.read_csv("avis_banques.csv")
df

Unnamed: 0,Bank name,Branch name,Location,Review text,Rating,Review date
0,"BANK OF AFRICA, Agence Abdelmoumen, Casablanca.","BANK OF AFRICA, Agence Abdelmoumen, Casablanca...","H99F+3CV, Rue Pasquier, Casablanca 20250",Catastrophe !\nJ'ai d'abord demandé à ce qu'on...,21,2020
1,"BANK OF AFRICA, Agence Abdelmoumen, Casablanca.","BANK OF AFRICA, Agence Abdelmoumen, Casablanca...","H99F+3CV, Rue Pasquier, Casablanca 20250","Je suis un client de la BMCE, et très étonné d...",21,2019
2,"BANK OF AFRICA, Agence Abdelmoumen, Casablanca.","BANK OF AFRICA, Agence Abdelmoumen, Casablanca...","H99F+3CV, Rue Pasquier, Casablanca 20250",Le client doit s adapter avec les horaires de ...,21,2024
3,"BANK OF AFRICA, Agence Abdelmoumen, Casablanca.","BANK OF AFRICA, Agence Abdelmoumen, Casablanca...","H99F+3CV, Rue Pasquier, Casablanca 20250",Pas mal comme banque,21,2021
4,"BANK OF AFRICA, Agence Abdelmoumen, Casablanca.","BANK OF AFRICA, Agence Abdelmoumen, Casablanca...","H99F+3CV, Rue Pasquier, Casablanca 20250",ils ne decroche pas le telephone et ne repond ...,21,2022
5,"BANK OF AFRICA, Agence Abdelmoumen, Casablanca.","BANK OF AFRICA, Agence Abdelmoumen, Casablanca...","H99F+3CV, Rue Pasquier, Casablanca 20250",Je trouve le coin agréable,21,2021
6,"BANK OF AFRICA, Agence Abdelmoumen, Casablanca.","BANK OF AFRICA, Agence Abdelmoumen, Casablanca...","H99F+3CV, Rue Pasquier, Casablanca 20250",Service zbaaaaala,21,2022
7,"BANK OF AFRICA, Agence Abdelmoumen, Casablanca.","BANK OF AFRICA, Agence Abdelmoumen, Casablanca...","H99F+3CV, Rue Pasquier, Casablanca 20250","Si je pouvais donner moins d’une étoile, je di...",21,2023
8,بنك أفريقيا,"بنك أفريقياH848+H34, Casablanca","H848+H34, Casablanca",Meilleur banque,30,2018
9,بنك أفريقيا,"بنك أفريقياH848+H34, Casablanca","H848+H34, Casablanca",Fidélité Casablanca,30,2018


In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

# Initialiser le driver
options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# URL de Google Maps avec la liste des agences (exemple)
url = "https://www.google.com/maps/place/CIH+BANK/@33.9863553,-6.9484621,5967m/data=!3m1!1e3!4m10!1m2!2m1!1scih+bank!3m6!1s0xda76c70de128e97:0x2f5cf53cfddf6dd4!8m2!3d33.999388!4d-6.8444769!15sCghjaWggYmFuayIDiAEBkgEEYmFua-ABAA!16s%2Fg%2F11hcjzjj66!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMxMi4wIKXMDSoASAFQAw%3D%3D"
driver.get(url)

# Attendre le chargement de la page
driver.implicitly_wait(5)

# Trouver le div contenant les agences
parent_div = driver.find_element(By.CLASS_NAME, "m6QErb")

# Extraire les liens des agences bancaires
agence_links = []
agences = parent_div.find_elements(By.TAG_NAME, "a")

for agence in agences:
    link = agence.get_attribute("div > Nv2PK > href")
    print(link)


# The original multi agence

In [32]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import time

# Set up WebDriver with options
options = Options()
# options.add_argument("--headless")  # Run without UI (disable for testing)
options.add_argument("--disable-blink-features=AutomationControlled")  # Avoid detection
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("useAutomationExtension", False)

# Initialize the driver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Open Google Maps search page
url = "https://www.google.com/maps/search/CIH+BANK/"
driver.get(url)

# Wait for results to load
wait = WebDriverWait(driver, 10)
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.Nv2PK.Q2HXcd.THOPZb")))

# Scroll to load more results
for _ in range(5):  # Scroll multiple times
    driver.execute_script("window.scrollBy(0, 500);")
    time.sleep(2)

# Find all the agency divs
agences = driver.find_elements(By.CSS_SELECTOR, "div.Nv2PK.Q2HXcd.THOPZb")

# Extract the href links
for agence in agences:
    try:
        link = agence.find_element(By.CSS_SELECTOR, "a.hfpxzc").get_attribute("href")
        print(link)  # Output extracted links
    except:
        print("No link found.")

# Close the browser
driver.quit()


https://www.google.com/maps/place/CIH+Bank/data=!4m7!3m6!1s0xda7132dea22b8ff:0xa6282b8795ca748f!8m2!3d33.9574084!4d-6.8687443!16s%2Fg%2F11b6hy9bxv!19sChIJ_7gi6i0Tpw0Rj3TKlYcrKKY?authuser=0&hl=fr&rclk=1
https://www.google.com/maps/place/CIH/data=!4m7!3m6!1s0xda76ce1ba996839:0xea0bce250e2160a2!8m2!3d33.9863553!4d-6.8763643!16s%2Fg%2F1q5bxc5q5!19sChIJOWiZuuFspw0RomAhDiXOC-o?authuser=0&hl=fr&rclk=1
https://www.google.com/maps/place/CIH/data=!4m7!3m6!1s0xda76c859f7e237b:0x217d9ca0edf0eeb0!8m2!3d33.9948545!4d-6.8471361!16s%2Fg%2F1th4brqp!19sChIJeyN-n4Vspw0RsO7w7aCcfSE?authuser=0&hl=fr&rclk=1


In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
import time

# Initialize WebDriver
driver = webdriver.Chrome()
driver.get("https://www.google.com/maps/search/CIH+BANK/")

# Find the scrollable element
scrollable_element = driver.find_element(By.CSS_SELECTOR, "YOUR_SCROLLABLE_ELEMENT_SELECTOR")

# Initialize previous height
last_height = driver.execute_script("return arguments[0].scrollHeight", scrollable_element)

while True:
    # Scroll down
    driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", scrollable_element)
    
    # Wait for new content to load
    time.sleep(2)  # Adjust delay as needed
    
    # Get new height
    new_height = driver.execute_script("return arguments[0].scrollHeight", scrollable_element)
    
    # Check if the height changed
    if new_height == last_height:
        print("No more content to load.")
        break  # Stop scrolling if no more new content appears

    last_height = new_height

print("Scrolling complete.")
driver.quit()


NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":"YOUR_SCROLLABLE_ELEMENT_SELECTOR"}
  (Session info: chrome=134.0.6998.88); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
#0 0x55c244581a1a <unknown>
#1 0x55c244039390 <unknown>
#2 0x55c24408ac85 <unknown>
#3 0x55c24408aeb1 <unknown>
#4 0x55c2440d9d64 <unknown>
#5 0x55c2440b0bfd <unknown>
#6 0x55c2440d707b <unknown>
#7 0x55c2440b09a3 <unknown>
#8 0x55c24407c60e <unknown>
#9 0x55c24407ddd1 <unknown>
#10 0x55c244547ddb <unknown>
#11 0x55c24454bcbc <unknown>
#12 0x55c24452f392 <unknown>
#13 0x55c24454c834 <unknown>
#14 0x55c2445131ef <unknown>
#15 0x55c244570038 <unknown>
#16 0x55c244570216 <unknown>
#17 0x55c244580896 <unknown>
#18 0x7f36c1a0f609 start_thread


In [9]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# Initialize WebDriver
driver = webdriver.Chrome()
driver.get("https://www.google.com/maps/search/CIH+BANK/")

# Wait for the scrollable element to appear
scrollable_element = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "div.m6QErb.DxyBCb.kA9KIf.dS8AEf.XiKgde.ecceSd"))
)

# Scrolling to load more results
last_height = driver.execute_script("return arguments[0].scrollHeight", scrollable_element)

while True:
    driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", scrollable_element)
    time.sleep(2)  # Wait for new content to load
    new_height = driver.execute_script("return arguments[0].scrollHeight", scrollable_element)
    
    if new_height == last_height:  # Stop if no new content is loaded
        break
    last_height = new_height

# Extracting links
business_links = driver.find_elements(By.CSS_SELECTOR, "a[href^='https://www.google.com/maps/place/']")
links = [link.get_attribute("href") for link in business_links]

# Print links
for idx, link in enumerate(links, 1):
    print(f"{idx}: {link}")

# Close WebDriver
driver.quit()


1: https://www.google.com/maps/place/CIH+Bank/data=!4m7!3m6!1s0xda7132dea22b8ff:0xa6282b8795ca748f!8m2!3d33.9574084!4d-6.8687443!16s%2Fg%2F11b6hy9bxv!19sChIJ_7gi6i0Tpw0Rj3TKlYcrKKY?authuser=0&hl=fr&rclk=1
2: https://www.google.com/maps/place/CIH/data=!4m7!3m6!1s0xda76ce1ba996839:0xea0bce250e2160a2!8m2!3d33.9863553!4d-6.8763643!16s%2Fg%2F1q5bxc5q5!19sChIJOWiZuuFspw0RomAhDiXOC-o?authuser=0&hl=fr&rclk=1
3: https://www.google.com/maps/place/Credit+Immobilier+Et+Hotelier+%28Cih+Agence+Yacoub+Al+Mansour%29/data=!4m7!3m6!1s0xda76d24964dd881:0xae1c4ec6c91e3837!8m2!3d33.9760898!4d-6.8874566!16s%2Fg%2F11b6jg_p7d!19sChIJgdhNliRtpw0RNzgeycZOHK4?authuser=0&hl=fr&rclk=1
4: https://www.google.com/maps/place/CIH+Bank/data=!4m7!3m6!1s0xda76d3ee69fbdcd:0x5890f67b66478786!8m2!3d33.9954991!4d-6.879399!16s%2Fg%2F11b6hzvwbv!19sChIJzb2f5j5tpw0RhodHZnv2kFg?authuser=0&hl=fr&rclk=1
5: https://www.google.com/maps/place/CIH/data=!4m7!3m6!1s0xda76c859f7e237b:0x217d9ca0edf0eeb0!8m2!3d33.9948545!4d-6.8471361!16s%2Fg

In [10]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# Initialize WebDriver
driver = webdriver.Chrome()
driver.get("https://www.google.com/maps/search/CIH+BANK/")

# Wait for the scrollable element to appear
scrollable_element = WebDriverWait(driver, 15).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "div.m6QErb.DxyBCb.kA9KIf.dS8AEf.XiKgde.ecceSd"))
)

# Scroll and Load More Results
last_height = 0
max_scrolls = 30  # Increase this value to scroll more times
scroll_count = 0

while scroll_count < max_scrolls:
    driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", scrollable_element)
    time.sleep(3)  # Increased wait time for more content loading
    new_height = driver.execute_script("return arguments[0].scrollHeight", scrollable_element)

    if new_height == last_height:  # Stop if no new content is loaded
        break

    last_height = new_height
    scroll_count += 1

# Extracting links
WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located((By.CSS_SELECTOR, "a[href^='https://www.google.com/maps/place/']"))
)
business_links = driver.find_elements(By.CSS_SELECTOR, "a[href^='https://www.google.com/maps/place/']")
links = list(set(link.get_attribute("href") for link in business_links))  # Remove duplicates

# Print links
for idx, link in enumerate(links, 1):
    print(f"{idx}: {link}")

# Close WebDriver
driver.quit()


1: https://www.google.com/maps/place/CIH+Bank/data=!4m7!3m6!1s0xda76d3ee69fbdcd:0x5890f67b66478786!8m2!3d33.9954991!4d-6.879399!16s%2Fg%2F11b6hzvwbv!19sChIJzb2f5j5tpw0RhodHZnv2kFg?authuser=0&hl=fr&rclk=1
2: https://www.google.com/maps/place/Credit+Immobilier+Et+Hotelier+%28Cih+Agence+Yacoub+Al+Mansour%29/data=!4m7!3m6!1s0xda76d24964dd881:0xae1c4ec6c91e3837!8m2!3d33.9760898!4d-6.8874566!16s%2Fg%2F11b6jg_p7d!19sChIJgdhNliRtpw0RNzgeycZOHK4?authuser=0&hl=fr&rclk=1
3: https://www.google.com/maps/place/CIH+Banque/data=!4m7!3m6!1s0xda76b50b2e48fbb:0xbd19f7cc5481a3d3!8m2!3d33.9742009!4d-6.8296046!16s%2Fg%2F11j8gcx4rb!19sChIJu4_kslBrpw0R06OBVMz3Gb0?authuser=0&hl=fr&rclk=1
4: https://www.google.com/maps/place/CIH/data=!4m7!3m6!1s0xda76ce1ba996839:0xea0bce250e2160a2!8m2!3d33.9863553!4d-6.8763643!16s%2Fg%2F1q5bxc5q5!19sChIJOWiZuuFspw0RomAhDiXOC-o?authuser=0&hl=fr&rclk=1
5: https://www.google.com/maps/place/CIH+Bank/data=!4m7!3m6!1s0xda712ddb9fe4cf5:0xb5390f368b25b462!8m2!3d33.9406386!4d-6.8850588!

In [37]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# Initialize WebDriver
driver = webdriver.Chrome()
driver.get("https://www.google.com/maps/search/bmce/@33.986504,-7.010264,23139m/data=!3m2!1e3!4b1!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMxMi4wIKXMDSoASAFQAw%3D%3D")

# Wait for the scrollable element
scrollable_element = WebDriverWait(driver, 15).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "div.m6QErb.DxyBCb.kA9KIf.dS8AEf.XiKgde.ecceSd"))
)

# Function to extract links
def extract_links():
    business_links = driver.find_elements(By.CSS_SELECTOR, "a.hfpxzc")
    return set(link.get_attribute("href") for link in business_links)

# Scroll and Load More Results
scroll_distance = 700  # Scroll distance in pixels
max_scrolls = 50  # Maximum number of scroll attempts
scroll_count = 0
all_links = set()  # Store all unique links
retry_count = 0
max_retries = 5  # Maximum number of retries to confirm no new links are loaded

while scroll_count < max_scrolls:
    # Wait for links to load in the current view
    try:
        WebDriverWait(driver, 50).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.Nv2PK > a.hfpxzc"))
        )
    except Exception as e:
        print("Timed out waiting for links to load. Continuing...")

    # Extract links in the current view
    current_links = extract_links()
    all_links.update(current_links)  # Add new links to the set

    # Scroll down by the fixed distance
    driver.execute_script(f"arguments[0].scrollBy(0, {scroll_distance});", scrollable_element)
    time.sleep(3)  # Allow time for new content to load

    # Check if new content has been loaded
    new_links = extract_links()
    if new_links.issubset(all_links):  # No new links loaded
        retry_count += 1
        if retry_count >= max_retries:  # Confirm no new links after retries
            print("No new links loaded after retries. Stopping scroll.")
            break
        else:
            print(f"No new links loaded. Retrying ({retry_count}/{max_retries})...")
            time.sleep(20)  # Wait longer before retrying
    else:
        retry_count = 0  # Reset retry count if new links are found

    scroll_count += 1

# Print all links
print(f"Total links found: {len(all_links)}")
for idx, link in enumerate(all_links, 1):
    print(f"{idx}: {link}")

# Close WebDriver
driver.quit()

No new links loaded. Retrying (1/5)...
No new links loaded. Retrying (2/5)...
No new links loaded. Retrying (3/5)...
No new links loaded. Retrying (4/5)...
No new links loaded after retries. Stopping scroll.
Total links found: 10
1: https://www.google.com/maps/place/Bank+of+Africa/data=!4m7!3m6!1s0xda7132975d31b65:0xf6256fb873cd052f!8m2!3d33.957545!4d-6.8688988!16s%2Fg%2F1thfgt1r!19sChIJZRvTdSkTpw0RLwXNc7hvJfY?authuser=0&hl=fr&rclk=1
2: https://www.google.com/maps/place/BMCE+El+Manzeh/data=!4m7!3m6!1s0xda76d3bd4967483:0xde764aefe845351!8m2!3d33.9743072!4d-6.8938483!16s%2Fg%2F11b6jgpk2b!19sChIJg3SW1Dttpw0RUVOE_q5k5w0?authuser=0&hl=fr&rclk=1
3: https://www.google.com/maps/place/BMCE+Bank/data=!4m7!3m6!1s0xda71329768659f3:0xaad2c03266d5fbfe!8m2!3d33.9547244!4d-6.8750924!16s%2Fg%2F11b6jh234j!19sChIJ81mGdikTpw0R_vvVZjLA0qo?authuser=0&hl=fr&rclk=1
4: https://www.google.com/maps/place/BMCE/data=!4m7!3m6!1s0xda76c6487453f5d:0xdeb9a4a46b78c2d6!8m2!3d34.0131582!4d-6.8484548!16s%2Fg%2F1thzq7kt!19

In [82]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# Initialize WebDriver
driver = webdriver.Chrome()
driver.get("https://www.google.com/maps/search/bmce/@33.986504,-7.010264,23139m/data=!3m2!1e3!4b1!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMxMi4wIKXMDSoASAFQAw%3D%3D")

# Wait for the scrollable element
scrollable_element = WebDriverWait(driver, 15).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "div.m6QErb.DxyBCb.kA9KIf.dS8AEf.XiKgde.ecceSd"))
)

# Function to extract links
def extract_links():
    business_links = driver.find_elements(By.CSS_SELECTOR, "a.hfpxzc")
    return set(link.get_attribute("href") for link in business_links)

# Scroll and Load More Results
scroll_distance = 2000  # Scroll distance in pixels
max_scrolls = 50  # Maximum number of scroll attempts
scroll_count = 0
all_links = set()  # Store all unique links
retry_count = 0
max_retries = 5  # Maximum number of retries to confirm no new links are loaded

while scroll_count < max_scrolls:
    # Wait for links to load in the current view
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, "a.hfpxzc"))
        )
    except Exception as e:
        print("Timed out waiting for links to load. Continuing...")

    # Extract links in the current view
    current_links = extract_links()
    all_links.update(current_links)  # Add new links to the set

    # Print the number of links found in the current view
    print(f"Scroll {scroll_count + 1}: Found {len(current_links)} links.")

    # Scroll down by the fixed distance
    driver.execute_script(f"arguments[0].scrollTop = arguments[0].scrollTop + {scroll_distance}", scrollable_element)
    time.sleep(3)  # Allow time for new content to load

    # Check if new content has been loaded
    new_links = extract_links()
    if new_links.issubset(all_links):  # No new links loaded
        retry_count += 1
        if retry_count >= max_retries:  # Confirm no new links after retries
            print("No new links loaded after retries. Stopping scroll.")
            break
        else:
            print(f"No new links loaded. Retrying ({retry_count}/{max_retries})...")
            time.sleep(5)  # Wait longer before retrying
    else:
        retry_count = 0  # Reset retry count if new links are found

    scroll_count += 1

# Print all links
print(f"Total links found: {len(all_links)}")
for idx, link in enumerate(all_links, 1):
    print(f"{idx}: {link}")

# Close WebDriver
driver.quit()

Scroll 1: Found 10 links.
No new links loaded. Retrying (1/5)...
Scroll 2: Found 10 links.
No new links loaded. Retrying (2/5)...
Scroll 3: Found 10 links.
No new links loaded. Retrying (3/5)...


KeyboardInterrupt: 

In [81]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# Initialize WebDriver
driver = webdriver.Chrome()
driver.get("https://www.google.com/maps/place/Agence+Attijari+Wafa+Banque/@33.9867338,-7.0102649,23139m/data=!3m1!1e3!4m12!1m2!2m1!1sattijari!3m8!1s0xda76cdbaa2bfa2f:0x396699bfc2f03fe0!8m2!3d33.9758475!4d-6.8700879!9m1!1b1!15sCghhdHRpamFyaSIDiAEBkgEEYmFua-ABAA!16s%2Fg%2F11c2y8rcsq!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMxMi4wIKXMDSoASAFQAw%3D%3D")

# Wait for the scrollable element
scrollable_element = WebDriverWait(driver, 15).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "div.m6QErb.DxyBCb.kA9KIf.dS8AEf.XiKgde.ecceSd"))
)

# Function to extract links
def extract_links():
    business_links = driver.find_elements(By.CSS_SELECTOR, "a.hfpxzc")
    [link.get_attribute("href") for link in business_links]

# Scroll and Load More Results
scroll_distance = 2000  # Scroll distance in pixels
max_scrolls = 1  # Maximum number of scroll attempts
scroll_count = 0
all_links = set()  # Store all unique links
retry_count = 0

while scroll_count < max_scrolls:
    # Extract links in the current view
    current_links = extract_links()
    all_links.update(current_links)  # Add new links to the set

    # Print the number of links found in the current view
    print(f"Scroll {scroll_count + 1}: Found {len(current_links)} links.")

    # Scroll down by the fixed distance
    driver.execute_script(f"arguments[0].scrollTop = arguments[0].scrollTop + {scroll_distance}", scrollable_element)
    time.sleep(3)  # Allow time for new content to load

    # Wait for new links to load
    try:
        WebDriverWait(driver, 10).until(
            lambda driver: len(extract_links()) > len(current_links)
        )
    except Exception as e:
        print("No new links loaded after scrolling. Retrying...")
        retry_count += 1
        time.sleep(5)  # Wait longer before retrying

    retry_count = 0  # Reset retry count if new links are found
    scroll_count += 1

# Print all links
print(f"Total links found: {len(all_links)}")
for idx, link in enumerate(all_links, 1):
    print(f"{idx}: {link}")

# Close WebDriver
driver.quit()

TypeError: 'NoneType' object is not iterable

# For agence

In [None]:
# Function to initialize the WebDriver
def initialize_driver():
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    chrome_driver_path = ChromeDriverManager().install()
    service = Service(chrome_driver_path)
    driver = webdriver.Chrome(service=service, options=options)
    return driver


scroll_distance = 2000  # Scroll distance in pixels
max_scrolls = 5  # Increase scroll attempts to load more data
scroll_count = 0
all_links = set()  # Store all unique links
retry_count = 0

driver = initialize_driver()  # Initialize WebDriver inside the task
driver.get("https://www.google.com/maps/search/attijari/@33.9866448,-7.0522163,30768m/data=!3m2!1e3!4b1!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMxMi4wIKXMDSoASAFQAw%3D%3D")

all_data = []

def extract_links():
    elements = driver.find_elements(By.CSS_SELECTOR, "a.hfpxzc")
    return {el.get_attribute("href") for el in elements if el.get_attribute("href")}

while scroll_count < max_scrolls:
    current_links = extract_links()
    all_links.update(current_links)  # Add new links to the set

    print(f"Scroll {scroll_count + 1}: Found {len(current_links)} links.")
    
    # Scroll down to load more elements
    try:
        scrollable_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "div.m6QErb.DxyBCb.kA9KIf.dS8AEf.XiKgde.ecceSd"))
        )
        driver.execute_script("arguments[0].scrollTop += arguments[0].scrollHeight", scrollable_element)
        time.sleep(3)
    except Exception as e:
        print(f"⚠️ Erreur lors du scrolling : {e}")

    # Wait for new links to load
    try:
        WebDriverWait(driver, 10).until(
            lambda d: len(extract_links()) > len(current_links)
        )
    except Exception:
        print("⚠️ No new links loaded after scrolling. Retrying...")
        retry_count += 1
        if retry_count > 3:
            break  # Stop retrying if too many failures
        time.sleep(5)
    scroll_count += 1

print(all_links)

Scroll 1: Found 10 links.
⚠️ No new links loaded after scrolling. Retrying...
Scroll 2: Found 10 links.
⚠️ No new links loaded after scrolling. Retrying...
Scroll 3: Found 10 links.
⚠️ No new links loaded after scrolling. Retrying...
Scroll 4: Found 10 links.
⚠️ No new links loaded after scrolling. Retrying...
{'https://www.google.com/maps/place/Attijariwafa+Bank/data=!4m7!3m6!1s0xda71348e3a89937:0x7ca016fdc103cbe0!8m2!3d33.9540508!4d-6.8530095!16s%2Fg%2F11c55c3gfb!19sChIJN5mo40gTpw0R4MsDwf0WoHw?authuser=0&hl=fr&rclk=1', 'https://www.google.com/maps/place/Attijariwafa+Bank/data=!4m7!3m6!1s0xda76c89b08f5a69:0x9992d9dde387ebda!8m2!3d34.0052983!4d-6.8491273!16s%2Fg%2F11c806q09t!19sChIJaVqPsIlspw0R2uuH493Zkpk?authuser=0&hl=fr&rclk=1', 'https://www.google.com/maps/place/Attijariwafa+Bank/data=!4m7!3m6!1s0xda76d3b35cfe913:0xdce4a84d626d60a1!8m2!3d33.9756931!4d-6.8890203!16s%2Fg%2F11b6hqgdtc!19sChIJE-nPNTttpw0RoWBtYk2o5Nw?authuser=0&hl=fr&rclk=1', 'https://www.google.com/maps/place/Attijari+W

# For reviews

In [5]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import time

# Function to initialize the WebDriver
def initialize_driver():
    options = Options()
    options.add_argument("--headless")  # Run in headless mode
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    chrome_driver_path = ChromeDriverManager().install()
    service = Service(chrome_driver_path)
    driver = webdriver.Chrome(service=service, options=options)
    return driver

# Initialize WebDriver
driver = initialize_driver()
driver.get("https://www.google.com/maps/place/CIH/@33.999388,-6.9165747,15382m/data=!3m1!1e3!4m12!1m2!2m1!1scih+bank!3m8!1s0xda76ce1ba996839:0xea0bce250e2160a2!8m2!3d33.9863553!4d-6.8763643!9m1!1b1!15sCghjaWggYmFuayIDiAEBkgEEYmFua-ABAA!16s%2Fg%2F1q5bxc5q5!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMxMi4wIKXMDSoASAFQAw%3D%3D")

# Wait for the reviews section to load
try:
    WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "div.m6QErb.DxyBCb.kA9KIf.dS8AEf.XiKgde"))
    )
except Exception as e:
    print("Timed out waiting for reviews section to load.")
    driver.quit()
    exit()

# Function to extract reviews
def extract_reviews():
    reviews = driver.find_elements(By.CSS_SELECTOR, "div.MyEned span.wiI7pd")
    return [review.text for review in reviews]

# Scroll and extract reviews
scroll_distance = 2000  # Scroll distance in pixels
max_scrolls = 10  # Maximum number of scroll attempts
scroll_count = 0
all_reviews = set()  # Use a set to store unique reviews
retry_count = 0

while scroll_count < max_scrolls:
    # Extract reviews in the current view
    current_reviews = extract_reviews()
    unique_reviews = set(current_reviews) - all_reviews  # Get only new reviews
    all_reviews.update(unique_reviews)  # Add new reviews to the set

    print(f"Scroll {scroll_count + 1}: Found {len(unique_reviews)} new reviews.")

    # Scroll down by the fixed distance
    try:
        scrollable_element = driver.find_element(By.CSS_SELECTOR, "div.m6QErb.DxyBCb.kA9KIf.dS8AEf.XiKgde")
        driver.execute_script(f"arguments[0].scrollTop += {scroll_distance}", scrollable_element)
        time.sleep(3)  # Allow time for new reviews to load
    except Exception as e:
        print(f"⚠️ Error during scrolling: {e}")

    # Wait for new reviews to load
    try:
        WebDriverWait(driver, 10).until(
            lambda d: len(extract_reviews()) > len(current_reviews)
        )
    except Exception:
        print("⚠️ No new reviews loaded after scrolling. Retrying...")
        retry_count += 1
        if retry_count > 5:
            break  # Stop retrying if too many failures
        time.sleep(5)

    scroll_count += 1

# Print all extracted reviews
print("\nAll reviews found:")
for idx, review in enumerate(all_reviews, 1):
    print(f"{idx}: {review}")

# Close WebDriver
driver.quit()
print(f"Total unique reviews found: {len(all_reviews)}")

Scroll 1: Found 8 new reviews.
⚠️ No new reviews loaded after scrolling. Retrying...
Scroll 2: Found 0 new reviews.
Scroll 3: Found 10 new reviews.
⚠️ No new reviews loaded after scrolling. Retrying...
Scroll 4: Found 0 new reviews.
Scroll 5: Found 9 new reviews.
⚠️ No new reviews loaded after scrolling. Retrying...
Scroll 6: Found 0 new reviews.
Scroll 7: Found 10 new reviews.
⚠️ No new reviews loaded after scrolling. Retrying...
Scroll 8: Found 0 new reviews.
Scroll 9: Found 9 new reviews.
⚠️ No new reviews loaded after scrolling. Retrying...
Scroll 10: Found 0 new reviews.
⚠️ No new reviews loaded after scrolling. Retrying...

All reviews found:
1: Mauvais service Le caissier doit être sanctionné
2: J'ai fait échouer une banque dans le service et c'était suffisant pour une banque
3: J'avais besoin d'activer mon compte pendant des jours je viens la dame en gauche me donne chaque fois une excuse ( ya pas de connexion, le système a tomber....) Mais la dernière fois elle a carrément jur

In [None]:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def initialize_driver():
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")  # Run in headless mode
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    driver = webdriver.Chrome(options=options)
    return driver

def extract_links(**kwargs):
    driver = initialize_driver()
    
    # List of search result URLs to scrape
    search_urls = [
        "https://www.google.com/maps/search/attijari/@33.9866448,-7.0522163,30768m/data=!3m2!1e3!4b1!5m1!1e1?entry=ttu",
        "https://www.google.com/maps/search/attijari/@34.0023,-6.8444,30768m/data=!3m2!1e3!4b1!5m1!1e1?entry=ttu"
        # Add more search URLs if needed
    ]
    
    all_links = set()
    
    def extract():
        elements = driver.find_elements(By.CSS_SELECTOR, "a.hfpxzc")
        return {el.get_attribute("href") for el in elements if el.get_attribute("href")}
    
    for url in search_urls:
        driver.get(url)
        time.sleep(3)  # Let the page load

        scroll_distance = 1000
        max_scrolls = 5
        scroll_count = 0
        retry_count = 0

        while scroll_count < max_scrolls:
            current_links = extract()
            all_links.update(current_links)
            print(f"🔄 Page: {url} | Scroll {scroll_count + 1}: Found {len(current_links)} links.")

            try:
                scrollable_element = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "div.m6QErb.DxyBCb.kA9KIf.dS8AEf.XiKgde.ecceSd"))
                )
                driver.execute_script("arguments[0].scrollTop += arguments[0].scrollHeight", scrollable_element)
                time.sleep(3)
            except Exception as e:
                print(f"⚠️ Scrolling Error: {e}")

            try:
                WebDriverWait(driver, 10).until(
                    lambda d: len(extract()) > len(current_links)
                )
            except Exception:
                print("⚠️ No new links loaded after scrolling. Retrying...")
                retry_count += 1
                if retry_count > 3:
                    break
                time.sleep(5)

            scroll_count += 1

    driver.quit()
    kwargs['ti'].xcom_push(key='all_links', value=list(all_links))
    return list(all_links)


NameError: name 'Options' is not defined

In [12]:
sum(1 for review in all_reviews if "Service de déménagement".lower() in review.lower())

1

ModuleNotFoundError: No module named 'transformers'

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import time

# Function to initialize the WebDriver
def initialize_driver():
    options = Options()
    options.add_argument("--headless")  # Run in headless mode
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    chrome_driver_path = ChromeDriverManager().install()
    service = Service(chrome_driver_path)
    driver = webdriver.Chrome(service=service, options=options)
    return driver

# Initialize WebDriver
driver = initialize_driver()
driver.get("https://www.google.com/maps/place/%D8%A8%D9%86%D9%83+%D8%A3%D9%81%D8%B1%D9%8A%D9%82%D9%8A%D8%A7%E2%80%AD/@33.5265726,-7.7316491,13z/data=!4m13!1m3!2m2!1sBMCE+Bank+of+Africa!6e2!3m8!1s0xda62cb2e1b355d9:0x92b1eb404df1b850!8m2!3d33.5563893!4d-7.6848036!9m1!1b1!15sChNCTUNFIEJhbmsgb2YgQWZyaWNhkgEEYmFua-ABAA!16s%2Fg%2F11crv4pc1x!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMxMi4wIKXMDSoASAFQAw%3D%3D")

# Wait for the reviews section to load
try:
    WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "div.m6QErb.DxyBCb.kA9KIf.dS8AEf.XiKgde"))
    )
except Exception as e:
    print("Timed out waiting for reviews section to load.")
    driver.quit()
    exit()

# Function to extract reviews
def extract_reviews():
    reviews = driver.find_elements(By.CSS_SELECTOR, "div.MyEned span.wiI7pd")
    return [review.text for review in reviews]


# Scroll and extract reviews
scroll_distance = 1000  # Scroll distance in pixels
max_scrolls = 10  # Maximum number of scroll attempts
scroll_count = 0
all_reviews = []  # Store all reviews
retry_count = 0

while scroll_count < max_scrolls:
    # Extract reviews in the current view
    current_reviews = extract_reviews()
    all_reviews.extend(current_reviews)  # Add new reviews to the list

    print(f"Scroll {scroll_count + 1}: Found {len(current_reviews)} reviews.")

    # Scroll down by the fixed distance
    try:
        scrollable_element = driver.find_element(By.CSS_SELECTOR, "div.m6QErb.DxyBCb.kA9KIf.dS8AEf.XiKgde")
        driver.execute_script(f"arguments[0].scrollTop += {scroll_distance}", scrollable_element)
        time.sleep(3)  # Allow time for new reviews to load
    except Exception as e:
        print(f"⚠️ Error during scrolling: {e}")

    # Wait for new reviews to load
    try:
        WebDriverWait(driver, 10).until(
            lambda d: len(extract_reviews()) > len(current_reviews)
        )
    except Exception:
        print("⚠️ No new reviews loaded after scrolling. Retrying...")
        retry_count += 1
        if retry_count > 3:
            break  # Stop retrying if too many failures
        time.sleep(5)

    scroll_count += 1

# Print all extracted reviews
print("\nAll reviews found:")
for idx, review in enumerate(all_reviews, 1):
    print(f"{idx}: {review}")

# Close WebDriver
driver.quit()

Scroll 1: Found 4 reviews.
⚠️ No new reviews loaded after scrolling. Retrying...
Scroll 2: Found 4 reviews.
⚠️ No new reviews loaded after scrolling. Retrying...
Scroll 3: Found 4 reviews.
⚠️ No new reviews loaded after scrolling. Retrying...
Scroll 4: Found 4 reviews.
⚠️ No new reviews loaded after scrolling. Retrying...

All reviews found:
1: Meilleur banque
2: Fidélité Casablanca
3: Non
4: Fidélité Casablanca
5: Meilleur banque
6: Fidélité Casablanca
7: Non
8: Fidélité Casablanca
9: Meilleur banque
10: Fidélité Casablanca
11: Non
12: Fidélité Casablanca
13: Meilleur banque
14: Fidélité Casablanca
15: Non
16: Fidélité Casablanca


# Task extract_data

In [None]:
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.utils.dates import days_ago
from sqlalchemy import create_engine, text
from sqlalchemy.exc import SQLAlchemyError
import time
import pandas as pd
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from webdriver_manager.chrome import ChromeDriverManager
from deep_translator import GoogleTranslator
from langdetect import detect
from datetime import timedelta

def extract_data(**kwargs):
    driver = initialize_driver()
    all_data = []

    try:
        # Open the main page
        driver.get("https://www.google.com/maps/search/attijari/@33.9867338,-7.0102649,11934m/data=!3m2!1e3!4b1!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMxMi.0wIKXMDSoASAFQAw%3D%3D")

        # Extract links to individual bank pages
        links = WebDriverWait(driver, 15).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, "a.hfpxzc"))
        )
        links = [link.get_attribute("href") for link in links]

        for index, url in enumerate(links, start=1):
            print(f"🔍 Scraping URL {index}: {url}")
            driver.get(url)
            time.sleep(2)  # Let the page load

            # Extract bank name and address
            try:
                bank_name = WebDriverWait(driver, 5).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "h1.DUwDvf.lfPIob"))
                ).text.strip()
            except:
                bank_name = f"Banque {index}"

            try:
                address = WebDriverWait(driver, 5).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "div.Io6YTe.fontBodyMedium.kR99db.fdkmkc"))
                ).text.strip()
            except:
                address = "Non trouvée"

            # Click on the reviews section
            try:
                review_div = WebDriverWait(driver, 5).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "div.Gpq6kf"))
                )
                review_div.click()
                time.sleep(2)
            except:
                print("⚠️ Impossible de cliquer sur 'reviews'")

            # Extract reviews
            reviews = scroll_and_extract_reviews(driver)

            # Extract dates and ratings
            try:
                date_elements = driver.find_elements(By.CSS_SELECTOR, "span.rsqaWe")
                rating_elements = driver.find_elements(By.CSS_SELECTOR, "span.kvMYJc")

                dates = [date.text.strip() for date in date_elements]
                ratings = [len(r.find_elements(By.CLASS_NAME, "elGi1d")) for r in rating_elements]
            except Exception as e:
                print(f"⚠️ Erreur lors de l'extraction des avis : {e}")
                continue

            # Store data
            for i in range(len(reviews)):
                all_data.append({
                    "bank_name": bank_name,
                    "branche_name": f"{bank_name} {address}",
                    "address": address,
                    "review": reviews[i],
                    "rating": ratings[i] if i < len(ratings) else "N/A",
                    "review_date": dates[i] if i < len(dates) else "N/A"
                })

    except Exception as e:
        print(f"⚠️ Erreur lors de l'exécution du script : {e}")
    finally:
        driver.quit()

    kwargs['ti'].xcom_push(key='scraped_data', value=all_data)
    return all_data

# Tp

In [15]:
import requests
from bs4 import BeautifulSoup

url = "https://en.wikipedia.org/wiki/Static_web_page"
response = requests.get(url)

if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')


    overview_title = soup.find('h2', id='Overview')
    
    if overview_title:
        print(overview_title.text.strip())  
        print(response.status_code)
    else:
        print("Overview section not found.")    
else:
    print(f"Error: {response.status_code}")


Overview
200


# <strong>Phase 2: Data Cleaning & Transformation</strong>

In [None]:
import pandas as pd

data = pd.read_csv('bank_reviews.csv')

data

In [None]:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Initialize the driver
def initialize_driver():
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")  # Run browser in headless mode
    driver = webdriver.Chrome(options=options)
    return driver

# Function to extract links from search results
def extract_links():
    driver = initialize_driver()

    # List of search result URLs to scrape
    search_urls = [
        "https://www.google.com/maps/search/umnia/@33.9861174,-7.0522176,61536m/data=!3m1!1e3!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMxMi4wIKXMDSoASAFQAw%3D%3D",
        "https://www.google.com/maps/search/bank+pop/@33.9869065,-7.0522175,46278m/data=!3m2!1e3!4b1!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMxMi4wIKXMDSoASAFQAw%3D%3D"
    ]

    all_links = {}  # Dictionary to store {search_url: [list_of_links]}

    def extract():
        elements = driver.find_elements(By.CSS_SELECTOR, "a.hfpxzc")
        return {el.get_attribute("href") for el in elements if el.get_attribute("href")}

    for url in search_urls:
        driver.get(url)
        time.sleep(3)  # Let the page load

        scroll_distance = 2000
        max_scrolls = 5
        scroll_count = 0
        retry_count = 0

        all_links[url] = set()

        while scroll_count < max_scrolls:
            current_links = extract()
            all_links[url].update(current_links)
            print(f"🔄 Page: {url} | Scroll {scroll_count + 1}: Found {len(current_links)} links.")

            try:
                scrollable_element = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "div.m6QErb.DxyBCb.kA9KIf.dS8AEf.XiKgde.ecceSd"))
                )
                driver.execute_script("arguments[0].scrollTop += arguments[0].scrollHeight", scrollable_element)
                time.sleep(3)
            except Exception as e:
                print(f"⚠️ Scrolling Error: {e}")

            try:
                WebDriverWait(driver, 10).until(
                    lambda d: len(extract()) > len(current_links)
                )
            except Exception:
                print("⚠️ No new links loaded after scrolling. Retrying...")
                retry_count += 1
                if retry_count > 3:
                    break
                time.sleep(5)

            scroll_count += 1

    driver.quit()
    return all_links

# Function to extract data from the links
def extract_data(all_links):
    driver = initialize_driver()
    all_data = []

    for search_url, bank_urls in all_links.items():
        print(f"Processing URLs from: {search_url}")

        # Set correct bank name based on search_url
        if "umnia" in search_url:
            bank_name = "Umnia Bank"
        elif "bank+pop" in search_url:
            bank_name = "Banque Populaire"
        else:
            bank_name = "Unknown Bank"

        for index, url in enumerate(bank_urls, start=1):
            print(f"🔍 Scraping URL {index}: {url}")
            driver.get(url)
            time.sleep(5)

            # Extract bank address
            try:
                address_element = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "div.Io6YTe.fontBodyMedium.kR99db.fdkmkc"))
                )
                address = address_element.text.strip()
            except TimeoutException:
                address = "Not Found"

            # Click on "Avis" button to reveal reviews
            try:
                buttons = WebDriverWait(driver, 10).until(
                    EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.RWPxGd button"))
                )
                buttons[1].click()
                time.sleep(5)
            except (TimeoutException, IndexError):
                print(f"⚠️ 'Avis' button not found for {bank_name}")

            # Extract reviews
            def extract_reviews():
                reviews = driver.find_elements(By.CSS_SELECTOR, "div.MyEned span.wiI7pd")
                return [review.text for review in reviews]

            all_reviews = set()
            scroll_count = 0
            max_scrolls = 2
            retry_count = 0

            while scroll_count < max_scrolls:
                current_reviews = extract_reviews()
                unique_reviews = set(current_reviews) - all_reviews
                all_reviews.update(unique_reviews)

                print(f"Scroll {scroll_count + 1}: Found {len(unique_reviews)} new reviews.")

                try:
                    scrollable_element = driver.find_element(By.CSS_SELECTOR, "div.m6QErb.DxyBCb.kA9KIf.dS8AEf.XiKgde")
                    driver.execute_script("arguments[0].scrollTop += 2000", scrollable_element)
                    time.sleep(3)
                except Exception as e:
                    print(f"⚠️ Error during scrolling: {e}")

                try:
                    WebDriverWait(driver, 10).until(
                        lambda d: len(extract_reviews()) > len(current_reviews)
                    )
                except Exception:
                    print("⚠️ No new reviews loaded after scrolling. Retrying...")
                    retry_count += 1
                    if retry_count > 3:
                        break
                    time.sleep(10)

                scroll_count += 1

            reviews = list(all_reviews)

            # Extract review dates and ratings
            try:
                date_elements = driver.find_elements(By.CSS_SELECTOR, "span.rsqaWe")
                rating_elements = driver.find_elements(By.CSS_SELECTOR, "span.kvMYJc")

                dates = [date.text.strip() for date in date_elements]
                ratings = [len(r.find_elements(By.CLASS_NAME, "elGi1d")) for r in rating_elements]
            except Exception as e:
                print(f"⚠️ Error extracting review details: {e}")
                continue

            # Store extracted data
            for i in range(len(reviews)):
                try:
                    all_data.append({
                        "bank_name": bank_name,
                        "branch_name": bank_name + " " + address,
                        "address": address,
                        "review": reviews[i],
                        "rating": ratings[i] if i < len(ratings) else "N/A",
                        "review_date": dates[i] if i < len(dates) else "N/A"
                    })
                except Exception as e:
                    print(f"⚠️ Error processing review: {e}")

    driver.quit()
    return all_data

# Run the scraping
all_links = extract_links()
scraped_data = extract_data(all_links)

# Display the result
scraped_data


🔄 Page: https://www.google.com/maps/search/umnia/@33.9861174,-7.0522176,61536m/data=!3m1!1e3!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMxMi4wIKXMDSoASAFQAw%3D%3D | Scroll 1: Found 10 links.
⚠️ No new links loaded after scrolling. Retrying...
🔄 Page: https://www.google.com/maps/search/umnia/@33.9861174,-7.0522176,61536m/data=!3m1!1e3!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMxMi4wIKXMDSoASAFQAw%3D%3D | Scroll 2: Found 10 links.
⚠️ No new links loaded after scrolling. Retrying...
🔄 Page: https://www.google.com/maps/search/umnia/@33.9861174,-7.0522176,61536m/data=!3m1!1e3!5m1!1e1?entry=ttu&g_ep=EgoyMDI1MDMxMi4wIKXMDSoASAFQAw%3D%3D | Scroll 3: Found 10 links.


In [None]:
import time
import json
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
def initialize_driver():
    options = webdriver.FirefoxOptions()
    
    options.set_preference(
        "general.useragent.override",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_3_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Safari/605.1.15"
    )

    driver = webdriver.Firefox(options=options)
    return driver

# Paramètres de recherche pour les banques au Maroc
moroccan_banks = [
    "Attijariwafa+Bank",
    "Banque+Populaire",
    "Al+Barid+Bank",
    "CIH+Bank",
]

# Liste des villes marocaines
cities = [
    "Casablanca", "Rabat", "Marrakech", "Fes", "Tangier", "Agadir", "Oujda", 
    "Laayoune", "Tetouan", "Kenitra", "Nador", "Taza", "Errachidia", 
    "El+Jadida", "Essaouira"
]

# Latitudes et longitudes correspondantes (à garder coordonnées correctes)
latitudes = [
    "33.5731104", "34.020882", "31.629472", "34.033126", "35.759465", 
    "30.427755", "34.680569", "27.150000", "35.566666", "34.261013", 
    "35.168781", "34.213334", "31.931444", "33.231706", "31.508492"
]
longitudes = [
   "-7.5898434", "-6.841650", "-7.981084", "-5.000000", "-5.833954", 
   "-9.598107", "-1.891180", "-13.199999", "-5.366666", "-6.589013", 
   "-2.928345", "-4.011192", "-4.426003", "-8.500000", "-9.759504"
]

# Génération des URLs de recherche Google Maps
def generate_search_urls():
    search_urls = []
    for bank in moroccan_banks:
        for city, lat, long in zip(cities, latitudes, longitudes):
            url = f'https://www.google.com/maps/search/{bank}+{city}/@{lat},{long},14059m/data=!3m2!1e3!4b1!5m1!1e1?entry=ttu'
            search_urls.append(url)
    return search_urls

# Extraction des liens des agences pour chaque URL de recherche
def extract_links(driver, search_urls):
    all_links = {}

    def extract():
        elements = driver.find_elements(By.CSS_SELECTOR, "a[href*='/place/']")
        return {el.get_attribute("href") for el in elements if el.get_attribute("href")}

    for url in search_urls:
        driver.get(url)
        time.sleep(3)
        scroll_count = 0
        max_scrolls = 3  # Suggestion : augmenter à 5 si tu veux récupérer plus de résultats
        links_in_page = set()

        while scroll_count < max_scrolls:
            current_links = extract()
            links_in_page.update(current_links)
            print(f"Page {url} | Scroll {scroll_count + 1}: Found {len(current_links)} links.")

            try:
                scrollable_element = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "div[role='feed']"))
                )
                driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", scrollable_element)
                time.sleep(3)
            except Exception as e:
                print(f"Error scrolling: {e}")
                break

            scroll_count += 1

        all_links[url] = list(links_in_page)

    return all_links

# Extraction des infos d'une agence à partir de son lien
def extract_data(driver, bank_name, all_links):
    all_data = []
    for url in all_links:
        print(f"Processing: {url}")
        driver.get(url)
        time.sleep(5)

        # Adresse
        try:
            address_element = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "div.Io6YTe.fontBodyMedium.kR99db.fdkmkc"))
            )
            address = address_element.text.strip()
        except TimeoutException:
            address = "Not Found"

        # Avis clients
        reviews = []
        try:
            review_elements = WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.MyEned span.wiI7pd"))
            )
            for review_element in review_elements:
                reviews.append(review_element.text)
        except TimeoutException:
            reviews = ["No reviews found"]

        # Dates et notes
        dates = []
        ratings = []
        try:
            date_elements = driver.find_elements(By.CSS_SELECTOR, "span.rsqaWe")
            rating_elements = driver.find_elements(By.CSS_SELECTOR, "span.kvMYJc")

            dates = [date.text.strip() for date in date_elements]

            # Suggestion : récupérer directement le texte de la note si possible
            ratings = [rating.get_attribute("aria-label") or "Unknown" for rating in rating_elements]
        except Exception as e:
            print(f"Error extracting review details: {e}")

        for i in range(len(reviews)):
            all_data.append({
                "bank_name": bank_name,
                "branch_name": bank_name + " " + address,
                "address": address,
                "review": reviews[i] if i < len(reviews) else "",
                "date": dates[i] if i < len(dates) else "",
                "rating": ratings[i] if i < len(ratings) else ""
            })

    return all_data

# Sauvegarde JSON
def save_data(data, filename="bank_reviews.json"):
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

# Sauvegarde CSV
def save_data_csv(data, filename="bank_reviews.csv"):
    df = pd.DataFrame(data)
    df.to_csv(filename, index=False)

# Fonction principale
def main():
    driver = initialize_driver()
    search_urls = generate_search_urls()

    all_links = extract_links(driver, search_urls)

    all_data = []
    for search_url, bank_urls in all_links.items():
        print(f"Processing URLs from: {search_url}")
        bank_name = ""
        for bank in moroccan_banks:
            if bank in search_url:
                bank_name = bank.replace('+', ' ').replace('Bank', '').strip()
                break

        data = extract_data(driver, bank_name, bank_urls)
        all_data.extend(data)

    # Sauvegarde JSON et CSV
    save_data(all_data, "bank_reviewss.json")
    save_data_csv(all_data, "bank_reviewss.csv")
    driver.quit()

if _name_ == "_main_":
    main()