In [5]:
import time
import csv

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [53]:
# Setup Chrome options
chrome_options = Options()
chrome_options.add_argument("--headless")  # Runs Chrome in headless mode.
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")

# Switches/options commonly used to make the session look less like an
# automated browser (NOTE(review): effect depends on the Chrome version).
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('useAutomationExtension', False)

# Exactly one window size is set: passing two different sizes leaves the
# effective viewport ambiguous.
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--disable-features=NetworkService,NetworkServiceInProcess")
chrome_options.add_argument("--enable-features=NetworkServiceOutOfProcess")

In [54]:
# Pages to scrape, visited in list order by the scraping loop.
# The first entry is a maps.app.goo.gl short link; the others are direct
# google.com/maps/contrib/<id> URLs. The cookie dialog is only handled while
# loading the first entry.
urls = [
    "https://maps.app.goo.gl/4UgGJojF9W9y7gap6",
    "https://www.google.com/maps/contrib/107500847196278757767",
    "https://www.google.com/maps/contrib/102897822509174603995",
]

In [56]:
def clean_number_string(number_string):
    """Extract the leading number of a label and normalise its separators.

    The first whitespace-delimited token is taken, every "." is removed
    (thousands separator) and every "," is replaced by "." (decimal
    separator), e.g. "268.924 punti" -> "268924" and "1.234,5" -> "1234.5".

    Args:
        number_string: Text that starts with a number, optionally followed by
            a label.

    Returns:
        The normalised number as a string, or "" when the input is empty or
        contains only whitespace.
    """
    # split() without arguments ignores leading whitespace and splits on any
    # whitespace character (tabs, newlines, non-breaking spaces), not just " ".
    tokens = number_string.split()
    if not tokens:
        return ""
    return tokens[0].replace(".", "").replace(",", ".")

# Initialize the Chrome driver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

# Data storage: one dict per successfully scraped profile
data = []

# Timeout (seconds) for the explicit waits issued right after a page load
waiting_time_per_url = 10

# try/finally guarantees that the browser is closed even when an unexpected
# error (for example a failing driver.get) escapes the per-URL handling.
try:
    # Iterate over each URL
    for i, url in enumerate(urls):
        driver.get(url)
        wait = WebDriverWait(driver, waiting_time_per_url)

        # Handle the cookie dialog, which is only dealt with on the first page load
        if i == 0:
            try:
                reject_field = "Rifiuta tutto"
                # Poll until the button is clickable instead of sleeping for a
                # fixed time and hoping the dialog has rendered by then.
                cookie_button = wait.until(
                    EC.element_to_be_clickable((By.XPATH, f"//button[@aria-label='{reject_field}']"))
                )
                cookie_button.click()
                print("Clicked the cookie button.")
                print('-' * 50)
            except Exception as e:
                print("Could not find or click the cookie acceptance button.", str(e))
                driver.save_screenshot("debug_screenshot_accept_cookies.png")
                # Save the HTML of the page for debugging
                html_content = driver.page_source
                with open('debug_html_cookies.html', 'w', encoding='utf-8') as f:
                    f.write(html_content)
                # Abort the whole run when the dialog could not be dismissed
                break

        try:
            user_data = {}

            # Extract the name using the specific classes and attributes.
            # wait.until either returns the located element or raises, so no
            # fallback value is needed here.
            name_element = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "h1.geAzIe.F8kQwb[role='button']")))
            user_data['Name'] = name_element.text

            # Read the score, then click it to open the detailed view
            score_element = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'span.VEEl9c#ucc-0')))
            user_data['Score'] = clean_number_string(score_element.text)

            score_element.click()

            # The details container is awaited with half the page-load timeout
            wait = WebDriverWait(driver, waiting_time_per_url / 2)

            container = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, 'div.QrGqBf')))

            # Find all the entries within the container
            entries = container.find_elements(By.CSS_SELECTOR, 'div.nKYSz')

            # Extract the field name and value of each entry; a broken entry
            # is reported and skipped without discarding the whole profile.
            fields = {}
            for entry in entries:
                try:
                    field_name = entry.find_element(By.CSS_SELECTOR, 'span.FM5HI').text
                    field_value = entry.find_element(By.CSS_SELECTOR, 'span.AyEQdd').text
                    fields[field_name] = clean_number_string(field_value)
                except Exception as e:
                    print(f"Error extracting field: {str(e)}")
            user_data.update(fields)

            # Add current date to user data
            user_data['Date'] = time.strftime("%Y/%m/%d")

            # Add the url to the user data
            user_data['URL'] = url

            # Echo the collected record
            for field_name, field_value in user_data.items():
                print(f"{field_name}: {field_value}")

            data.append(user_data)

        except Exception as e:
            print(f"Failed to process {url}: {str(e)}")
            driver.save_screenshot("debug_screenshot.png")

        print('-' * 50)
finally:
    # Close the browser
    driver.quit()

Clicked the cookie button.
--------------------------------------------------
Name: Naresh Darji
Score: 268924
Recensioni: 695
Valutazioni: 40
Foto: 4203
Video: 287
Didascalie: 24
Risposte: 18927
Modifiche: 10751
Segnalato come errato: 4
Luoghi aggiunti: 1050
Strade aggiunte: 9236
Informazioni verificate: 5732
Domande e risposte: 374
Date: 2024/07/22
URL: https://maps.app.goo.gl/4UgGJojF9W9y7gap6
--------------------------------------------------
Name: Marco Davoli
Score: 708403
Recensioni: 149
Valutazioni: 13
Foto: 1147
Video: 73
Didascalie: 14
Risposte: 2412
Modifiche: 12659
Segnalato come errato: 0
Luoghi aggiunti: 29
Strade aggiunte: 42201
Informazioni verificate: 14
Domande e risposte: 6
Date: 2024/07/22
URL: https://www.google.com/maps/contrib/107500847196278757767
--------------------------------------------------
Name: Lucas
Score: 1132978
Recensioni: 455
Valutazioni: 45
Foto: 1866
Video: 163
Didascalie: 37
Risposte: 13270
Modifiche: 28079
Segnalato come errato: 5
Luoghi aggiun

In [48]:
# Save data to a CSV file
csv_file = "data.csv"

# Build the header from every record (first-seen order) rather than only the
# first one: profiles may expose different fields, and DictWriter raises
# ValueError for keys that are missing from fieldnames.
csv_columns = list(dict.fromkeys(key for row in data for key in row))

if not data:
    # Nothing was scraped (e.g. the run aborted early); data[0] would raise.
    print("No data to save.")
else:
    try:
        # Explicit UTF-8 so non-ASCII names/labels do not depend on the
        # platform's default encoding.
        with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
            # restval fills columns a given record does not have
            writer = csv.DictWriter(csvfile, fieldnames=csv_columns, restval='')
            writer.writeheader()
            writer.writerows(data)
    except IOError as e:
        print(f"I/O error while writing {csv_file}: {e}")

In [49]:
import gradio as gr

  from .autonotebook import tqdm as notebook_tqdm
