## Best Code so Far

In [5]:
from IPython import get_ipython
from IPython.display import display
import json
import os
import pandas as pd
import time
import re
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementClickInterceptedException, StaleElementReferenceException
from webdriver_manager.chrome import ChromeDriverManager

# Path to the JSON file containing branch URLs; main() passes it to
# load_branch_urls().
# NOTE(review): this is an absolute, machine-specific Windows path - adjust it
# before running the script on another machine.
JSON_PATH = r"C:\Users\rzzzc\BFARPy\Python\4th Sem\UCO_review_scrapping\uco_bank_branches.json"

def load_branch_urls(json_path, limit=5):
    """
    Load branch URLs from a JSON file.

    The file is expected to hold a JSON list whose items are either
    dictionaries or plain strings:

    * a dict with a "url" key contributes that value;
    * a dict without "url" whose values are all strings contributes its
      first value that starts with "http";
    * a string item is used directly when it starts with "http".

    Args:
        json_path: Path to the JSON file.
        limit: Maximum number of URLs to return, or None to return all.

    Returns:
        A list of URLs (possibly empty). An empty list is also returned when
        the file cannot be read or parsed, or when the top-level JSON value
        is not a list.
    """
    data = None
    loaded = False

    # Try UTF-8 first and retry with latin-1 only on a decoding failure.
    # Every other failure (missing file, malformed JSON, ...) is reported and
    # ends the load, including a failure during the latin-1 retry.
    for encoding in ("utf-8", "latin-1"):
        try:
            with open(json_path, "r", encoding=encoding) as f:
                data = json.load(f)
        except UnicodeDecodeError:
            continue
        except Exception as e:
            print("Error reading JSON file:", e)
            return []
        loaded = True
        break

    if not loaded:
        print("Error reading JSON file: could not decode with utf-8 or latin-1")
        return []

    # Validate the shape before indexing: data[0] on a JSON object would
    # raise KeyError.
    if not isinstance(data, list):
        print("Unexpected JSON format. Expected a list.")
        return []

    # Print first item for debugging
    if data:
        print("First item in JSON data:", data[0])

    urls = []
    for item in data:
        # Only check limit if it's not None
        if limit is not None and len(urls) >= limit:
            break

        if isinstance(item, dict):
            if "url" in item:
                urls.append(item["url"])
            elif all(isinstance(v, str) for v in item.values()):
                # No 'url' key: take the first value that looks like a URL.
                candidate = next(
                    (v for v in item.values() if v.startswith("http")), None
                )
                if candidate is not None:
                    urls.append(candidate)
        elif isinstance(item, str) and item.startswith("http"):
            urls.append(item)

    return urls

def is_owner_response(review_element):
    """
    Decide whether a review element is an owner response.

    Each indicator XPath is evaluated relative to review_element. The element
    counts as an owner response as soon as one indicator has a match whose
    first element is displayed. Lookup errors for an indicator are ignored
    and the next indicator is tried.

    Returns True for an owner response, False otherwise.
    """
    indicator_xpaths = (
        # Exact / partial text matches
        './/div[text()="Response from the owner"]',
        './/div[contains(text(), "Response from the owner")]',
        './/span[text()="Owner"]',
        # Class combination expected on owner responses
        './/div[contains(@class, "CDe7pd") and contains(@class, "RbETPb")]',
        # aria-label carried by owner responses
        './/div[contains(@aria-label, "Response from")]',
    )

    for indicator in indicator_xpaths:
        try:
            matches = review_element.find_elements(By.XPATH, indicator)
            if not matches:
                continue
            # Only the first match is inspected for visibility.
            if not matches[0].is_displayed():
                continue
            # Report which indicator triggered the detection.
            print(f"Found owner response indicator: {indicator}")
            return True
        except Exception:
            continue

    return False

def _find_scrollable_container(driver):
    """Return the first candidate element whose content overflows, or None."""
    scrollable_div_xpaths = [
        '//div[contains(@class, "m6QErb")]//div[@role="feed"]',
        '//div[contains(@class, "DxyBCb")]',
        '//div[@role="feed"]',
        '//div[contains(@class, "section-scrollbox")]',
        '//div[contains(@class, "m6QErb-HiaYvf")]',
        '//div[contains(@class, "m6QErb")]/div',
        '//div[contains(@class, "review-dialog-list")]',
        # Add more precise selectors for Google Maps reviews
        '//div[@data-review-id]/ancestor::div[@role="feed"]',
        '//div[contains(@class, "siAUzd-neVct")]',
        '//div[@role="main"]',
        '//div[contains(@class, "section-layout")]'
    ]

    for xpath in scrollable_div_xpaths:
        try:
            elements = driver.find_elements(By.XPATH, xpath)
        except Exception:
            continue

        for element in elements:
            try:
                # An element is scrollable when its content is taller than
                # its visible box.
                scroll_height = driver.execute_script("return arguments[0].scrollHeight", element)
                client_height = driver.execute_script("return arguments[0].clientHeight", element)

                if scroll_height > client_height:
                    print(f"Found scrollable container with xpath: {xpath}")
                    print(f"Scroll height: {scroll_height}, Client height: {client_height}")
                    return element
            except Exception as e:
                print(f"Error checking element scrollability: {e}")
                continue

    return None


def scroll_to_load_all_reviews(driver, max_scrolls=30):
    """
    Scroll the reviews container to load more reviews dynamically.

    Several scroll techniques are applied on every pass. Scrolling stops
    after max_scrolls passes, after 5 consecutive passes that do not change
    the review count, or on the first scrolling error.

    Args:
        driver: Selenium WebDriver positioned on a page showing reviews.
        max_scrolls: Maximum number of scroll passes.

    Returns:
        The number of review elements counted after scrolling.
    """
    print("Starting to scroll to load more reviews...")

    # Wait for reviews to be initially loaded
    time.sleep(3)

    scrollable = _find_scrollable_container(driver)
    if scrollable is None:
        print("Could not find scrollable container. Trying direct document scrolling.")
        scrollable = driver.find_element(By.TAG_NAME, 'body')

    # Count reviews before scrolling
    initial_review_count = count_review_elements(driver)
    print(f"Initial review count: {initial_review_count}")

    # Start from the initial count (not 0) so reviews already on the page are
    # not reported as newly loaded by the first pass.
    last_review_count = initial_review_count
    unchanged_count = 0
    scroll_count = 0

    # Scroll until we hit maximum or no new reviews load after multiple attempts
    while scroll_count < max_scrolls and unchanged_count < 5:
        try:
            # Method 1: Standard JavaScript scroll
            driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", scrollable)
            print(f"Method 1: Scrolled container, attempt {scroll_count+1}")

            # Method 2: Alternative scroll approach
            driver.execute_script("""
                var element = arguments[0];
                element.scrollTo({
                    top: element.scrollHeight,
                    behavior: 'smooth'
                });
            """, scrollable)
            print("Method 2: Smooth scrolled container")

            # Method 3: Scroll by a specific amount
            driver.execute_script("arguments[0].scrollTop += 1000;", scrollable)
            print("Method 3: Incremental scroll by 1000px")

            # Method 4: Bring the last loaded review into view
            try:
                reviews = driver.find_elements(By.XPATH, '//div[@data-review-id]')
                if reviews:
                    last_review = reviews[-1]
                    driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'end'});", last_review)
                    print("Method 4: Scrolled last review into view")
            except Exception as e:
                print(f"Method 4 failed: {e}")

            # Method 5: Direct body scroll (a fallback)
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            print("Method 5: Direct body scroll")

            # Wait longer for new content to load
            time.sleep(3)

            # Count reviews after scrolling
            current_reviews = count_review_elements(driver)
            print(f"Current review count: {current_reviews}")

            if current_reviews == last_review_count:
                unchanged_count += 1
                print(f"No new reviews loaded. Unchanged count: {unchanged_count}")

                # Take one screenshot per stagnation streak for debugging
                if unchanged_count == 1:
                    try:
                        screenshot_path = f"scroll_debug_{scroll_count}.png"
                        driver.save_screenshot(screenshot_path)
                        print(f"Saved debug screenshot to {screenshot_path}")
                    except Exception:
                        # Screenshots are best-effort debugging aids only.
                        pass

                # Try to click "Show More Reviews" button if no progress
                try_click_more_reviews(driver)
            else:
                unchanged_count = 0
                print(f"Success! Loaded {current_reviews - last_review_count} new reviews")

            last_review_count = current_reviews
            scroll_count += 1

        except Exception as e:
            print(f"Error while scrolling: {e}")
            break

    final_review_count = count_review_elements(driver)
    print(f"Finished scrolling. Total reviews loaded: {final_review_count}")
    return final_review_count

def try_click_more_reviews(driver):
    """
    Look for a "More Reviews"-style control and click the first visible one.

    A list of button XPaths is tried first; each visible match is clicked
    natively, falling back to a JavaScript click. If none of them works, any
    visible element whose text mentions "more review" is clicked (or its
    parent node, via JavaScript). After a successful click the function
    sleeps 3 seconds so new reviews can load.

    Returns True when something was clicked, False otherwise.
    """
    button_locators = (
        '//button[contains(text(), "More reviews")]',
        '//button[contains(text(), "Show more")]',
        '//button[contains(@aria-label, "more reviews")]',
        '//button[contains(@aria-label, "Show more reviews")]',
        '//span[contains(text(), "more reviews")]/ancestor::button',
        '//div[contains(text(), "More reviews") and @role="button"]',
        # Google Maps specific selectors
        '//button[contains(@data-tab-index, "0") and contains(text(), "review")]',
        '//button[contains(@class, "HHrUdb")]',
        '//span[text()="More"]/ancestor::button',
        '//div[contains(@jsaction, "pane.review.expandReview")]',
        '//div[text()="Show more reviews"]/ancestor::*[@role="button"]',
    )

    for locator in button_locators:
        try:
            for candidate in driver.find_elements(By.XPATH, locator):
                if not candidate.is_displayed():
                    continue

                print(f"Found 'More Reviews' button with text: {candidate.text}")
                try:
                    # Native click first
                    candidate.click()
                    print("Clicked 'More Reviews' button with direct click")
                except Exception as click_error:
                    print(f"Direct click failed: {click_error}, trying JavaScript click")
                    try:
                        # JavaScript click as fallback
                        driver.execute_script("arguments[0].click();", candidate)
                        print("Clicked 'More Reviews' button with JavaScript")
                    except Exception as js_error:
                        print(f"JavaScript click also failed: {js_error}")
                        continue

                # Give the page time to load the additional reviews.
                time.sleep(3)
                return True
        except Exception as lookup_error:
            print(f"Error with xpath {locator}: {lookup_error}")
            continue

    # Last resort: click any visible text that mentions more reviews.
    try:
        mentions = driver.find_elements(
            By.XPATH,
            '//*[contains(text(), "more review") or contains(text(), "More review")]',
        )
        for mention in mentions:
            if not mention.is_displayed():
                continue

            print(f"Found text mentioning more reviews: {mention.text}")
            try:
                mention.click()
                print("Clicked directly on text element")
            except Exception:
                # The text node itself may not be clickable; try its parent.
                try:
                    holder = driver.execute_script("return arguments[0].parentNode;", mention)
                    driver.execute_script("arguments[0].click();", holder)
                    print("Clicked on parent of text element")
                except Exception:
                    continue

            time.sleep(3)
            return True
    except Exception as fallback_error:
        print(f"Error in fallback method: {fallback_error}")

    return False
###
def count_review_elements(driver):
    """
    Return how many review elements are currently on the page.

    Two selectors are evaluated, each restricted to top-level matches so
    nested elements are not counted twice; the larger count wins. A selector
    that raises is skipped.
    """
    locators = (
        '//div[@data-review-id and not(ancestor::div[@data-review-id])]',  # Only top-level review elements
        '//div[contains(@class, "jftiEf") and not(ancestor::div[contains(@class, "jftiEf")])]',
    )

    best = 0
    for locator in locators:
        try:
            found = len(driver.find_elements(By.XPATH, locator))
            if found <= best:
                continue
            best = found
            print(f"Found {found} reviews with xpath: {locator}")
        except Exception:
            continue

    return best
###
def navigate_to_reviews_tab(driver, url, branch_name=""):
    """
    Navigate to the reviews tab using various methods.

    Strategies, in order:
      1. Build a direct reviews URL from the place ID found in url
         (the "!1s...!" segment) and load it.
      2. Reload url and click the first reviews button/tab that can be
         located and that results in review elements being present.
      3. Accept the current page if review elements are already present.

    Args:
        driver: Selenium WebDriver instance.
        url: Google Maps place URL of the branch.
        branch_name: Branch name used for logging and for the place segment
            of the direct reviews URL.

    Returns:
        True if review elements were found after any strategy, else False.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote_plus

    print(f"\nAttempting to navigate to reviews tab for: {branch_name}")

    # First try: Direct navigation via URL modification
    try:
        # Extract place ID from the URL
        match = re.search(r"!1s([^!]+)!", url)
        if match:
            place_id = match.group(1)
            print(f"Extracted place ID: {place_id}")

            # Percent-encode the name so characters such as "/", "&", "#" or
            # "?" cannot change the structure of the URL. Spaces still
            # become "+".
            safe_branch_name = quote_plus(branch_name) if branch_name else "location"
            reviews_url = f"https://www.google.com/maps/place/{safe_branch_name}/@0,0,10z/data=!4m7!3m6!1s{place_id}!8m2!3d0!4d0!9m1!1b1"
            print(f"Navigating directly to reviews URL: {reviews_url}")

            driver.get(reviews_url)
            time.sleep(5)  # Wait for the page to load

            # Check if we're on a page with reviews
            if count_review_elements(driver) > 0:
                print("Successfully navigated to reviews via URL modification")
                return True
            else:
                print("No reviews found via URL modification, trying alternative methods")
        else:
            print("Could not extract place ID from URL")
    except Exception as e:
        print(f"Error during direct navigation to reviews: {e}")

    # Second try: Click on the reviews tab or button
    try:
        # NOTE: each XPath below may wait up to 15 seconds before timing out.
        wait = WebDriverWait(driver, 15)

        # First make sure we're on the main page if URL modification failed
        if not url.endswith("/reviews"):
            driver.get(url)
            time.sleep(5)

        # Look for reviews button with various xpaths
        review_button_xpaths = [
            '//button[contains(@aria-label, " reviews")]',
            '//button[contains(@aria-label, "reviews")]',
            '//div[contains(@role, "button")][contains(., "reviews")]',
            '//div[contains(@jsaction, "pane.rating.moreReviews")]',
            '//span[contains(text(), "reviews")]/ancestor::button',
            '//div[contains(@class, "F7nice")]/span[2]/span/span',
            '//button[contains(@jsaction, "pane.rating.moreReviews")]',
            '//button[contains(@data-tab-index, "0") and contains(., "review")]',
            # New patterns specifically for reviews tab
            '//button[@data-tab="reviews" or @data-item-id="reviews"]',
            '//button[contains(@aria-label, "Reviews")]',
            '//div[contains(@role, "tab") and contains(., "review")]'
        ]

        for xpath in review_button_xpaths:
            try:
                print(f"Trying to find reviews button with: {xpath}")
                reviews_button = wait.until(EC.element_to_be_clickable((By.XPATH, xpath)))
                print(f"Found reviews button with xpath: {xpath}")
                print(f"Button text: {reviews_button.text}")

                # Native click first, JavaScript click as fallback
                try:
                    reviews_button.click()
                    print("Clicked reviews button with direct click")
                except Exception:
                    driver.execute_script("arguments[0].click();", reviews_button)
                    print("Clicked reviews button with JavaScript")

                time.sleep(4)

                # Check if reviews are visible
                if count_review_elements(driver) > 0:
                    print("Successfully navigated to reviews via button click")
                    return True
                else:
                    print("Clicked button but no reviews found, trying next method")
            except Exception as e:
                print(f"Error with xpath {xpath}: {e}")
                continue
    except Exception as e:
        print(f"Error navigating to reviews tab: {e}")

    # Third try: Look for a reviews section that's already visible
    if count_review_elements(driver) > 0:
        print("Reviews already visible on page")
        return True

    print("All navigation methods to reviews tab failed")
    return False
########
def _find_review_containers(driver):
    """Return top-level review containers from the first selector that matches any."""
    selectors = (
        ("selector", "review", '//div[@data-review-id and not(ancestor::div[@data-review-id])]'),
        ("fallback selector", "fallback", '//div[contains(@class, "jftiEf") and not(ancestor::div[contains(@class, "jftiEf")])]'),
    )
    for label, error_label, review_xpath in selectors:
        try:
            review_containers = driver.find_elements(By.XPATH, review_xpath)
        except Exception as e:
            print(f"Error with {error_label} selector: {e}")
            continue
        print(f"Found {len(review_containers)} reviews with {label}: {review_xpath}")
        # find_elements returns an empty list (it does not raise) when nothing
        # matches, so the fallback must also run on an empty result.
        if review_containers:
            return review_containers
    return []


def _review_dedup_key(review_container):
    """Return (label, key) identifying a review, or None if no key can be built."""
    try:
        review_id = review_container.get_attribute("data-review-id")
    except Exception:
        review_id = None
    # A missing attribute must not be used as a key: every container without
    # an ID would otherwise look like a duplicate of the first one.
    if review_id:
        return ("review ID", review_id)

    try:
        reviewer = review_container.find_element(By.XPATH, './/div[contains(@class, "d4r55")]').text
        review_date = review_container.find_element(By.XPATH, './/span[contains(@class, "rsqaWe")]').text
    except Exception:
        return None
    return ("review", f"{reviewer}_{review_date}")


def _expand_review_text(driver, review_container):
    """Click the first visible "More" button in the container (best effort)."""
    more_buttons = review_container.find_elements(
        By.XPATH, './/button[contains(text(), "More") or contains(text(), "more")]'
    )
    for button in more_buttons:
        try:
            if not button.is_displayed():
                continue
            driver.execute_script("arguments[0].click();", button)
            time.sleep(1)  # let the expanded text render
            return
        except Exception:
            continue


def _extract_review_fields(driver, review_container):
    """Read reviewer, rating, text and date from one review container."""
    review_data = {
        "reviewer": "Unknown",
        "rating": "N/A",
        "review_text": "N/A",
        "review_date": "N/A"
    }

    # Get reviewer name: first selector that yields non-empty text
    try:
        reviewer_xpaths = [
            './/div[contains(@class, "d4r55")]',
            './/div[contains(@class, "WNxzHc")]',
            './/span[contains(@class, "X43Kjb")]'
        ]
        for xpath in reviewer_xpaths:
            try:
                reviewer_element = review_container.find_element(By.XPATH, xpath)
                if reviewer_element.text:
                    review_data["reviewer"] = reviewer_element.text
                    break
            except NoSuchElementException:
                continue
    except Exception as e:
        print(f"Error extracting reviewer name: {e}")

    # Get rating: first number found in the element's aria-label
    try:
        rating_xpaths = [
            './/span[contains(@aria-label, " star")]',
            './/div[contains(@aria-label, " star")]',
            './/span[contains(@class, "kvMYJc")]'
        ]
        for xpath in rating_xpaths:
            try:
                rating_element = review_container.find_element(By.XPATH, xpath)
                aria_label = rating_element.get_attribute("aria-label")
                if aria_label:
                    # Extract number from "X stars" or "X star"
                    rating_match = re.search(r'(\d+(?:\.\d+)?)', aria_label)
                    if rating_match:
                        review_data["rating"] = rating_match.group(1)
                        break
            except NoSuchElementException:
                continue
    except Exception as e:
        print(f"Error extracting rating: {e}")

    # Get review text
    try:
        # Expand before reading: a truncated review has non-empty text, so
        # expanding only when the text is empty would never reveal the rest.
        _expand_review_text(driver, review_container)

        review_text_xpaths = [
            './/span[contains(@class, "wiI7pd")]',
            './/div[contains(@class, "MyEned")]',
            './/span[@jsaction="mouseup:JgzDve"]'
        ]
        for xpath in review_text_xpaths:
            text_elements = review_container.find_elements(By.XPATH, xpath)
            if text_elements:
                review_data["review_text"] = text_elements[0].text
                break
    except Exception as e:
        print(f"Error extracting review text: {e}")

    # Get review date
    try:
        date_xpaths = [
            './/span[contains(@class, "rsqaWe")]',
            './/span[contains(@class, "dehysf")]',
            './/div[contains(@class, "DU9Pgb")]'
        ]
        for xpath in date_xpaths:
            date_elements = review_container.find_elements(By.XPATH, xpath)
            if date_elements:
                review_data["review_date"] = date_elements[0].text
                break
    except Exception as e:
        print(f"Error extracting review date: {e}")

    return review_data


def extract_reviews(driver):
    """
    Extract individual reviews from the loaded reviews section.

    Containers are de-duplicated by their data-review-id attribute, or by
    "reviewer_date" when the attribute is missing. Containers recognised by
    is_owner_response() are skipped. A failure while processing one container
    is logged and does not stop the others.

    Args:
        driver: Selenium WebDriver showing a reviews list.

    Returns:
        A list of dicts with the keys "reviewer", "rating", "review_text"
        and "review_date". Fields that could not be read keep their
        placeholder value ("Unknown" / "N/A").
    """
    reviews = []
    processed_ids = set()  # Track processed review keys

    review_containers = _find_review_containers(driver)
    if not review_containers:
        return reviews

    print(f"Processing {len(review_containers)} review containers...")

    for i, review_container in enumerate(review_containers):
        try:
            dedup = _review_dedup_key(review_container)
            if dedup is not None:
                label, key = dedup
                if key in processed_ids:
                    print(f"Skipping duplicate {label}: {key}")
                    continue
                processed_ids.add(key)

            # Skip if this is an owner response rather than a review.
            # NOTE(review): if owner replies are nested inside the review
            # container, this also drops the review itself - confirm against
            # the live page structure.
            if is_owner_response(review_container):
                print(f"Skipping owner response in container {i+1}")
                continue

            review_data = _extract_review_fields(driver, review_container)

            # Add the review data to our collection
            reviews.append(review_data)
            print(f"Extracted review from {review_data['reviewer']} - {review_data['review_date']}")

        except Exception as e:
            print(f"Error processing review container {i+1}: {e}")

    print(f"Successfully extracted {len(reviews)} unique reviews")
    return reviews

def _clean_address(raw_text):
    """Drop icon-only/blank lines from scraped address text and join the rest."""
    stripped_lines = (line.strip() for line in raw_text.splitlines())
    # Lines without any letter or digit (blank lines, icon glyphs) carry no
    # address information.
    meaningful = [line for line in stripped_lines if any(ch.isalnum() for ch in line)]
    return ", ".join(meaningful)


def _screenshot_name(prefix, branch_name):
    """Build a screenshot file name that is safe on common file systems."""
    # Spaces map to "_" as before; other characters that are not word
    # characters, "." or "-" (e.g. "/", ":") are replaced as well.
    safe_name = re.sub(r"[^\w.-]", "_", branch_name)
    return f"{prefix}_{safe_name}.png"


def extract_branch_data(driver, url):
    """
    Extract branch details and reviews from a direct Google Maps branch URL.

    Args:
        driver: Selenium WebDriver instance.
        url: Google Maps place URL of the branch.

    Returns:
        A dict with the keys "name", "address", "rating", "pincode",
        "reviews" (list of review dicts) and "url". Details that could not be
        read are "Not found". If the page itself fails to load, the name is
        "Error Loading Page" and the other details are "N/A".
    """
    print(f"\nLoading URL: {url}")
    try:
        driver.get(url)
        time.sleep(5)  # Adjust if the page loads slowly
    except Exception as e:
        print(f"Error loading URL: {e}")
        return {
            "name": "Error Loading Page",
            "address": "N/A",
            "rating": "N/A",
            "pincode": "N/A",
            "reviews": [],
            "url": url
        }

    # --- Extract Basic Branch Details ---
    branch_name = "Not found"
    address = "Not found"
    rating = "Not found"
    pincode = "Not found"

    try:
        branch_name = driver.find_element(By.XPATH, '//h1[contains(@class,"DUwDvf")]').text
        print(f"Found branch name: {branch_name}")
    except Exception as e:
        print(f"Error finding branch name: {e}")
        try:
            # Alternative way to find branch name
            name_elements = driver.find_elements(By.XPATH, '//h1')
            if name_elements:
                branch_name = name_elements[0].text
                print(f"Found branch name (alternative): {branch_name}")
        except Exception:
            print("Error finding branch name using both methods")

    try:
        raw_address = driver.find_element(By.XPATH, '//button[contains(@aria-label, "Address")]/div').text
        cleaned = _clean_address(raw_address)
        if cleaned:
            address = cleaned
            print(f"Found address: {address}")
    except Exception:
        # Fall through to the alternative lookup below.
        pass

    if address == "Not found":
        try:
            # Alternative way to find address
            address_elements = driver.find_elements(By.XPATH, '//div[contains(@class, "Io6YTe")]/div')
            if address_elements:
                cleaned = _clean_address(address_elements[0].text)
                if cleaned:
                    address = cleaned
                    print(f"Found address (alternative): {address}")
        except Exception:
            print("Error finding address using both methods")

    try:
        rating_xpaths = [
            '//div[contains(@aria-label, " stars")]',
            '//span[contains(@aria-label, " stars")]',
            '//div[contains(@class, "F7nice")]/span[1]/span'
        ]

        for xpath in rating_xpaths:
            try:
                rating_element = driver.find_element(By.XPATH, xpath)
                aria_label = rating_element.get_attribute("aria-label")
                # Only overwrite the placeholder with a real value; an element
                # without aria-label must not turn the rating into None.
                if aria_label:
                    rating = aria_label
                    print(f"Found rating: {rating}")
                    break
            except NoSuchElementException:
                continue
    except Exception as e:
        print(f"Error finding rating: {e}")

    if address != "Not found":
        match = re.search(r"\b(\d{6})\b", address)  # Extract 6-digit pincode
        if match:
            pincode = match.group(1)
            print(f"Found pincode: {pincode}")
        else:
            print("Pincode not found in address")
    else:
        print("Address not found, skipping pincode extraction")

    # --- Navigate to Reviews Tab and Extract Reviews ---
    if navigate_to_reviews_tab(driver, url, branch_name):
        # Screenshots are best-effort debugging aids; failures are ignored.
        try:
            driver.save_screenshot(_screenshot_name("before_scroll", branch_name))
        except Exception:
            pass

        # Load all reviews
        scroll_to_load_all_reviews(driver)

        try:
            driver.save_screenshot(_screenshot_name("after_scroll", branch_name))
        except Exception:
            pass

        # Extract reviews
        reviews = extract_reviews(driver)
    else:
        reviews = []  # Empty list if reviews tab could not be found

    # --- Assemble Branch Data ---
    branch_data = {
        "name": branch_name,
        "address": address,
        "rating": rating,
        "pincode": pincode,
        "reviews": reviews,
        "url": url
    }

    return branch_data

def save_to_csv(branches_data, output_file="uco_bank_reviews.csv"):
    """
    Flatten branch records into one CSV row per review and write the file.

    Every row repeats the branch columns (name, address, rating, pincode,
    url) followed by the review columns. A branch without reviews still gets
    one row, with "No reviews found" as the reviewer and "N/A" elsewhere.

    Returns True on success; False when there is nothing to save or when
    building/writing the table fails (the error is printed).
    """
    if not branches_data:
        print("No data to save")
        return False

    try:
        rows = []

        for record in branches_data:
            # Columns shared by every row of this branch.
            shared = {
                "branch_name": record["name"],
                "branch_address": record["address"],
                "branch_rating": record["rating"],
                "branch_pincode": record["pincode"],
                "branch_url": record["url"],
            }
            branch_reviews = record["reviews"]

            if not branch_reviews:
                # Keep the branch visible in the output even without reviews.
                rows.append({
                    **shared,
                    "reviewer": "No reviews found",
                    "rating": "N/A",
                    "review_text": "N/A",
                    "review_date": "N/A",
                })
                continue

            rows.extend(
                {
                    **shared,
                    "reviewer": entry.get("reviewer", "N/A"),
                    "rating": entry.get("rating", "N/A"),
                    "review_text": entry.get("review_text", "N/A"),
                    "review_date": entry.get("review_date", "N/A"),
                }
                for entry in branch_reviews
            )

        # utf-8-sig writes a BOM at the start of the file.
        frame = pd.DataFrame(rows)
        frame.to_csv(output_file, index=False, encoding='utf-8-sig')
        print(f"Successfully saved {len(rows)} reviews to {output_file}")
        return True
    except Exception as e:
        print(f"Error saving data to CSV: {e}")
        return False

def main():
    """
    Main function to extract and save UCO Bank branch reviews.

    Starts a headless Chrome session, loads the branch URLs from JSON_PATH,
    scrapes every branch with extract_branch_data() and writes the results
    with save_to_csv(). A progress file is written every 10 branches. The
    browser is always closed before returning.
    """
    print("Starting UCO Bank review extraction process...")

    # Set up Chrome driver with options
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')  # Comment this out to see the browser window
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument('--window-size=1920,1080')

    # User agent to mimic a real browser
    chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")

    # Initialize Chrome WebDriver
    try:
        # Use ChromeDriverManager for automatic webdriver installation
        service = Service(ChromeDriverManager().install())
        driver = webdriver.Chrome(service=service, options=chrome_options)
        print("Chrome WebDriver initialized successfully")
    except Exception as e:
        print(f"Error initializing Chrome WebDriver: {e}")
        return

    try:
        # Load branch URLs from JSON file
        limit = 5  # Set to a number to limit branches, or None for all
        branch_urls = load_branch_urls(JSON_PATH, limit)

        if not branch_urls:
            print("No branch URLs found in the JSON file")
            # The finally clause below closes the driver exactly once.
            return

        print(f"Found {len(branch_urls)} branch URLs")

        # Extract data for each branch
        branches_data = []
        for i, url in enumerate(branch_urls, start=1):
            print(f"\nProcessing branch {i}/{len(branch_urls)}")
            try:
                branch_data = extract_branch_data(driver, url)
            except Exception as e:
                # One failing branch must not discard the branches already
                # scraped; record a placeholder row and carry on.
                print(f"Error processing branch {i}: {e}")
                branch_data = {
                    "name": "Error Loading Page",
                    "address": "N/A",
                    "rating": "N/A",
                    "pincode": "N/A",
                    "reviews": [],
                    "url": url
                }
            branches_data.append(branch_data)

            # Save progress every 10 branches
            if i % 10 == 0:
                save_to_csv(branches_data, f"uco_bank_reviews_progress_{i}.csv")
                print(f"Progress saved after {i} branches")

        # Final save
        save_to_csv(branches_data)
        print("\nCompleted extracting data for all branches")

    except Exception as e:
        print(f"Error in main process: {e}")
    finally:
        # Clean up
        driver.quit()
        print("Chrome WebDriver closed")

# Run the scraper only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

Starting UCO Bank review extraction process...
Chrome WebDriver initialized successfully
First item in JSON data: {'name': 'UCO Bank', 'url': 'https://www.google.com/maps/place/UCO+Bank/data=!4m7!3m6!1s0x3a0275439a3339b3:0xd657b11b66a9982b!8m2!3d22.581091!4d88.440661!16s%2Fg%2F11mfwx57_k!19sChIJszkzmkN1AjoRK5ipZhuxV9Y?authuser=0&hl=en&rclk=1'}
Found 5 branch URLs

Processing branch 1/5

Loading URL: https://www.google.com/maps/place/UCO+Bank/data=!4m7!3m6!1s0x3a0275439a3339b3:0xd657b11b66a9982b!8m2!3d22.581091!4d88.440661!16s%2Fg%2F11mfwx57_k!19sChIJszkzmkN1AjoRK5ipZhuxV9Y?authuser=0&hl=en&rclk=1
Found branch name: UCO Bank
Found address: 
Krishnapur, near Mahisgote Playground, Tarulia, AP Block, Sector V, Bidhannagar, Kolkata, West Bengal 700102
Found rating: 3.2 stars
Found pincode: 700102

Attempting to navigate to reviews tab for: UCO Bank
Extracted place ID: 0x3a0275439a3339b3:0xd657b11b66a9982b
Navigating directly to reviews URL: https://www.google.com/maps/place/UCO+Bank/@0,0,1