In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time
import json

In [2]:
# Set up performance logging
options = Options()
options.set_capability("goog:loggingPrefs", {"performance": "ALL"})

# Start ChromeDriver
service = Service()  # Ensure the correct ChromeDriver path is set here
driver = webdriver.Chrome(service=service, options=options)

# Load the webpage
url = "https://www.google.de/maps/place/Pan's+Asian+Kitchen/@48.509597,9.0573815,17z/data=!3m1!5s0x4799fac4ec54a3f5:0x76c4900c0c96b646!4m16!1m9!3m8!1s0x4799fb71f7a06517:0x124d32ed37caaf7c!2sPan's+Asian+Kitchen!8m2!3d48.509597!4d9.0599564!9m1!1b1!16s%2Fg%2F11vrdbkbsl!3m5!1s0x4799fb71f7a06517:0x124d32ed37caaf7c!8m2!3d48.509597!4d9.0599564!16s%2Fg%2F11vrdbkbsl?entry=ttu&g_ep=EgoyMDI0MTEyNC4xIKXMDSoASAFQAw%3D%3D"
driver.get(url)

# Consent to cookies
try:
    wait = WebDriverWait(driver, 10)
    button = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@class='VfPpkd-LgbsSe VfPpkd-LgbsSe-OWXEXe-k8QpJ VfPpkd-LgbsSe-OWXEXe-dgl2Hf nCP5yc AjY5Oe DuMIQc LQeN7 XWZjwc']")))
    button.click()
    print("Clicked consent to cookies.")
except:
    print("No consent required.")

# Click the reviews tab
try:
    wait = WebDriverWait(driver, 20)
    button = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[.//div[@class='Gpq6kf fontTitleSmall' and text()='Rezensionen']]")))
    if button.is_displayed() and button.is_enabled():
        button.click()
        print("Clicked to show reviews.")
        time.sleep(2)
    else:
        print("Button is not clickable.")
except Exception as e:
    print(f"Error: {e}")
    print("Reviews button not found.")

def scroll_and_fetch_reviews(driver, panel_xpath, max_scrolls, batch_size, pause_time):
    all_reviews_data = []  # To store consolidated review data

    try:
        # Locate the reviews panel
        panel_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, panel_xpath))
        )
        print("Located the reviews panel.")

        # Initialize ActionChains for scrolling
        actions = ActionChains(driver)
        actions.move_to_element(panel_element).click().perform()

        for i in range(0, max_scrolls):
            # Scroll within the reviews panel
            actions.send_keys(Keys.PAGE_DOWN).perform()
            time.sleep(pause_time)

            # Fetch `listugcposts` data every `batch_size` scrolls
            if i % batch_size == 0:
                print(f"Fetching reviews data after {i} scrolls...")
                logs = driver.get_log("performance")

                for log_entry in logs:
                    try:
                        message = json.loads(log_entry["message"])
                        if message["message"]["method"] == "Network.responseReceived":
                            response_url = message["message"]["params"]["response"]["url"]
                            request_id = message["message"]["params"]["requestId"]

                            # Check for the reviews API URL
                            if "listugcposts" in response_url:
                                print(f"Detected reviews URL: {response_url}")
                                
                                # Retry fetching response body
                                response_body = None
                                retry_attempts = 3
                                for attempt in range(retry_attempts):
                                    try:
                                        response_body = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': request_id})
                                        if response_body and 'body' in response_body and response_body['body'].strip():
                                            break  # Exit retry loop if valid body is found
                                    except Exception as e:
                                        print(f"Retry {attempt + 1}/{retry_attempts} failed: {e}")
                                        time.sleep(1)

                                # Ensure the response body is valid
                                if response_body and 'body' in response_body and response_body['body'].strip():
                                    try:
                                        # Strip prefix and parse JSON data
                                        result_data = json.loads(response_body["body"][4:])  # Strip the prefix `)]}'`
                                        all_reviews_data.append(result_data)
                                        print(f"Appended reviews data from {response_url}.")
                                    except json.JSONDecodeError as e:
                                        print(f"JSONDecodeError while parsing response body: {e}")
                                else:
                                    print(f"No valid body found for {response_url}. Skipping...")
                    except KeyError as e:
                        print(f"KeyError while processing logs: {e}")
                    except json.JSONDecodeError as e:
                        print(f"JSONDecodeError while parsing response: {e}")
                    except Exception as e:
                        print(f"Unexpected error: {e}")

        print("Scrolling and fetching completed.")
        return all_reviews_data

    except Exception as e:
        print(f"Error while scrolling or fetching reviews: {e}")
        return all_reviews_data


# XPATH for the reviews panel
panel_xpath = '//*[contains(concat( " ", @class, " " ), concat( " ", "jftiEf", " " ))]'

# Scroll and fetch the reviews data
reviews_data = scroll_and_fetch_reviews(driver, panel_xpath, max_scrolls=150, batch_size=5, pause_time=0.3)

# Print the number of fetched review batches
print(f"Total review batches fetched: {len(reviews_data)}")

# Optionally, write the reviews data to a file
with open('reviews_data.json', 'w', encoding='utf-8') as f:
    json.dump(reviews_data, f, ensure_ascii=False, indent=4)

print("Saved reviews data to 'reviews_data.json'.")

# Quit the browser
driver.quit()


Clicked consent to cookies.
Clicked to show reviews.
Located the reviews panel.
Fetching reviews data after 0 scrolls...
Detected reviews URL: https://www.google.de/maps/rpc/listugcposts?authuser=0&hl=de&gl=de&pb=!1m6!1s0x4799fb71f7a06517%3A0x124d32ed37caaf7c!6m4!4m1!1e1!4m1!1e3!2m2!1i10!2s!5m2!1sjhNSZ-nkPNm5hbIPoL2tyAc!7e81!8m5!1b1!2b1!3b1!5b1!7b1!11m0!13m1!1e1
Appended reviews data from https://www.google.de/maps/rpc/listugcposts?authuser=0&hl=de&gl=de&pb=!1m6!1s0x4799fb71f7a06517%3A0x124d32ed37caaf7c!6m4!4m1!1e1!4m1!1e3!2m2!1i10!2s!5m2!1sjhNSZ-nkPNm5hbIPoL2tyAc!7e81!8m5!1b1!2b1!3b1!5b1!7b1!11m0!13m1!1e1.
Fetching reviews data after 5 scrolls...
Fetching reviews data after 10 scrolls...
Fetching reviews data after 15 scrolls...
Detected reviews URL: https://www.google.de/maps/rpc/listugcposts?authuser=0&hl=de&gl=de&pb=!1m6!1s0x4799fb71f7a06517%3A0x124d32ed37caaf7c!6m4!4m1!1e1!4m1!1e3!2m2!1i10!2sCAESY0NBRVFDaHBFUTJwRlNVRlNTWEJEWjI5QlVEZGZURUZOU0hCZlgxOWZSV2hDTmpGVGVXOUtMWFJCU0VoV2RFRj

In [3]:
print(len(reviews_data))
print(len(reviews_data[13][2]))

14
7


In [4]:
test_review_list = reviews_data[0][2]
test_review = test_review_list[8]
print(len(test_review[0][2][6]))
test_review_more_info = test_review[0][2][6]
test_review_more_info

11


[[['GUIDED_DINING_MODE'],
  'Hast du vor Ort gegessen, Essen zum Mitnehmen bestellt oder dir Essen liefern lassen?',
  [[[['E:DINE_IN'],
     'Verzehr im Restaurant',
     2,
     None,
     None,
     '0ahUKEwjbiNSYwpGKAxX9vv0HHWZfKvQQ3YcHCNABKAA',
     None,
     None,
     0]],
   1],
  None,
  None,
  'Servicetyp',
  None,
  '0ahUKEwjbiNSYwpGKAxX9vv0HHWZfKvQQ3IcHCM8BKAU',
  None,
  None,
  None,
  None,
  None,
  1],
 [['GUIDED_DINING_MEAL_TYPE'],
  'Was hast du bestellt?',
  [[[['E:DINNER'],
     'Abendessen',
     2,
     None,
     None,
     '0ahUKEwjbiNSYwpGKAxX9vv0HHWZfKvQQ3YcHCNIBKAA',
     None,
     None,
     0]],
   1],
  None,
  None,
  'Art der Mahlzeit',
  None,
  '0ahUKEwjbiNSYwpGKAxX9vv0HHWZfKvQQ3IcHCNEBKAY',
  None,
  None,
  None,
  None,
  None,
  1],
 [['GUIDED_DINING_PRICE_RANGE'],
  'Wie viel hast du pro Person ausgegeben?',
  [[[['E:EUR_20_TO_30'],
     '20–30\xa0€',
     2,
     None,
     '20\xa0€ bis 30\xa0€',
     '0ahUKEwjbiNSYwpGKAxX9vv0HHWZfKvQQ3YcHCNQ

In [5]:
def get_more_information_details(list_more_information):
    # not happy with this hard coding but for now it should work and maybe I have an idea later
    # idea dictionary
    dining_mode = ""
    dining_meal_type = ""
    dining_price_range = ""
    dining_food = ""
    dining_service = ""
    dining_atmosphere = ""
    dining_dish_recommend = ""
    dining_recommend_for_vegetarians = ""
    dining_dish_recommend_veggie = ""
    dining_veggie_tips = ""
    # dining_tips = ""
    dining_parking_options = ""
    dining_parking_tips = ""
    dining_parking_space_availability = ""
    dining_accessibility_tips= ""
    dining_kid_friendliness = ""

    for entry in list_more_information:
        key = entry[0][0]

        # info:
        # [11][0] -> star rating
        # [2][0][0][1] -> single select
        # for loop -> multi select
        # [10][0] -> text input field


        match key:
            case "GUIDED_DINING_MODE":
                dining_mode = entry[2][0][0][1]
                
            case "GUIDED_DINING_MEAL_TYPE":
                dining_meal_type = entry[2][0][0][1]

            case "GUIDED_DINING_PRICE_RANGE":
                dining_price_range = entry[2][0][0][1]

            case "GUIDED_DINING_FOOD_ASPECT":
                dining_food = entry[11][0]

            case "GUIDED_DINING_SERVICE_ASPECT":
                dining_service = entry[11][0]

            case "GUIDED_DINING_ATMOSPHERE_ASPECT":
                dining_atmosphere = entry[11][0]

            case "GUIDED_DINING_DISH_RECOMMENDATION":
                for recom in entry[3][0]:
                    if dining_dish_recommend:
                        dining_dish_recommend += f", {recom[1]}"
                    else:
                        dining_dish_recommend += recom[1]

            case "GUIDED_DINING_RECOMMEND_TO_VEGETARIANS":
                dining_recommend_for_vegetarians = entry[2][0][0][1]

            case "GUIDED_DINING_VEGETARIAN_OFFERINGS_INFO":
                for recom in entry[3][0]:
                    if dining_dish_recommend_veggie:
                        dining_dish_recommend_veggie += f", {recom[1]}"
                    else:
                        dining_dish_recommend_veggie += recom[1]

            case "GUIDED_DINING_VEGETARIAN_OPTIONS_TIPS":
                dining_veggie_tips = entry[10][0]

            case "GUIDED_DINING_PARKING_SPACE_AVAILABILITY":
                dining_parking_space_availability = entry[2][0][0][1]

            case "GUIDED_DINING_PARKING_OPTIONS":
                for option in entry[3][0]:
                    if dining_parking_options:
                        dining_parking_options += f", {option[1]}"
                    else:
                        dining_parking_options += option[1]

            case "GUIDED_DINING_PARKING_TIPS":
                dining_parking_tips = entry[10][0]

            case "GUIDED_DINING_ACCESSIBILITY_TIPS":
                dining_accessibility_tips = entry[10][0]

            case "GUIDED_DINING_KID_FRIENDLINESS_TIPS":
                dining_kid_friendliness = entry[10][0]

            # case "GUIDED_DINING_TIPS_TOPICS":
                # dining_tips = entry[3][0][0][1]


    return (dining_mode, dining_meal_type, dining_price_range, 
            dining_food, dining_service, dining_atmosphere, 
            dining_dish_recommend, dining_kid_friendliness, 
            dining_recommend_for_vegetarians, dining_dish_recommend_veggie, dining_veggie_tips, 
            dining_parking_space_availability, dining_parking_options, dining_parking_tips, dining_accessibility_tips)

In [6]:
get_more_information_details(test_review_more_info)

('Verzehr im Restaurant',
 'Abendessen',
 '20–30\xa0€',
 5,
 5,
 5,
 'Spezial Ramen',
 '',
 '',
 '',
 '',
 'Eher schwierige Parkplatzsituation',
 'Kostenlose Parkplätze an der Straße',
 'Wenig Parkplätze direkt am Restaurant.',
 '')

In [7]:
def get_data_from_reviews(reviews_data_frame, amount_reviews):
    review_index = 0
    # making empty dataframe to save the reviews in 
    reviews_df = pd.DataFrame({
    'Name': [None] * amount_reviews,
    'Datum': [None] * amount_reviews,
    'Stars': [None] * amount_reviews,
    'more_info': [[]] * amount_reviews,  # list to save information from more information
    'language': [None] * amount_reviews,
    'text': [None] * amount_reviews
    })
    
    for i in range(len(reviews_data_frame)):
        review_list = reviews_data_frame[i][2]
        a = 0

        for review in review_list:
            reviews_df.iloc[review_index, 0] = review[0][1][4][5][0]        # name
            reviews_df.iloc[review_index, 1] = review[0][1][6]              # date
            reviews_df.iloc[review_index, 2] = review[0][2][0][0]           # stars

            # at reviews with no text, list is shorter and can therefore not be accessed

            try:                
                more_information = get_more_information_details(review[0][2][6]) # more info    
            except Exception as e:
                print(f"{e} occured at review {i}-{a} (more info)")

            try:                
                reviews_df.iloc[review_index, 4] = review[0][2][14][0]      # language      
            except Exception as e:
                print(f"{e} occured at review {i}-{a} (language)")

            try:               
                reviews_df.iloc[review_index, 5] = review[0][2][15][0][0]   # text
            except Exception as e:
                print(f"{e} occured at review {i}-{a} (text)")
            review_index += 1
            a += 1
        
    return reviews_df


In [8]:
test = get_data_from_reviews(reviews_data, 139)

'NoneType' object is not iterable occured at review 0-9 (more info)
'NoneType' object is not iterable occured at review 4-6 (more info)
'NoneType' object is not iterable occured at review 6-2 (more info)
'NoneType' object is not iterable occured at review 6-7 (more info)
'NoneType' object is not iterable occured at review 8-2 (more info)
'NoneType' object is not iterable occured at review 9-4 (more info)
list index out of range occured at review 10-9 (language)
list index out of range occured at review 10-9 (text)
list index out of range occured at review 11-0 (language)
list index out of range occured at review 11-0 (text)
list index out of range occured at review 11-1 (language)
list index out of range occured at review 11-1 (text)
list index out of range occured at review 11-2 (language)
list index out of range occured at review 11-2 (text)
list index out of range occured at review 11-3 (language)
list index out of range occured at review 11-3 (text)
list index out of range occured 

In [9]:
test

Unnamed: 0,Name,Datum,Stars,more_info,language,text
0,Manfred Wiedemann,vor 5 Monaten,5,[],de,Kurz und knapp: WOW!!!\nWir waren spontan zum ...
1,Harald Allmendinger,vor 7 Monaten,5,[],de,Sehr guter Service. Die Gerichte wurden gut er...
2,Nia,vor 10 Monaten,5,[],de,Wir waren über die Feiertage auf Besuch in Tüb...
3,Alex McQueeney,vor 4 Monaten,5,[],de,Wahnsinnig lecker. Das Gericht Ente Udon und ...
4,Marius Preuss,vor 10 Monaten,5,[],de,Ich fande das Restaurant ehrlich ganz gut.\nAm...
...,...,...,...,...,...,...
134,Carmen Ristau,vor 10 Monaten,4,[],,
135,Thomas Euler,vor 10 Monaten,5,[],,
136,rigeus,vor 11 Monaten,5,[],,
137,,,,[],,


### Codefriedhof

In [10]:
test_review_list = reviews_data[2][2]
test_review = test_review_list[9]
print(test_review[0][1][4][5][0])
print(len(test_review[0][2][6]))
test_review_more_info = test_review[0][2][6]
# test_review_more_info

Max A
11


In [11]:
test_review_list = reviews_data[0][2]
test_review = test_review_list[9]
print(len(test_review[0][2][6]))
test_review_more_info = test_review[0][2][6]
test_review_more_info

TypeError: object of type 'NoneType' has no len()

In [12]:
test_review_list = reviews_data[0][2]
test_review = test_review_list[0]
test_review

[['ChdDSUhNMG9nS0VJQ0FnSUR6MnNfTTlBRRAB',
  ['0x0:0x124d32ed37caaf7c',
   None,
   1717933607064380,
   1717933607064380,
   [None,
    None,
    ['https://www.google.com/maps/contrib/111689532179076076463/reviews?hl=de'],
    None,
    None,
    ['Manfred Wiedemann',
     'https://lh3.googleusercontent.com/a/ACg8ocLQz6px5Qjh0A4uvT6wITupL_JuCrGbzdKMnAJhqdK-qp9OqyA=s120-c-rp-mo-ba3-br100',
     ['https://www.google.com/maps/contrib/111689532179076076463?hl=de'],
     '111689532179076076463',
     None,
     22,
     42,
     None,
     [1, 5, 1],
     9,
     ['Local Guide · 22\xa0Rezensionen',
      None,
      None,
      None,
      None,
      [None, '0ahUKEwjbiNSYwpGKAxX9vv0HHWZfKvQQ7LoGCAMoAA']]]],
   None,
   'vor 5 Monaten',
   None,
   None,
   None,
   None,
   None,
   None,
   ['Google',
    'https://www.gstatic.com/images/branding/product/1x/googleg_48dp.png',
    None,
    'google',
    5],
   None,
   1],
  [[5],
   None,
   [['AF1QipPx4xfhzHG3xFhSn3SIPtme2-BK5hMijJP5JLS6

In [13]:
test_review_more_info[9]
# test_review_more_info[7][2][0][0][1]

[['GUIDED_DINING_VEGETARIAN_OPTIONS_TIPS'],
 'Vegetarische Gerichte',
 None,
 None,
 'Andere über vegetarische Gerichte informieren',
 'Vegetarische Gerichte',
 None,
 '0ahUKEwiQ5LOdwpGKAxU5xL4IHYioAxIQ3IcHCH0oCw',
 None,
 None,
 ['Der Ramen ist sehr empfehlenswert. Mit Tofu sehr zu empfehlen.'],
 None,
 None,
 3]