# Preparations

In [28]:
#!pip install -r requirements.txt
import requests
from urllib.parse import urlencode
import googlemaps
import os
from dotenv import load_dotenv
import time
import pandas as pd

In [29]:
# import the api_key from the api_key.py file
from api_key import api_key

# 1. City boundaries & grid

In [30]:
# Function to get the city boundaries
def get_city_boundaries(city_name):
    """Look up the bounding box of a city with the Google Geocoding API.

    Args:
        city_name: Free-text name of the city to geocode.

    Returns:
        Tuple ``(low_lat, low_long, high_lat, high_long)`` taken from the
        south-west and north-east corners of the first geocoding result.

    Raises:
        ValueError: If the geocoder returns no result for ``city_name`` or
            the first result has neither ``bounds`` nor ``viewport``.
    """
    # Initialize the Google Maps client
    gmaps = googlemaps.Client(key=api_key)  # googlemaps package

    geocode_result = gmaps.geocode(city_name)
    if not geocode_result:
        raise ValueError(f"No geocoding result found for city: {city_name!r}")

    geometry = geocode_result[0]['geometry']
    # 'bounds' is an optional field of a geocoding result; fall back to the
    # 'viewport' box so that such results do not crash with a KeyError.
    box = geometry.get('bounds') or geometry.get('viewport')
    if not box:
        raise ValueError(f"Geocoding result for {city_name!r} has no bounding box")

    southwest = box['southwest']
    northeast = box['northeast']
    return southwest['lat'], southwest['lng'], northeast['lat'], northeast['lng']

In [31]:
# Function to divide the city into a grid
def divide_area_in_grid(boundary, step_size = 0.01):
    """Split a bounding box into rectangular cells of at most ``step_size``.

    Cells are produced row by row, from the lowest latitude upwards and,
    within a row, from the lowest longitude upwards. Cells on the upper
    edges are clipped to the boundary.

    Args:
        boundary: Tuple ``(low_lat, low_long, high_lat, high_long)``.
        step_size: Edge length of a cell, in the same unit as the boundary
            coordinates. Must be positive.

    Returns:
        List of ``(low_lat, low_long, high_lat, high_long)`` tuples. Empty
        when the boundary has no extent.

    Raises:
        ValueError: If ``step_size`` is not a positive number (the loops
            would otherwise never terminate).
    """
    if not step_size > 0:
        raise ValueError(f"step_size must be positive, got {step_size!r}")

    low_lat, low_long, high_lat, high_long = boundary
    grid = []

    # Edges are derived from an integer index instead of repeatedly adding
    # step_size, so floating-point error does not accumulate and create an
    # extra sliver row/column at the upper boundary.
    row = 0
    lat = low_lat
    while lat < high_lat:
        next_lat = min(low_lat + (row + 1) * step_size, high_lat)

        col = 0
        long = low_long
        while long < high_long:
            next_long = min(low_long + (col + 1) * step_size, high_long)
            grid.append((lat, long, next_lat, next_long))
            col += 1
            long = low_long + col * step_size

        row += 1
        lat = low_lat + row * step_size

    return grid

# 2. Get places data

In [32]:
# all fields that are interesting for us
search_fields = [
    # identity and classification
    "places.displayName",
    "places.id",
    "places.types",
    "places.businessStatus",
    "places.pureServiceAreaBusiness",
    "places.formattedAddress",
    "places.location",
    "places.primaryType",
    "places.primaryTypeDisplayName",
    # contact, opening hours, price and rating
    "places.internationalPhoneNumber",
    "places.websiteUri",
    "places.editorialSummary",
    "places.regularOpeningHours",
    "places.utcOffsetMinutes",
    "places.priceLevel",
    "places.priceRange",
    "places.rating",
    "places.userRatingCount",
    # service options and amenities
    "places.curbsidePickup",
    "places.delivery",
    "places.dineIn",
    "places.liveMusic",
    "places.parkingOptions",
    "places.paymentOptions",
    "places.outdoorSeating",
    "places.reservable",
    "places.restroom",
    # food and drink offering
    "places.servesBeer",
    "places.servesBreakfast",
    "places.servesBrunch",
    "places.servesCocktails",
    "places.servesCoffee",
    "places.servesDessert",
    "places.servesDinner",
    "places.servesLunch",
    "places.servesVegetarianFood",
    "places.servesWine",
    "places.takeout",
    # accessibility and audience
    "places.accessibilityOptions",
    "places.allowsDogs",
    "places.goodForChildren",
    "places.goodForGroups",
    "places.goodForWatchingSports",
    "places.menuForChildren",
]

In [33]:
def get_places_data(boundary):
    """Fetch all restaurants inside a rectangle via Places Text Search (New).

    Sends paginated POST requests to the ``places:searchText`` endpoint,
    restricted to ``boundary``, and follows ``nextPageToken`` until the API
    stops returning one.

    Args:
        boundary: Tuple ``(low_lat, low_long, high_lat, high_long)``.

    Returns:
        List of place dictionaries exactly as returned under the ``places``
        key of the API responses. On a failed request the error is printed
        and the results collected so far are returned (best effort).
    """
    low_lat, low_long, high_lat, high_long = boundary

    # Base URL
    base_url = "https://places.googleapis.com/v1/places:searchText"

    # NOTE(review): the Text Search (New) pagination docs request
    # `nextPageToken` in the field mask; without it the token may not be
    # returned and only the first page would be fetched. Confirm against
    # the API docs.
    field_mask = list(search_fields)
    if "nextPageToken" not in field_mask:
        field_mask.append("nextPageToken")

    # Headers
    headers = {
        "Content-Type": "application/json",
        "X-Goog-Api-Key": api_key,
        "X-Goog-FieldMask": ",".join(field_mask),
    }

    # JSON request body; "pageToken" is only added once a token exists so
    # the first request does not carry a null token.
    body = {
        "includedType": "restaurant",  # Restrict to restaurants
        "strictTypeFiltering": True,  # Only return results of the specified type
        "textQuery": "restaurant",
        "pageSize": 20,  # max results per page
        "languageCode": "en",  # Language for results
        "locationRestriction": {
            "rectangle": {
                "low": {"latitude": low_lat, "longitude": low_long},
                "high": {"latitude": high_lat, "longitude": high_long},
            }
        },
    }

    results = []  # List to store the results

    while True:
        try:
            # A timeout keeps one hung connection from stalling the whole run.
            response = requests.post(base_url, headers=headers, json=body, timeout=30)
        except requests.RequestException as exc:
            print("Error:", exc)
            break

        if response.status_code != 200:
            print("Error:", response.status_code, "Message:", response.text)
            break

        data = response.json()

        # No "places" key means there is nothing (more) to collect
        if "places" not in data:
            break
        results.extend(data["places"])

        # Continue only while the API hands out a token for another page
        page_token = data.get("nextPageToken")
        if not page_token:
            break
        body["pageToken"] = page_token

    return results

# 3. Get the data from the grid

In [34]:
def get_places_data_in_grid(grid, max_results=60, pause_seconds=2, min_cell_size=1e-5):
    """Collect places for every cell of a grid, splitting overcrowded cells.

    Each cell is queried with ``get_places_data``. When a cell returns
    ``max_results`` places, the result set is assumed to be truncated: it
    is discarded and the cell is split into four quadrants that are
    queried recursively.

    Args:
        grid: Iterable of ``(low_lat, low_long, high_lat, high_long)`` cells.
        max_results: Result count that marks a cell as overcrowded.
        pause_seconds: Seconds to sleep after each cell request.
        min_cell_size: Cells whose larger side is not above this value are
            no longer split; their (possibly truncated) results are kept.
            This bounds the recursion depth.

    Returns:
        Flat list with the place dictionaries of all cells and sub-cells.
    """
    all_results = []

    for cell in grid:
        results = get_places_data(cell)
        time.sleep(pause_seconds)

        if len(results) < max_results:
            all_results.extend(results)
            continue

        low_lat, low_long, high_lat, high_long = cell
        if max(high_lat - low_lat, high_long - low_long) <= min_cell_size:
            # Splitting further cannot separate the places any more; keep
            # what we have instead of recursing without end.
            print(f"Cell too small to split, keeping capped results: {cell}")
            all_results.extend(results)
            continue

        # The API returned a full result set: split the cell into quadrants
        print(f"Overcrowded cell detected: {cell}")
        mid_lat = (low_lat + high_lat) / 2
        mid_long = (low_long + high_long) / 2
        sub_cells = [
            (low_lat, low_long, mid_lat, mid_long),  # Bottom-left
            (low_lat, mid_long, mid_lat, high_long),  # Bottom-right
            (mid_lat, low_long, high_lat, mid_long),  # Top-left
            (mid_lat, mid_long, high_lat, high_long),  # Top-right
        ]

        # Recursive call for the sub-cells, with the same settings
        all_results.extend(
            get_places_data_in_grid(
                sub_cells,
                max_results=max_results,
                pause_seconds=pause_seconds,
                min_cell_size=min_cell_size,
            )
        )

    return all_results

# 4. Clean dataset

In [35]:
def clean_dataset(results):
    """Split raw place dictionaries into three thematic DataFrames.

    Args:
        results: Iterable of place dictionaries; missing keys become ``None``.

    Returns:
        Tuple ``(cleaned_basics, cleaned_general, cleaned_additional)`` of
        DataFrames with one row per place, each carrying the place ``id``.
    """
    # (output column, key in the raw place dictionary)
    basic_columns = (
        ("id", "id"),
        ("name", "displayName"),
        ("types", "types"),
        ("primaryType", "primaryType"),
        ("primaryTypeDisplayName", "primaryTypeDisplayName"),
        ("businessStatus", "businessStatus"),
        ("pureServiceAreaBusiness", "pureServiceAreaBusiness"),
        ("formattedAddress", "formattedAddress"),
        ("location", "location"),
    )
    general_columns = (
        ("id", "id"),
        ("internationalPhoneNumber", "internationalPhoneNumber"),
        ("placesWebsiteUri", "websiteUri"),
        ("editorialSummary", "editorialSummary"),
        ("regularOpeningHours", "regularOpeningHours"),
        ("utcOffsetMinutes", "utcOffsetMinutes"),
        ("priceLevel", "priceLevel"),
        ("priceRange", "priceRange"),
        ("rating", "rating"),
        ("userRatingCount", "userRatingCount"),
    )
    # the additional attributes keep their original key as column name
    additional_keys = (
        "id",
        "curbsidePickup",
        "delivery",
        "dineIn",
        "liveMusic",
        "parkingOptions",
        "paymentOptions",
        "outdoorSeating",
        "reservable",
        "restroom",
        "servesBeer",
        "servesBreakfast",
        "servesBrunch",
        "servesCocktails",
        "servesCoffee",
        "servesDessert",
        "servesDinner",
        "servesLunch",
        "servesVegetarianFood",
        "servesWine",
        "takeout",
        "accessibilityOptions",
        "allowsDogs",
        "goodForChildren",
        "goodForGroups",
        "goodForWatchingSports",
        "menuForChildren",
    )

    basic_rows, general_rows, additional_rows = [], [], []
    for place in results:
        basic_rows.append({column: place.get(key) for column, key in basic_columns})
        general_rows.append({column: place.get(key) for column, key in general_columns})
        additional_rows.append({key: place.get(key) for key in additional_keys})

    return (
        pd.DataFrame(basic_rows),
        pd.DataFrame(general_rows),
        pd.DataFrame(additional_rows),
    )

# 5. Final function to combine all steps

In [36]:
# Final function to fetch all restaurants for a given city
def fetch_restaurants_in_city(city):
    """Run the whole pipeline for one city.

    Geocodes the city, divides its bounding box into a grid, queries the
    places for every grid cell and cleans the raw results.

    Args:
        city: Name of the city to fetch restaurants for.

    Returns:
        Tuple ``(cleaned_basics, cleaned_general, cleaned_additional)`` as
        produced by ``clean_dataset``.
    """
    city_grid = divide_area_in_grid(get_city_boundaries(city))
    raw_places = get_places_data_in_grid(city_grid)

    basics, general, additional = clean_dataset(raw_places)
    return basics, general, additional


In [None]:
# Cities to scrape; the three accumulators collect the rows of all cities
cities = [
    "München",
    "Stuttgart",
    "Nürnberg",
    "Mannheim",
    "Karlsruhe",
    "Augsburg",
    "Freiburg",
    "Heidelberg",
    "Regensburg",
    "Tübingen",
]
API_basics, API_general, API_additional = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

for city in cities:
    print(f"Fetching data for {city}")

    cleaned_basics, cleaned_general, cleaned_additional = fetch_restaurants_in_city(city)
    # only the basics table is tagged with the city it was fetched for
    cleaned_basics['city'] = city

    # append this city's rows to the running tables
    API_basics = pd.concat([API_basics, cleaned_basics], ignore_index=True)
    API_general = pd.concat([API_general, cleaned_general], ignore_index=True)
    API_additional = pd.concat([API_additional, cleaned_additional], ignore_index=True)

    # progress report: rows for this city, then the running total
    print(f"Number of restaurants in {city}: {len(cleaned_basics)}")
    print(len(API_basics))
    print()

In [53]:
# Save the data to a csv file
# Write each table to its own CSV file, without the DataFrame index
for frame, csv_path in (
    (API_basics, "API_Basics.csv"),
    (API_general, "API_General.csv"),
    (API_additional, "API_Additional.csv"),
):
    frame.to_csv(csv_path, index=False)

In [54]:
# Sum of the userRatingCount column over all fetched places
total_user_ratings = API_general["userRatingCount"].sum()
print(total_user_ratings)

4182871.0
