# Obtaining Nearby Place Counts from the Google Maps API

In [0]:
import pandas as pd
import os
# For Google Maps API
import googlemaps
from time import sleep
# For Azure connection:
from azure.storage.blob import BlobServiceClient
from io import BytesIO
# For multiprocessing
from concurrent.futures import ThreadPoolExecutor, as_completed

## Setting up Google Maps API

The API key is for the Google account:
datalakeshslu@outlook.com

**There is a $200 monthly limit on API calls.**

# Google Maps API key, read from the environment so that the secret is never
# stored in the notebook (set GOOGLE_MAPS_API_KEY before starting the kernel).
# NOTE(review): the key that used to be hardcoded in this cell has been exposed
# through the notebook and should be revoked and replaced.
API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY", "")
if not API_KEY:
    # Warn rather than raise so the remaining cells can still be inspected
    print("GOOGLE_MAPS_API_KEY is not set; the Google Maps client cannot be created.")

In [0]:
# Build the Google Maps client and report whether that worked
try:
    gmaps = googlemaps.Client(key=API_KEY)
except Exception as err:
    print(f"Couldn't connect to Google Maps API: {err}")
else:
    print("Connected successfully to Google Maps API!")

## Loading data from Blob Storage

In [0]:
# Azure Blob Storage settings.
# The connection string embeds the storage account key, so it is read from the
# environment (AZURE_STORAGE_CONNECTION_STRING) instead of being stored in the
# notebook.
# NOTE(review): the account key that used to be hardcoded in this cell has been
# exposed through the notebook and should be rotated.
connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING", "")
if not connection_string:
    # Warn rather than raise so the remaining cells can still be inspected
    print("AZURE_STORAGE_CONNECTION_STRING is not set; the Blob Storage connection cannot be established.")
container_name = "rentscape-blob"

In [0]:
# Open the storage account and get a handle on the configured container
try:
    blob_service_client = BlobServiceClient.from_connection_string(connection_string)
    container_client = blob_service_client.get_container_client(container_name)
except Exception as err:
    print("Failed to connect to Azure Blob Storage:", err)
else:
    print("Connected to Azure Blob Storage.")

In [0]:
# Print the container name followed by one "+---<blob name>" line per blob,
# giving a flat tree view of what the container holds.
blob_list = container_client.list_blobs()
print(container_name)
for blob in blob_list:
    print(f"+---{blob.name}")

In [0]:
# Function to load a CSV file from Azure Blob Storage into a Pandas DataFrame
def load_csv_from_blob(blob_path):
    """Download the blob at `blob_path` from `container_client` and parse it as CSV.

    The whole blob is read into memory and handed to `pd.read_csv`; the
    resulting DataFrame is returned.
    """
    downloader = container_client.get_blob_client(blob_path).download_blob()
    raw_bytes = downloader.readall()
    return pd.read_csv(BytesIO(raw_bytes))

In [0]:
# Load the Prague and Barcelona listings
try:
    prg_listings = load_csv_from_blob("prague_listings.csv")
    bcn_listings = load_csv_from_blob("barcelona_listings.csv")
except Exception as err:
    # The failure is only reported; the listings stay undefined in that case
    print("Failed to load data from Azure Blob Storage:", err)
else:
    print("Data loaded successfully!")

## Obtain data from Google Maps API based on listings

The place-count lookup function `get_places_count_with_pagination` is wrapped below so that it takes the latitude and longitude from each row of the listings DataFrame, adding the place counts directly to the original datasets:

In [0]:
# Peek at the identifier and coordinate columns of the Prague listings
prg_listings.loc[:, ['id', 'latitude', 'longitude']].head(2)

In [0]:
def get_place_counts_for_row(row, radius, keywords):
    """
    Build the ``{"<keyword>_count": n}`` mapping for a single listing.

    `row` must expose 'latitude' and 'longitude'. When both are present, each
    keyword is looked up through `get_places_count_with_pagination`; otherwise
    a notice is printed and every count is set to 0.
    """
    latitude = row['latitude']
    longitude = row['longitude']

    # Guard clause: no lookup is possible without both coordinates
    if pd.isnull(latitude) or pd.isnull(longitude):
        print("Skipping row due to missing coordinates.")
        return {f"{keyword}_count": 0 for keyword in keywords}

    return {
        f"{keyword}_count": get_places_count_with_pagination(latitude, longitude, radius, keyword)
        for keyword in keywords
    }

def add_place_counts_to_listings(dataframe, radius, keywords, max_workers=5):
    """
    Add one ``<keyword>_count`` column per keyword with the number of nearby places.

    Every row is handed to `get_place_counts_for_row` on a thread pool and the
    returned counts are written back into `dataframe` as the tasks complete.

    Args:
        dataframe: Listings to enrich. It is modified in place (the count
            columns are added to it) and also returned; pass a copy to keep
            the input untouched.
        radius: Search radius, forwarded unchanged to `get_place_counts_for_row`.
        keywords: List of keywords; one ``<keyword>_count`` column is created
            for each.
        max_workers: Number of worker threads in the pool.

    Returns:
        The same `dataframe` object with the count columns filled in. Rows
        whose lookup raised keep the initial value 0.
    """
    # Prepare new columns for the counts
    for keyword in keywords:
        dataframe[f"{keyword}_count"] = 0

    total_rows = len(dataframe)
    completed = 0
    failed = 0
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Map each submitted task back to the index label of its row
        futures = {
            executor.submit(get_place_counts_for_row, row, radius, keywords): index
            for index, row in dataframe.iterrows()
        }

        # Results arrive in completion order, not in row order
        for future in as_completed(futures):
            index = futures[future]
            try:
                counts = future.result()
                for column, count in counts.items():
                    dataframe.at[index, column] = count
            except Exception as e:
                failed += 1
                print(f"Error processing row {index}: {e}")

            # Progress is counted in completed tasks: the index label is not a
            # position (and need not even be an integer), so it cannot be used here.
            completed += 1
            print(f"Processed row {completed} / {total_rows}")

    if failed:
        # Failed rows are otherwise indistinguishable from genuine zero counts
        print(f"{failed} of {total_rows} rows failed and keep a count of 0.")

    return dataframe

Settings:

In [0]:
# Search configuration shared by both cities
radius = 100  # in meters
keywords = [
    'restaurant',
    'hotel',
    'park',
    'museum',
    'mall',
]

For **Prague**:

# PRAGUE

output_file = "prg_listings_places.csv"

if os.path.exists(output_file):
    # Reuse the cached result so the API calls are not repeated
    prg_listings_places = pd.read_csv(output_file)
    print(f"Loaded existing file '{output_file}'.")
else:
    # add_place_counts_to_listings writes its columns into the frame it is
    # given, so hand it a copy: prg_listings then stays unchanged whether or
    # not the cache file exists.
    prg_listings_places = add_place_counts_to_listings(prg_listings.copy(), radius, keywords)
    prg_listings_places.to_csv(output_file, index=False)
    print(f"Generated and saved '{output_file}'.")

For **Barcelona**:

# BARCELONA

output_file = "bcn_listings_places.csv"

if os.path.exists(output_file):
    # Reuse the cached result so the API calls are not repeated
    bcn_listings_places = pd.read_csv(output_file)
    print(f"Loaded existing file '{output_file}'.")
else:
    # add_place_counts_to_listings writes its columns into the frame it is
    # given, so hand it a copy: bcn_listings then stays unchanged whether or
    # not the cache file exists.
    bcn_listings_places = add_place_counts_to_listings(bcn_listings.copy(), radius, keywords)
    bcn_listings_places.to_csv(output_file, index=False)
    print(f"Generated and saved '{output_file}'.")

In [0]:
# First ten Prague listings with their nearby-place counts
prg_listings_places.loc[
    :, ['id', 'restaurant_count', 'hotel_count', 'park_count', 'museum_count', 'mall_count']
].head(10)

In [0]:
# First two Barcelona listings with their nearby-place counts
bcn_listings_places.loc[
    :, ['id', 'restaurant_count', 'hotel_count', 'park_count', 'museum_count', 'mall_count']
].head(2)