# Nearby Places from Overpass & Google Maps APIs

In [72]:
import requests
import pandas as pd
import os
import math
# For Google Maps API
import googlemaps
from time import sleep
# For Azure connection:
from azure.storage.blob import BlobServiceClient
from io import BytesIO
# For multiprocessing
from concurrent.futures import ThreadPoolExecutor, as_completed

## Data Import from Blob Storage

In [61]:
# Azure Blob Storage
# The connection string embeds the storage account key, so it is read from the
# environment rather than stored in the notebook. A missing variable raises
# KeyError here, before any client is built.
# NOTE(review): the account key that used to be hardcoded in this cell should be
# treated as leaked and rotated.
connection_string = os.environ["AZURE_STORAGE_CONNECTION_STRING"]
container_name = "rentscape-blob"

In [62]:
# Build the service and container clients used by every later cell.
# NOTE(review): presumably no request is sent while constructing these clients,
# so the success message may only mean the connection string parsed — confirm.
try:
    blob_service_client = BlobServiceClient.from_connection_string(connection_string)
    container_client = blob_service_client.get_container_client(container_name)
    print("Connected to Azure Blob Storage.")
except Exception as e:
    print("Failed to connect to Azure Blob Storage:", e)
    # Later cells need `container_client`; stop here instead of letting them
    # fail with an unrelated NameError.
    raise

Connected to Azure Blob Storage.


In [63]:
# Print the container name followed by one "+---<name>" line per blob.
blob_names = (blob.name for blob in container_client.list_blobs())
print(container_name)
for blob_name in blob_names:
    print(f"+---{blob_name}")

rentscape-blob
+---barcelona_listings.csv
+---barcelona_reviews.csv
+---prague_listings.csv
+---prague_reviews.csv


In [64]:
# Read a CSV blob from the notebook's container straight into pandas
def load_csv_from_blob(blob_path):
    """
    Download a blob and parse it as CSV.

    The blob is fetched in full through the module-level ``container_client``
    and handed to ``pd.read_csv`` as an in-memory byte stream.

    :param blob_path: Name/path of the blob inside the container
    :return: DataFrame parsed from the blob's bytes
    """
    download = container_client.get_blob_client(blob_path).download_blob()
    raw_bytes = download.readall()
    return pd.read_csv(BytesIO(raw_bytes))

In [65]:
# Load the Prague and Barcelona listings
try:
    prg_listings = load_csv_from_blob("prague_listings.csv")
    bcn_listings = load_csv_from_blob("barcelona_listings.csv")
    print("Data loaded successfully!")
except Exception as e:
    print("Failed to load data from Azure Blob Storage:", e)
    # Every later cell needs both frames; surface the real error here rather
    # than a NameError further down.
    raise

Data loaded successfully!


## Data Preparation

In [66]:
# Work on copies of the raw listings, keeping only the three columns the
# place-count queries need: the listing id and its coordinates.
location_columns = ['id', 'latitude', 'longitude']
bcn_listings_reduced = bcn_listings.copy()[location_columns]
prg_listings_reduced = prg_listings.copy()[location_columns]

In [67]:
bcn_listings_reduced

Unnamed: 0,id,latitude,longitude
0,18674,41.405560,2.172620
1,23197,41.412432,2.219750
2,32711,41.405660,2.170150
3,34241,41.380620,2.175170
4,34981,41.379780,2.176230
...,...,...,...
19477,1234621063113396232,41.380450,2.164980
19478,1234633310288552263,41.380551,2.164990
19479,1234636058660290258,41.382344,2.164517
19480,1234659029532006287,41.386875,2.178370


In [68]:
prg_listings_reduced

Unnamed: 0,id,latitude,longitude
0,23163,50.082290,14.415520
1,23169,50.088300,14.422720
2,26755,50.087290,14.431790
3,30762,50.088210,14.426230
4,42514,50.082280,14.413450
...,...,...,...
9061,1184244083406270577,50.075804,14.421940
9062,1184640937586907240,50.070050,14.379610
9063,1184712727251428871,50.061652,14.436929
9064,1184771207632327200,50.062839,14.448081


# OVERPASS API

## Function Definition

In [None]:
# Function to query amenities near a location
def get_amenity_count(lat, lng, radius=1000, key="amenity", value="restaurant",
                      overpass_url=None, timeout=180, element_type="node"):
    """
    Query the Overpass API for a specific amenity count near a location.

    :param lat: Latitude of the centre point
    :param lng: Longitude of the centre point
    :param radius: Radius passed to the Overpass ``around`` filter
    :param key: Tag key to match (e.g. "amenity", "tourism")
    :param value: Tag value to match (e.g. "restaurant")
    :param overpass_url: Overpass interpreter endpoint. ``None`` means
        ``OVERPASS_URLS[0]``, looked up when the function is called so that this
        definition does not depend on ``OVERPASS_URLS`` already existing.
    :param timeout: Client-side timeout in seconds given to ``requests.get``
    :param element_type: Overpass element selector. The default "node" keeps the
        original behaviour of counting nodes only.
        NOTE(review): features such as parks and malls are presumably mapped
        mostly as ways/relations, which a node-only query does not count;
        passing "nwr" may be what is wanted - confirm before re-running.
    :return: Number of elements in the response's "elements" list
    :raises requests.exceptions.HTTPError: for non-success HTTP statuses (logged
        first, then re-raised so the caller can switch endpoint)
    """
    if overpass_url is None:
        overpass_url = OVERPASS_URLS[0]

    query = f"""
    [out:json];
    {element_type}["{key}"="{value}"](around:{radius},{lat},{lng});
    out body;
    """
    try:
        # Without a timeout a stalled server would block the whole run forever.
        response = requests.get(overpass_url, params={"data": query}, timeout=timeout)
        response.raise_for_status()  # Raise error for bad responses
        data = response.json()
        return len(data.get("elements", []))
    except requests.exceptions.HTTPError as e:
        print(f"Error querying Overpass API ({overpass_url}) for location ({lat}, {lng}) with {key}={value}: {e}")
        raise

## Get Amenities for Barcelona

In [None]:
# Define Overpass API endpoints
OVERPASS_URLS = [
    "https://overpass-api.de/api/interpreter",
    "https://lz4.overpass-api.de/api/interpreter",
    "https://z.overpass-api.de/api/interpreter",
    "https://kumi.systems/api/interpreter"
]

# Working copy of the Barcelona ids/coordinates
bcn_listings_updated = bcn_listings_reduced.copy()

# Tag filters to count around each listing, and the column each count goes into
amenities = [
    {"key": "amenity", "value": "restaurant", "column": "restaurant_count"},
    {"key": "tourism", "value": "hotel", "column": "hotel_count"},
    {"key": "leisure", "value": "park", "column": "park_count"},
    {"key": "tourism", "value": "museum", "column": "museum_count"},
    {"key": "shop", "value": "mall", "column": "shopping_center_count"}
]

# Process DataFrame in batches of 10
batch_size = 10
# Give every endpoint two chances before giving up on a batch
max_attempts_per_batch = 2 * len(OVERPASS_URLS)
results = []
skipped_batches = []  # (start, end) ranges that never succeeded
current_url_index = 0  # Index for Overpass URLs

for start in range(0, len(bcn_listings_updated), batch_size):
    # Clamp so the progress message of the last batch shows real row positions
    end = min(start + batch_size, len(bcn_listings_updated))

    for attempt in range(1, max_attempts_per_batch + 1):
        # Start from a clean slice each attempt so no half-filled columns remain
        batch = bcn_listings_updated.iloc[start:end].copy()

        try:
            for amenity in amenities:
                batch[amenity["column"]] = batch.apply(
                    lambda row: get_amenity_count(
                        row["latitude"], row["longitude"],
                        key=amenity["key"], value=amenity["value"],
                        overpass_url=OVERPASS_URLS[current_url_index]
                    ), axis=1
                )

        except requests.exceptions.HTTPError as e:
            # Read the status from the response, not by searching the message
            status_code = getattr(e.response, "status_code", None)
            if status_code != 403:
                print(f"Unhandled error: {e}")
                raise  # Re-raise for other errors
            # 403 Forbidden: rotate to the next endpoint and retry this same
            # batch (previously the batch was dropped and its rows were lost).
            print(f"Switching Overpass API endpoint due to error: {e}")
            current_url_index = (current_url_index + 1) % len(OVERPASS_URLS)
            sleep(5)  # Wait before retrying

        except Exception as e:
            print(f"Unexpected error: {e}")
            raise  # Re-raise unexpected errors

        else:
            results.append(batch)  # Store the processed batch
            print(f"Processed batch {start} to {end - 1} with {OVERPASS_URLS[current_url_index]}")
            break

    else:
        # Every attempt hit a 403: record the gap instead of losing it silently
        skipped_batches.append((start, end))
        print(f"Giving up on batch {start} to {end - 1} after {max_attempts_per_batch} attempts")

if skipped_batches:
    print(f"Batches without results (row ranges): {skipped_batches}")

# Combine all batches back into a single DataFrame
bcn_places_count = pd.concat(results, ignore_index=True)

# Display the updated DataFrame
print(bcn_places_count)

# Save in csv
bcn_places_count.to_csv("bcn_places_count.csv", index=False)


In [111]:
import os
import pandas as pd

# Directory holding the partial result files. The default is the original
# author's local checkout; set RENTSCAPE_DATA_DIR to run the notebook elsewhere.
DATA_DIR = os.environ.get(
    "RENTSCAPE_DATA_DIR",
    r"C:\Users\Victor\Documents\GitHub\datalakesairbnb\victor\data",
)

# Specific CSV file names to load (order preserved from the original lists)
bcn_file_names = [
    "bcn_listings_final_1.csv",
    "bcn_listings_final_2.csv",
    "bcn_listings_final_2_1_2.csv",
    "bcn_listings_final_2_1_3.csv",
    "bcn_listings_final_2_2_1.csv",
    "bcn_listings_final_2_2_2.csv",
    "bcn_listings_final_2_2_3.csv",
    "bcn_listings_final_3.csv",
    "bcn_listings_final_3_1.csv",
    "bcn_listings_final_3_2.csv",
    "bcn_listings_final_4.csv",
    "bcn_listings_final_4_1.csv",
    "bcn_listings_final_4_2.csv",
    "bcn_listings_final_4_3.csv",
    "bcn_listings_final_5.csv",
    "bcn_listings_final_5_1.csv",
    "bcn_listings_final_5_2.csv",
    "bcn_listings_final_5_3.csv",
    "bcn_listings_final_6.csv",
    "bcn_listings_final_6_1.csv",
    "bcn_listings_final_6_2.csv",
    "bcn_listings_final_6_3.csv",
    "bcn_listings_final_7.csv",
    "bcn_listings_missing_rows.csv",
]

prg_file_names = [
    "prg_places_count_1.csv",
    "prg_places_count_2.csv",
    "prg_places_count_3.csv",
    "prg_places_count_4.csv",
    "prg_places_count_5.csv",
    "prg_places_count_6.csv",
    "prg_places_count_7.csv",
    "prg_places_count_8.csv",
    "prg_places_count_9.csv",
    "prg_places_count.csv",
]

# Full paths, built once from DATA_DIR
csv_files_bcn = [os.path.join(DATA_DIR, file_name) for file_name in bcn_file_names]
csv_files_prg = [os.path.join(DATA_DIR, file_name) for file_name in prg_file_names]


# Load all CSV files into a list of dfs, skipping missing files
dfs = []
for file in csv_files_prg:
    if not os.path.exists(file):
        print(f"File not found: {file}, skipping.")
        continue
    try:
        dfs.append(pd.read_csv(file))
        print(f"Loaded file: {file}")
    except Exception as e:
        # Best effort: one unreadable partial file should not stop the merge
        print(f"Error loading file {file}: {e}")

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that names the real problem instead.
if not dfs:
    raise FileNotFoundError(
        "None of the files in csv_files_prg could be loaded; nothing to combine."
    )

# Combine all loaded dfs into one
combined_df = pd.concat(dfs, ignore_index=True)

# Remove duplicates based on the 'id' column (the first occurrence is kept)
prg_places_count = combined_df.drop_duplicates(subset='id')

# Save the combined DataFrame to a CSV file
output_path = r"C:\Users\Victor\Documents\GitHub\datalakesairbnb\victor\prg_places_count.csv"
prg_places_count.to_csv(output_path, index=False)
print(f"Combined CSV saved as '{output_path}'.")

# Display the first few rows of the combined DataFrame
print(prg_places_count.head())


Loaded file: C:\Users\Victor\Documents\GitHub\datalakesairbnb\victor\data\prg_places_count_1.csv
Loaded file: C:\Users\Victor\Documents\GitHub\datalakesairbnb\victor\data\prg_places_count_2.csv
Loaded file: C:\Users\Victor\Documents\GitHub\datalakesairbnb\victor\data\prg_places_count_3.csv
Loaded file: C:\Users\Victor\Documents\GitHub\datalakesairbnb\victor\data\prg_places_count_4.csv
Loaded file: C:\Users\Victor\Documents\GitHub\datalakesairbnb\victor\data\prg_places_count_5.csv
Loaded file: C:\Users\Victor\Documents\GitHub\datalakesairbnb\victor\data\prg_places_count_6.csv
Loaded file: C:\Users\Victor\Documents\GitHub\datalakesairbnb\victor\data\prg_places_count_7.csv
Loaded file: C:\Users\Victor\Documents\GitHub\datalakesairbnb\victor\data\prg_places_count_8.csv
Loaded file: C:\Users\Victor\Documents\GitHub\datalakesairbnb\victor\data\prg_places_count_9.csv
Loaded file: C:\Users\Victor\Documents\GitHub\datalakesairbnb\victor\data\prg_places_count.csv
Combined CSV saved as 'C:\Users\

In [112]:
prg_places_count

Unnamed: 0,id,latitude,longitude,restaurant_count,hotel_count,park_count,museum_count,shopping_center_count
0,23163,50.08229,14.41552,499,154,0,49,0
1,23169,50.08830,14.42272,495,141,0,51,0
2,26755,50.08729,14.43179,416,109,0,35,0
3,30762,50.08821,14.42623,472,131,0,48,0
4,42514,50.08228,14.41345,460,146,0,44,0
...,...,...,...,...,...,...,...,...
15363,38023891,50.07848,14.42791,421,160,0,26,0
15364,38044675,50.07848,14.42791,421,160,0,26,0
15365,38056289,50.12552,14.49682,18,1,0,0,0
15366,38062553,50.09025,14.42739,397,108,0,32,0


In [108]:
# Ids of every Prague listing, and ids that already have place counts
reference_ids = prg_listings_reduced['id']
final_ids = prg_places_count['id']

# Boolean mask over the listings: True where no place counts exist yet
not_yet_counted = ~reference_ids.isin(final_ids)
missing_ids = reference_ids[not_yet_counted]

# Listings (id + coordinates) that still need to be queried
missing_rows_df = prg_listings_reduced[not_yet_counted]

# Persist them so the backfill can be resumed later
missing_rows_output_path = r"C:\Users\Victor\Documents\GitHub\datalakesairbnb\victor\missing_rows.csv"
missing_rows_df.to_csv(missing_rows_output_path, index=False)
print(f"Missing rows saved to '{missing_rows_output_path}'.")

# Preview of the rows still to be processed
print(missing_rows_df.head())


Missing rows saved to 'C:\Users\Victor\Documents\GitHub\datalakesairbnb\victor\missing_rows.csv'.
            id   latitude  longitude
1090  13180954  50.086370  14.418850
1091  13181016  50.086370  14.418850
1092  13181043  50.086370  14.418850
1093  13217443  50.084526  14.416456
1094  13221779  50.083400  14.454802


In [109]:
# Define Overpass API endpoints
OVERPASS_URLS = [
    "https://overpass-api.de/api/interpreter",
    "https://lz4.overpass-api.de/api/interpreter",
    "https://z.overpass-api.de/api/interpreter",
    "https://kumi.systems/api/interpreter"
]

# Function to query amenities near a location
def get_amenity_count(lat, lng, radius=1000, key="amenity", value="restaurant",
                      overpass_url=None, timeout=180, element_type="node"):
    """
    Query the Overpass API for a specific amenity count near a location.

    :param lat: Latitude of the centre point
    :param lng: Longitude of the centre point
    :param radius: Radius passed to the Overpass ``around`` filter
    :param key: Tag key to match (e.g. "amenity", "tourism")
    :param value: Tag value to match (e.g. "restaurant")
    :param overpass_url: Overpass interpreter endpoint. ``None`` means
        ``OVERPASS_URLS[0]``, looked up at call time so edits to the endpoint
        list take effect without redefining this function.
    :param timeout: Client-side timeout in seconds given to ``requests.get``
    :param element_type: Overpass element selector. The default "node" keeps the
        original behaviour of counting nodes only.
        NOTE(review): the saved Prague results show park and shopping-centre
        counts of 0 everywhere; presumably those features are mapped as
        ways/relations, which a node-only query misses. Passing "nwr" may be
        what is wanted - confirm before re-running.
    :return: Number of elements in the response's "elements" list
    :raises requests.exceptions.HTTPError: for non-success HTTP statuses (logged
        first, then re-raised so the caller can switch endpoint)
    """
    if overpass_url is None:
        overpass_url = OVERPASS_URLS[0]

    query = f"""
    [out:json];
    {element_type}["{key}"="{value}"](around:{radius},{lat},{lng});
    out body;
    """
    try:
        # Without a timeout a stalled server would block the whole run forever.
        response = requests.get(overpass_url, params={"data": query}, timeout=timeout)
        response.raise_for_status()  # Raise error for bad responses
        data = response.json()
        return len(data.get("elements", []))
    except requests.exceptions.HTTPError as e:
        print(f"Error querying Overpass API ({overpass_url}) for location ({lat}, {lng}) with {key}={value}: {e}")
        raise

# Working copy of the Prague listings that still lack place counts
prg_listings_updated = missing_rows_df.copy()

# Tag filters to count around each listing, and the column each count goes into
amenities = [
    {"key": "amenity", "value": "restaurant", "column": "restaurant_count"},
    {"key": "tourism", "value": "hotel", "column": "hotel_count"},
    {"key": "leisure", "value": "park", "column": "park_count"},
    {"key": "tourism", "value": "museum", "column": "museum_count"},
    {"key": "shop", "value": "mall", "column": "shopping_center_count"}
]

# Process DataFrame in batches of 10
batch_size = 10
# Give every endpoint two chances before giving up on a batch
max_attempts_per_batch = 2 * len(OVERPASS_URLS)
results = []
skipped_batches = []  # (start, end) ranges that never succeeded
current_url_index = 0  # Index for Overpass URLs

for start in range(0, len(prg_listings_updated), batch_size):
    # Clamp so the progress message of the last batch shows real row positions
    end = min(start + batch_size, len(prg_listings_updated))

    for attempt in range(1, max_attempts_per_batch + 1):
        # Start from a clean slice each attempt so no half-filled columns remain
        batch = prg_listings_updated.iloc[start:end].copy()

        try:
            for amenity in amenities:
                batch[amenity["column"]] = batch.apply(
                    lambda row: get_amenity_count(
                        row["latitude"], row["longitude"],
                        key=amenity["key"], value=amenity["value"],
                        overpass_url=OVERPASS_URLS[current_url_index]
                    ), axis=1
                )

        except requests.exceptions.HTTPError as e:
            # Read the status from the response, not by searching the message
            status_code = getattr(e.response, "status_code", None)
            if status_code != 403:
                print(f"Unhandled error: {e}")
                raise  # Re-raise for other errors
            # 403 Forbidden: rotate to the next endpoint and retry this same
            # batch (previously the batch was dropped and its rows were lost).
            print(f"Switching Overpass API endpoint due to error: {e}")
            current_url_index = (current_url_index + 1) % len(OVERPASS_URLS)
            sleep(5)  # Wait before retrying

        except Exception as e:
            print(f"Unexpected error: {e}")
            raise  # Re-raise unexpected errors

        else:
            results.append(batch)  # Store the processed batch
            print(f"Processed batch {start} to {end - 1} with {OVERPASS_URLS[current_url_index]}")
            break

    else:
        # Every attempt hit a 403: record the gap instead of losing it silently
        skipped_batches.append((start, end))
        print(f"Giving up on batch {start} to {end - 1} after {max_attempts_per_batch} attempts")

if skipped_batches:
    print(f"Batches without results (row ranges): {skipped_batches}")


Processed batch 0 to 9 with https://overpass-api.de/api/interpreter
Processed batch 10 to 19 with https://overpass-api.de/api/interpreter
Processed batch 20 to 29 with https://overpass-api.de/api/interpreter
Processed batch 30 to 39 with https://overpass-api.de/api/interpreter
Processed batch 40 to 49 with https://overpass-api.de/api/interpreter
Processed batch 50 to 59 with https://overpass-api.de/api/interpreter
Processed batch 60 to 69 with https://overpass-api.de/api/interpreter
Processed batch 70 to 79 with https://overpass-api.de/api/interpreter
Processed batch 80 to 89 with https://overpass-api.de/api/interpreter
Processed batch 90 to 99 with https://overpass-api.de/api/interpreter
Processed batch 100 to 109 with https://overpass-api.de/api/interpreter
Processed batch 110 to 119 with https://overpass-api.de/api/interpreter
Processed batch 120 to 129 with https://overpass-api.de/api/interpreter
Processed batch 130 to 139 with https://overpass-api.de/api/interpreter
Processed batc

In [110]:
# Combine the backfilled batches into a single DataFrame.
# They get their own name: assigning them to `prg_places_count` would replace
# the full combined frame with only the backfilled rows, and the later
# monument-distance cell copies `prg_places_count`.
if results:
    prg_missing_places_count = pd.concat(results, ignore_index=True)

    # Display the backfilled rows
    print(prg_missing_places_count)

    prg_missing_places_count.to_csv("prg_places_count_9.csv", index=False)

    # Fold the backfilled rows into the combined frame. drop_duplicates keeps
    # the first occurrence, so re-running this cell does not duplicate ids.
    prg_places_count = pd.concat(
        [prg_places_count, prg_missing_places_count], ignore_index=True
    ).drop_duplicates(subset="id")
else:
    # pd.concat raises on an empty list; with nothing backfilled there is
    # nothing to add.
    print("No backfilled batches to combine; prg_places_count left unchanged.")

           id   latitude  longitude  restaurant_count  hotel_count  \
0    13180954  50.086370  14.418850               516          159   
1    13181016  50.086370  14.418850               516          159   
2    13181043  50.086370  14.418850               516          159   
3    13217443  50.084526  14.416456               529          160   
4    13221779  50.083400  14.454802               158           33   
..        ...        ...        ...               ...          ...   
235  15723804  50.079290  14.425100               469          183   
236  15739896  50.066370  14.422830                85           48   
237  15749959  50.083530  14.404210               227           87   
238  15757628  50.068950  14.401490               123           17   
239  15759088  50.055700  14.433390                47           11   

     park_count  museum_count  shopping_center_count  
0             0            54                      0  
1             0            54                    

# GOOGLE MAPS API

## Setup Google Maps API

The API key is for the Google account:
datalakeshslu@outlook.com

**There is a $200 monthly limit in API calls.**

In [59]:
# Google Maps API key
# Read from the environment so the key is not stored in the notebook. A missing
# variable raises KeyError here, before any client is built.
# NOTE(review): the key that used to be hardcoded in this cell should be treated
# as leaked and revoked/rotated.
API_KEY = os.environ["GOOGLE_MAPS_API_KEY"]

# Initialize the client
try:
    gmaps = googlemaps.Client(key=API_KEY)
    print("Connected successfully to Google Maps API!")
except Exception as e:
    print(f"Couldn't connect to Google Maps API: {e}")
    # Without `gmaps` the later cells cannot work; stop here with the real error.
    raise

Connected successfully to Google Maps API!


## Function Definition

In [20]:
def get_place_counts_for_row(row, radius, keywords):
    """
    Build the ``{"<keyword>_count": n}`` mapping for one listing row.

    Rows with a null latitude or longitude are reported and get 0 for every
    keyword; otherwise each keyword is counted through
    ``get_places_count_with_pagination``.
    NOTE(review): that helper is not defined in the visible part of this
    notebook - confirm where it comes from.

    :param row: Mapping/Series exposing 'latitude' and 'longitude'
    :param radius: Search radius forwarded to the counting helper
    :param keywords: Iterable of keywords to count
    :return: dict keyed by "<keyword>_count"
    """
    lat = row['latitude']
    lng = row['longitude']

    # Guard clause: nothing to look up without both coordinates
    if pd.isnull(lat) or pd.isnull(lng):
        print("Skipping row due to missing coordinates.")
        return {f"{keyword}_count": 0 for keyword in keywords}

    return {
        f"{keyword}_count": get_places_count_with_pagination(lat, lng, radius, keyword)
        for keyword in keywords
    }

def add_place_counts_to_listings(dataframe, radius, keywords, max_workers=5):
    """
    Function to obtain a count of nearby places by keyword within a radius from coordinates in the original set,
    leveraging parallel processing.

    One task per row is submitted to a thread pool; each result is written into
    the row's "<keyword>_count" columns as it completes. A row whose task raises
    is reported and keeps the initial 0 values.

    :param dataframe: Frame with 'latitude' and 'longitude' columns. It is
        modified in place (count columns are added) and also returned.
    :param radius: Search radius forwarded to ``get_place_counts_for_row``
    :param keywords: Keywords to count; one "<keyword>_count" column each
    :param max_workers: Size of the thread pool
    :return: The same ``dataframe`` object, with the count columns filled in
    """
    # Prepare new columns for the counts
    for keyword in keywords:
        dataframe[f"{keyword}_count"] = 0

    total_rows = len(dataframe)
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Map each future back to the index label of the row it was built from
        futures = {
            executor.submit(get_place_counts_for_row, row, radius, keywords): index
            for index, row in dataframe.iterrows()
        }

        # Tasks finish out of order, so progress counts completions instead of
        # using `index + 1`, which showed a row label and raised TypeError for
        # non-numeric indexes.
        for completed, future in enumerate(as_completed(futures), start=1):
            index = futures[future]
            try:
                counts = future.result()
                for column, count in counts.items():
                    dataframe.at[index, column] = count
            except Exception as e:
                print(f"Error processing row {index}: {e}")

            # Print progress
            print(f"Processed row {completed} / {total_rows}")

    return dataframe

## Get amenities for Prague

In [0]:
# Search parameters for the Google Maps lookup.
# NOTE(review): `radius` and `keywords` are defined here but nothing in this
# cell uses them, and `add_place_counts_to_listings` is never called. The rest
# of the cell repeats the Overpass "combine batches" step and reads `results`
# from the Overpass run - it looks like a copy-paste. Confirm what was intended.
radius = 100  # in meters
keywords = ['restaurant', 'hotel', 'park', 'museum', 'mall']

# Combine all batches back into a single DataFrame
prg_places_count = pd.concat(results, ignore_index=True)

# Display the updated DataFrame
print(prg_places_count)

# Writes to the same file name as the Overpass backfill cell
prg_places_count.to_csv("prg_places_count_9.csv", index=False)

# Calculate distances to Monuments based on Coordinates

## Function Definition

In [None]:
def calculate_distance(lat1, lon1, lat2, lon2):
    """
    Compute the distance in kilometres between two (latitude, longitude) points using the haversine formula.

    :param lat1: Latitude of the first point, in degrees
    :param lon1: Longitude of the first point, in degrees
    :param lat2: Latitude of the second point, in degrees
    :param lon2: Longitude of the second point, in degrees
    :return: Distance in kilometres
    """
    # Earth radius in kilometres
    earth_radius_km = 6371.0

    # Degrees -> radians for all four coordinates
    phi1, lam1, phi2, lam2 = (
        math.radians(angle) for angle in (lat1, lon1, lat2, lon2)
    )

    # Half of the latitude / longitude differences
    half_dphi = (phi2 - phi1) / 2
    half_dlam = (lam2 - lam1) / 2

    # Haversine term and the central angle it implies
    hav = math.sin(half_dphi)**2 + math.cos(phi1) * math.cos(phi2) * math.sin(half_dlam)**2
    central_angle = 2 * math.atan2(math.sqrt(hav), math.sqrt(1 - hav))

    return earth_radius_km * central_angle

## BCN - Barcelona

In [None]:
bcn_places = bcn_places_count.copy()

# Coordinates of the main Barcelona landmarks
bcn_monuments = {
    name: {"latitude": lat, "longitude": lon}
    for name, lat, lon in [
        ("Sagrada Familia", 41.4036, 2.1744),
        ("Casa Milà", 41.3953, 2.1619),
        ("Casa Batlló", 41.3917, 2.1649),
        ("Parc Güell", 41.4145, 2.1527),
        ("La Rambla", 41.3818, 2.1725),
        ("Montjuïc", 41.3636, 2.1522),
    ]
}

# Compute the distance to each monument and add it as a new column
for monument_name, coords in bcn_monuments.items():
    column_name = "dist_to_" + monument_name.lower().replace(' ', '_')
    target_lat, target_lon = coords["latitude"], coords["longitude"]
    bcn_places[column_name] = bcn_places.apply(
        lambda row, lat=target_lat, lon=target_lon: calculate_distance(
            row["latitude"], row["longitude"], lat, lon
        ),
        axis=1,
    )

bcn_places.to_csv("bcn_places.csv", index=False)

bcn_places.head(5)

## PRG - Prague

In [None]:
prg_places = prg_places_count.copy()

# Coordinates of the main Prague landmarks
prg_monuments = {
    name: {"latitude": lat, "longitude": lon}
    for name, lat, lon in [
        ("Prague Castle", 50.0903, 14.4005),
        ("Charles Bridge", 50.0865, 14.4114),
        ("Old Town Square", 50.0875, 14.4211),
        ("St. Vitus Cathedral", 50.0900, 14.4009),
        ("Vysehrad", 50.0644, 14.4189),
        ("Petrin Tower", 50.0835, 14.3959),
    ]
}

# Compute the distance to each monument and add it as a new column
for monument_name, coords in prg_monuments.items():
    column_name = "dist_to_" + monument_name.lower().replace(' ', '_')
    target_lat, target_lon = coords["latitude"], coords["longitude"]
    prg_places[column_name] = prg_places.apply(
        lambda row, lat=target_lat, lon=target_lon: calculate_distance(
            row["latitude"], row["longitude"], lat, lon
        ),
        axis=1,
    )

prg_places.to_csv("prg_places.csv", index=False)

prg_places.head(5)

## Data Upload to Blob Storage

In [None]:
from azure.storage.blob import BlobServiceClient
import pandas as pd

# SAS token and container URL
# The SAS token is a credential, so it is read from the environment rather than
# stored in the notebook. The token previously embedded here carried an expiry
# (`se=`) of 2025-01-28, so a fresh one has to be issued anyway.
sas_token = os.environ["AZURE_STORAGE_SAS_TOKEN"]
container_url = "https://datalakestoragerentscape.blob.core.windows.net/mapsapi-rentscape-blob"

# Write a DataFrame to CSV and push that file to the "mapsapi-rentscape-blob" container
def upload_to_blob(df, blob_name):
    """
    Save ``df`` as a CSV file and upload it as a blob.

    The CSV is first written to the working directory under ``blob_name`` (the
    local file is left in place), then uploaded under the same name, replacing
    any existing blob.

    :param df: DataFrame to upload
    :param blob_name: Used both as the local file name and as the blob name
    """
    # Stage the frame on disk (written without the index column)
    local_csv = f"{blob_name}"
    df.to_csv(local_csv, index=False)

    # Client for the target container, authenticated with the SAS token
    service = BlobServiceClient(account_url=container_url, credential=sas_token)
    target_container = service.get_container_client(container="mapsapi-rentscape-blob")

    # Upload the CSV file, overwriting a blob of the same name
    with open(local_csv, "rb") as payload:
        target_container.upload_blob(name=blob_name, data=payload, overwrite=True)

    print(f"Uploaded {blob_name} to {container_url}")

# Upload both enriched city tables (place counts + monument distances)
upload_to_blob(df=bcn_places, blob_name="bcn_places.csv")
upload_to_blob(df=prg_places, blob_name="prg_places.csv")
