# DISTANCE MATRIX

## Libraries

In [1]:
import googlemaps
import numpy as np
import os
import pandas as pd
import re
from scipy.spatial.distance import cdist

## Constants

In [10]:
# Root folder that holds the numbered data sub-folders read by this notebook
# (machine-specific absolute path).
input_folder_path = "/Users/sammcmanagan/Library/Mobile Documents/com~apple~CloudDocs/Documents/M.Sc Statistics & Data Science/Modern Data Analytics/MDA-Project/Data"
# Folder that receives one "<city>_cost_matrix.csv" per processed city.
output_folder_path = os.path.join(input_folder_path, "6_DistanceMatricesTest")

# Cities to process; each name is used as a file-name prefix when loading data.
cities = ["Antwerpen", "Brugge", "Brussels", "Charleroi", "Gent", "Leuven", "Liege", "Oostende"]


# SECURITY: the API key must never be stored in the notebook itself. It is read
# from the environment instead. Any key that was previously committed in this
# file should be considered leaked and be revoked in the Google Cloud console.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable before running this notebook."
    )
gmaps = googlemaps.Client(key=GOOGLE_API_KEY)

# Placeholder written into every matrix cell that is not one of the closest AEDs.
BIG_LENGTH = 1000
CLOSEST_AEDS = 8  # Number of closest AEDs to consider

## Functions

In [4]:
# Turns a "POINT (<longitude> <latitude>)" string into a (latitude, longitude) pair
def extract_coordinates(geometry):
    """Parse a ``POINT (x y)`` string and return ``(latitude, longitude)``.

    The first number in the text is treated as the longitude and the second
    as the latitude; the returned tuple swaps them so latitude comes first.

    Args:
        geometry: text expected to start with ``POINT (<number> <number>)``.

    Returns:
        A ``(latitude, longitude)`` tuple of floats.

    Raises:
        ValueError: if the text does not start with the expected pattern, or
            if either captured token cannot be converted to ``float``.
    """
    parsed = re.match(r'POINT \(([^ ]+) ([^ ]+)\)', geometry)
    if parsed is None:
        raise ValueError("Invalid geometry format")

    lon_text, lat_text = parsed.group(1, 2)
    return float(lat_text), float(lon_text)

# Walking distance between one origin and one destination (Distance Matrix API)
def calculate_distance(origin, destination):
    """Ask the Google Distance Matrix API for a single walking distance.

    Args:
        origin: location sent as the only origin of the request.
        destination: location sent as the only destination of the request.

    Returns:
        The ``distance['value']`` field of the first element of the first
        response row (the API documents this value in meters), or
        ``float('inf')`` when the response carries no such field or when any
        exception is raised while querying or parsing.
    """
    unreachable = float('inf')  # or any other default value like -1 or 0

    try:
        response = gmaps.distance_matrix(origins=[origin], destinations=[destination], mode="walking")

        # No rows at all: nothing to read.
        if 'rows' not in response or len(response['rows']) == 0:
            return unreachable

        first_row = response['rows'][0]['elements']
        # No element, or an element without a distance (e.g. no route found).
        if len(first_row) == 0 or 'distance' not in first_row[0]:
            return unreachable

        return first_row[0]['distance']['value']
    except Exception as e:
        # Best effort: report the failure and fall through to the default.
        print(f"Error calculating distance between {origin} and {destination}: {e}")

    return unreachable

# Function to replace all 1 entries with calculated distances
def replace_1_with_distances(closest_matrix_df, distance_fn=None):
    """Overwrite every cell equal to 1 with a computed distance, in place.

    The row label of a flagged cell is used as the origin and its column
    label as the destination.

    Args:
        closest_matrix_df: DataFrame whose cells equal to 1 mark the
            origin/destination pairs that need a distance.
        distance_fn: optional callable ``(origin, destination) -> distance``.
            Defaults to ``calculate_distance``.

    Returns:
        The same DataFrame object that was passed in, after modification.
    """
    if distance_fn is None:
        distance_fn = calculate_distance

    # Find the flagged cells in a single vectorized pass instead of probing
    # each cell with ``.iat``. ``argwhere`` yields positions in row-major
    # order, i.e. the same order as a nested row/column loop.
    flagged_cells = np.argwhere(closest_matrix_df.to_numpy() == 1).tolist()

    for i, j in flagged_cells:
        # Get the origin coordinates from the row index
        origin_coords = closest_matrix_df.index[i]
        # Get the destination coordinates from the column names
        destination_coords = closest_matrix_df.columns[j]

        try:
            # Replace 1 with calculated distance
            closest_matrix_df.iat[i, j] = distance_fn(origin_coords, destination_coords)
        except ValueError as e:
            # Leave the cell untouched and keep going with the remaining ones.
            print(f"Error parsing coordinates at row {i}, col {j}: {e}")

    return closest_matrix_df

## Calculating Distance Matrix

Import each city's data, derive the required latitude/longitude columns, and prepare the closest-AED matrices.

In [5]:
# Maps each city name to its prepared matrix: 1 marks the CLOSEST_AEDS nearest
# AEDs of a row, BIG_LENGTH fills every other cell.
city_data = {}

for city in cities:
    # Import data on the city. Files are addressed by their full path so the
    # process-wide working directory is not changed as a side effect.
    aeds = pd.read_csv(os.path.join(input_folder_path, "2_DataSegmented", city + "_aeds.csv"))
    cards = pd.read_csv(os.path.join(input_folder_path, "2_DataSegmented", city + "_cards_test.csv"))
    new_aeds = pd.read_csv(os.path.join(input_folder_path, "3_DataNewAeds", city + "_new_aeds.csv"))
    status = pd.read_csv(os.path.join(input_folder_path, "5_DataOptimalAeds", city + "_optimal_aeds.csv"))["SelectionStatus"]
    print("Importing " + city + " successful.")

    aeds_coordinates = aeds[['latitude', 'longitude']]
    
    # Extract latitude and longitude from geometry column in new_aeds
    new_aeds[['latitude', 'longitude']] = new_aeds['geometry'].apply(
        lambda x: pd.Series(extract_coordinates(x))
    )
    new_aeds_coordinates = new_aeds[['latitude', 'longitude']]
    
    cards_coordinates = cards[['latitude', 'longitude']]
    
    # Combine aeds_coordinates and new_aeds_coordinates, keep the rows selected
    # by `status`, then append that selection to the existing AEDs.
    # NOTE(review): `status` is applied to existing + new AEDs together, so any
    # existing AED it selects would appear twice in the final list - confirm
    # whether that is intended.
    combined_aeds_coordinates = pd.concat([aeds_coordinates, new_aeds_coordinates], ignore_index=True)
    optimal_aeds = combined_aeds_coordinates[status]
    combined_aeds_coordinates = pd.concat([aeds_coordinates, optimal_aeds], ignore_index=True)
    
    # Calculate the Euclidean distance matrix: one row per `cards` entry, one
    # column per combined AED.
    # NOTE(review): this is a Euclidean distance on raw latitude/longitude
    # degrees, used only to shortlist candidates; a degree of longitude is
    # shorter than a degree of latitude away from the equator - confirm this
    # approximation is acceptable.
    distance_matrix = cdist(cards_coordinates, combined_aeds_coordinates, metric='euclidean')
    
    # Process the distance matrix to mark the closest AEDs
    closest_matrix = np.zeros_like(distance_matrix)
    for row in range(distance_matrix.shape[0]):
        # Slicing tolerates cities with fewer than CLOSEST_AEDS AEDs.
        row_indices = np.argsort(distance_matrix[row, :])[:CLOSEST_AEDS]
        closest_matrix[row, row_indices] = 1
    
    # Convert coordinates to string for use as row names
    cards_coordinates_str = cards_coordinates.apply(lambda x: f"{x['latitude']}, {x['longitude']}", axis=1)
    
    # Create DataFrame labelled by the "lat, lon" strings (rows) and by the
    # AED coordinate tuples (columns); these labels are later used as the
    # origin and destination of each distance request.
    closest_matrix_df = pd.DataFrame(closest_matrix, index=cards_coordinates_str, columns=combined_aeds_coordinates.apply(tuple, axis=1))
    
    # Change 0 values to BIG_LENGTH for sake of distance matrix
    closest_matrix_df.replace(0, BIG_LENGTH, inplace=True)
    
    # Store the processed data for the city in the dictionary
    city_data[city] = closest_matrix_df

Importing Antwerpen successful.
Importing Brugge successful.
Importing Brussels successful.
Importing Charleroi successful.
Importing Gent successful.
Importing Leuven successful.
Importing Liege successful.
Importing Oostende successful.


In [19]:
# Show the (rows, columns) size of every prepared city matrix.
for city, df in city_data.items():
    print(city, df.shape, sep="\n")

Antwerpen
(578, 1444)
Brugge
(135, 322)
Brussels
(1323, 4048)
Charleroi
(356, 730)
Gent
(317, 778)
Leuven
(106, 334)
Liege
(510, 1372)
Oostende
(100, 138)


In [18]:
# For every city: ask for confirmation, query the walking distances for the
# flagged cells, and write the resulting cost matrix to a CSV file.
for city in cities:
    print("Current city: " + city)

    # Count the cells that are actually still flagged with 1. The matrices are
    # modified in place, so on a re-run (or for a city with fewer than
    # CLOSEST_AEDS AEDs) this differs from rows * CLOSEST_AEDS.
    num = int((city_data[city].to_numpy() == 1).sum())
    confirmation = input(f"This will initialize {num} API requests. Are you sure? (yes/no): ")

    if confirmation.strip().lower() == "yes":
        # Make sure the destination exists before spending any API requests;
        # otherwise a missing folder would only surface after all of them.
        os.makedirs(output_folder_path, exist_ok=True)

        # Replace all 1 entries with calculated distances for the current city DataFrame
        closest_matrix_with_distances = replace_1_with_distances(city_data[city])
            
        csv_file_path = os.path.join(output_folder_path, f"{city}_cost_matrix.csv")
        
        # Write the resulting DataFrame to a CSV file
        closest_matrix_with_distances.to_csv(csv_file_path)
    else:
        print("OK. Will not procced.\n")

Current city: Antwerpen
OK. Will not procced.

Current city: Brugge
Current city: Brussels
Current city: Charleroi
Current city: Gent
Current city: Leuven
Current city: Liege
Current city: Oostende
