In [1]:
import pandas as pd
import numpy as np
from math import radians, cos, sin, asin, sqrt
import time

In [2]:
# Load the data
stations = pd.read_csv('candidate_stations_P300_kmeans_no_snap.csv')
pois = pd.read_csv('reduced_pois2.csv')

In [3]:
# Print basic details
print(f"Stations: {len(stations)} candidates")
print(f"POIs: {len(pois)} demand points")
print(f"Station columns: {stations.columns.tolist()}")
print(f"POI columns: {pois.columns.tolist()}")

Stations: 379 candidates
POIs: 885 demand points
Station columns: ['candidate_id', 'centroid_lon', 'centroid_lat', 'is_existing_station', 'original_candidate_id', 'snapped_station_id', 'method']
POI columns: ['poi_id', 'lat', 'lon', 'category', 'weight', 'original_poi_count', 'original_poi_ids']


In [4]:
I = len(pois)      # Number of POIs (demand points)
J = len(stations)  # Number of stations (candidate locations)
print(f"Loaded {I} POIs and {J} stations")

Loaded 885 POIs and 379 stations


In [5]:
# Define haversine distance function
def haversine(lat1, lon1, lat2, lon2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    Returns distance in kilometers
    """
    # Convert decimal degrees to radians
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    
    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a))
    r = 6371  # Radius of earth in kilometers
    return c * r

In [6]:
# Calculate distance matrix
print(f"\n Calculating distance matrix ({I} × {J})...")

start_time = time.time()
distance_matrix = np.zeros((I, J))

# Progress tracking
for i in range(I):
    if (i + 1) % 5000 == 0:  # Print progress every 5000 POIs
        elapsed = time.time() - start_time
        print(f"  Progress: {i+1}/{I} POIs ({100*(i+1)/I:.1f}%) - {elapsed:.1f}s elapsed")
    
    poi_lat = pois.iloc[i]['lat']
    poi_lon = pois.iloc[i]['lon']
    
    for j in range(J):
        station_lat = stations.iloc[j]['centroid_lat']
        station_lon = stations.iloc[j]['centroid_lon']
        
        distance_matrix[i, j] = haversine(poi_lat, poi_lon, station_lat, station_lon)

elapsed = time.time() - start_time
print(f" Distance matrix calculated in {elapsed:.1f} seconds")


 Calculating distance matrix (885 × 379)...
 Distance matrix calculated in 8.2 seconds


In [7]:
np.savetxt('distance_matrix_reduce2.csv', distance_matrix, delimiter=',', fmt='%.6f')
print("✓ Saved: distance_matrix_reduce2.csv")

✓ Saved: distance_matrix_reduce2.csv
