In [1]:
import pandas as pd
import numpy as np
from math import radians, cos, sin, asin, sqrt
import time

In [2]:
# Load the data
stations = pd.read_csv('station_data.csv')
pois = pd.read_csv('candidate_stations_P50_kmeans_snap.csv')

In [3]:
print("="*70)
print("STEP 1: DATA LOADING & VALIDATION")
print("="*70)

STEP 1: DATA LOADING & VALIDATION


In [4]:
# Print basic details
print(f"Stations: {len(stations)} candidates")
print(f"POIs: {len(pois)} demand points")
print(f"Station columns: {stations.columns.tolist()}")
print(f"POI columns: {pois.columns.tolist()}")

Stations: 85 candidates
POIs: 50 demand points
Station columns: ['station_id', 'name', 'address', 'rental_uris', 'lat', 'lon', 'capacity']
POI columns: ['poi_id', 'name', 'geometry', 'lat', 'lon', 'category', 'demand', 'method', 'centroid_lon', 'centroid_lat', 'dist_to_centroid_m']


In [5]:
I = len(pois)      # Number of POIs (demand points)
J = len(stations)  # Number of stations (candidate locations)
print(f"Loaded {I} POIs and {J} stations")

Loaded 50 POIs and 85 stations


In [6]:
# Define haversine distance function
def haversine(lat1, lon1, lat2, lon2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    Returns distance in kilometers
    """
    # Convert decimal degrees to radians
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    
    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a))
    r = 6371  # Radius of earth in kilometers
    return c * r

In [7]:
# Calculate distance matrix
print(f"\n Calculating distance matrix ({I} × {J})...")
print("  This may take 2-5 minutes...\n")

start_time = time.time()
distance_matrix = np.zeros((I, J))

# Progress tracking
for i in range(I):
    if (i + 1) % 5000 == 0:  # Print progress every 5000 POIs
        elapsed = time.time() - start_time
        print(f"  Progress: {i+1}/{I} POIs ({100*(i+1)/I:.1f}%) - {elapsed:.1f}s elapsed")
    
    poi_lat = pois.iloc[i]['lat']
    poi_lon = pois.iloc[i]['lon']
    
    for j in range(J):
        station_lat = stations.iloc[j]['lat']
        station_lon = stations.iloc[j]['lon']
        
        distance_matrix[i, j] = haversine(poi_lat, poi_lon, station_lat, station_lon)

elapsed = time.time() - start_time
print(f"✓ Distance matrix calculated in {elapsed:.1f} seconds")


 Calculating distance matrix (50 × 85)...
  This may take 2-5 minutes...

✓ Distance matrix calculated in 0.1 seconds


In [8]:
# Save distance matrix
print("\n⏳ Saving distance matrix...")
np.savetxt('distance_matrix50.csv', distance_matrix, delimiter=',', fmt='%.6f')
print("✓ Saved: distance_matrix50.csv")


⏳ Saving distance matrix...
✓ Saved: distance_matrix50.csv
