In [18]:
import json
import pandas as pd
import osmnx as ox
import networkx as nx
import os
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
from os.path import join
from datetime import timedelta
from itertools import product
import random


import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from scipy.spatial import cKDTree
from scipy.special import factorial
import pickle

In [19]:
# Relative I/O layout: the three top-level folders all live under "io".
input_path, output_path, experiments_path = (
    join("io", leaf) for leaf in ("input", "output", "experiments")
)
# Plots are kept inside the experiments folder.
plots_path = join(experiments_path, "plots")

In [20]:
def compute_stop_weights(stops_df, poi_df, min_weight=1.0, max_weight=1.3):
    """
    Computes proximity-based weights for bus stops based on distance to nearest POI.

    For every stop the great-circle (haversine) distance to its nearest POI is
    computed. These distances are min-max normalised over all stops and mapped
    linearly onto the weight range: the stop closest to a POI receives
    max_weight and the farthest one receives min_weight (up to a 1e-8
    stabilising epsilon, which vanishes after rounding). If all stops are
    equally far from their nearest POI (e.g. a single stop), every stop
    receives max_weight.

    Args:
        stops_df (pd.DataFrame): Contains columns ['id', 'latitude', 'longitude'];
            coordinates are converted from degrees to radians internally.
        poi_df (pd.DataFrame): Contains columns ['poi_name', 'latitude', 'longitude'];
            only the two coordinate columns are read.
        min_weight (float): Minimum weight for farthest stops.
        max_weight (float): Maximum weight for stops closest to POIs.

    Returns:
        dict: {str(stop_id): weight}, weights rounded to 2 decimals.
            Empty when stops_df has no rows.

    Raises:
        ValueError: If there are stops but poi_df has no rows, or if any
            coordinate is NaN/inf.
    """
    # Nothing to weight: return early instead of failing on an empty reduction.
    if len(stops_df) == 0:
        return {}
    if len(poi_df) == 0:
        raise ValueError("poi_df must contain at least one point of interest")

    poi_coords = np.radians(poi_df[['latitude', 'longitude']].to_numpy(dtype=float))
    stop_coords = np.radians(stops_df[['latitude', 'longitude']].to_numpy(dtype=float))
    if not (np.isfinite(poi_coords).all() and np.isfinite(stop_coords).all()):
        raise ValueError("latitude/longitude values must be finite (no NaN/inf)")

    def to_unit_vectors(r_latlon):
        # Map (lat, lon) in radians onto the 3-D unit sphere. Euclidean (chord)
        # distance there grows monotonically with great-circle distance, so a
        # Euclidean nearest-neighbour query finds the great-circle nearest POI.
        lat, lon = r_latlon[:, 0], r_latlon[:, 1]
        cos_lat = np.cos(lat)
        return np.column_stack((cos_lat * np.cos(lon), cos_lat * np.sin(lon), np.sin(lat)))

    # Haversine distance calculation (row-wise over two equally shaped arrays)
    def haversine_dist(r_latlon1, r_latlon2):
        R = 6371  # Earth radius in km
        dlat = r_latlon2[:, 0] - r_latlon1[:, 0]
        dlon = r_latlon2[:, 1] - r_latlon1[:, 1]
        a = np.sin(dlat / 2)**2 + np.cos(r_latlon1[:, 0]) * np.cos(r_latlon2[:, 0]) * np.sin(dlon / 2)**2
        # Guard arcsin against rounding pushing `a` marginally above 1.
        return 2 * R * np.arcsin(np.sqrt(np.minimum(a, 1.0)))

    # Build KDTree over the POIs and look up the nearest POI for every stop in
    # one vectorised query instead of a Python loop over all stop/POI pairs.
    tree = cKDTree(to_unit_vectors(poi_coords))
    _, nearest_idx = tree.query(to_unit_vectors(stop_coords), k=1)

    # Distance (km) from each stop to its nearest POI.
    distances = haversine_dist(stop_coords, poi_coords[nearest_idx])

    # Normalize distances and scale; the epsilon avoids 0/0 when all distances match.
    nearest = distances.min()
    norm_dist = (distances - nearest) / (distances.max() - nearest + 1e-8)
    weights = max_weight - norm_dist * (max_weight - min_weight)

    # Keys are stringified stop ids, in the row order of stops_df.
    stop_factor = {str(stop_id): round(float(w), 2) for stop_id, w in zip(stops_df['id'], weights)}

    return stop_factor


In [21]:
def save_dict_to_file(dictionary, filepath):
    """
    Serialise `dictionary` with pickle and write it to `filepath`.

    The target is opened in binary write mode, so any existing file at that
    path is replaced.

    Args:
        dictionary: Object to serialise.
        filepath: Destination path of the pickle file.
    """
    with open(filepath, mode='wb') as sink:
        pickle.Pickler(sink).dump(dictionary)

In [22]:
# Load the stop and POI tables; note the stops file is ';'-separated while the
# POI file is ','-separated.
stops_df = pd.read_csv(join(input_path, "base_stops.csv"), encoding='utf-8', sep=';')
poi_df = pd.read_csv(join(input_path, "base_point_of_interest.csv"), encoding='utf-8', sep=',')

# Derive the per-stop proximity weights (default weight range).
stop_factor_dict = compute_stop_weights(stops_df, poi_df)

# Make sure the output folder exists so a fresh checkout survives
# Restart & Run All, then persist the weights as a pickle file.
os.makedirs(output_path, exist_ok=True)
save_dict_to_file(stop_factor_dict, join(output_path, "interm_stop_factor"))