### Import Libraries

In [7]:

# Standard library imports
import csv
import math
import os
import time
import warnings
import zipfile
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional, Tuple, Union

# Third-party imports
import geopandas as gpd
import numpy as np
import pandas as pd
import requests
from geopy.distance import geodesic
from shapely import wkt
from shapely.geometry import Point, Polygon, MultiPolygon, mapping
from tqdm import tqdm

from library.tesselatePrepareData import TripDataProcessor, run_complete_analysis_with_time_features


### Input Parameters

In [9]:
# Suppress warnings for cleaner output.
# `warnings` is already imported in the imports cell at the top of the notebook.
# NOTE: 'ignore' with no category silences every warning for the kernel session.
warnings.filterwarnings('ignore')

# ============================
# CONFIGURATION PARAMETERS
# ============================

# City name and settings
city = 'orléans'
scenario_name = 'DRT'
directory = f"study areas/{city}/"  # every input/output path below is built from this

# OSRM server URLs (change them if your OSRM servers run at different addresses).
osrm_url_walk = "http://localhost:5000"  # used for walking distances
osrm_url_car = "http://localhost:5001"   # used for car driving distances

# Study area boundaries (EPSG:4326)
# presumably ordered [min_lon, min_lat, max_lon, max_lat] — TODO confirm against the library
bbox = [1.7676734, 47.7601594, 2.1089651, 48.0134634]  # orleans metropole area

# File paths (all located inside `directory`)
trips_input_filename = f"{directory}sample.csv"
trips_output_filename = f"{directory}trips_{scenario_name}_processed.csv"
hexagons_path = f"{directory}zones_hexagons_converted.csv"  # Change to your file path
trips_output_path = f"{directory}trips_{scenario_name}_predicted.CSV"  # Optional output path

# Input parameters for hexagons filtering
walking_time_sec = 600  # 10 minutes
walking_speed_ms = 1.31  # 4.71 km/h - as default value for OSRM
hex_edge_km = 0.8
min_travel_time = 60    # lower bound of the travel-time filter, in seconds
max_travel_time = 6000  # upper bound of the travel-time filter, in seconds
calculate_distances = True
batch_size = 20

# Time-related parameters
analysis_date = "15/12/2024"  # DD/MM/YYYY format

# Analysis parameters
gtfs_path = f"{directory}gtfs/gtfs_PT.zip"  # follows `city` instead of hard-coding it
hexagon_edge_km = hex_edge_km  # single source of truth for the hexagon edge length
max_walking_time_min = 15
# No trailing commas here: `x = [...],` would bind a one-element tuple wrapping
# the list rather than the list itself.
time_windows = [("07:00:00", "10:00:00"), ("10:00:00", "12:00:00")]
base_departure_hours = [7, 10]




## Step 0 - Process the trip output .csv file

In [10]:
# Step 0: run the trip-processing pipeline on the raw trips file.
# All settings come from the configuration cell above.
processor = TripDataProcessor()

processed_trips_df = processor.run_complete_pipeline(
    # Where to read the raw trips and where to write the processed ones
    input_file=trips_input_filename,
    output_file=trips_output_filename,
    # Travel-time filter bounds
    min_travel_time=min_travel_time,
    max_travel_time=max_travel_time,
    # Distance computation switch and its batch size
    calculate_distances=calculate_distances,
    distance_batch_size=batch_size,
)
       

STARTING COMPLETE TRIP DATA PROCESSING PIPELINE (IN MEMORY)
Processing time columns from study areas/orléans/sample.csv...
Processing time columns from study areas/orléans/sample.csv...
Processing complete: 16 rows processed, 0 rows skipped
Loaded 16 trips
Filtering trips with travel time between 60 and 6000 seconds...
Filtered dataset: 16 trips remaining (0 removed)

Calculating distances using OSRM...
Starting distance calculation...
Extracting unique coordinates...
Found 11 unique source points
Found 7 unique destination points


Processing distance matrix: 100%|█████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 29.32it/s]

Assigning distances to dataframe...
Distance calculation complete. Added distances for 16 trips

Reconstructing datetime columns...
Engineering time-based features...
Feature engineering complete: 21 total columns
Selecting 18 final features...

Final dataset saved to study areas/orléans/trips_DRT_processed.csv
Final dataset shape: (16, 17)
Final features: ['origin_lat', 'origin_lon', 'destination_lat', 'destination_lon', 'travel_time', 'departure_time_hour', 'departure_time_minute', 'departure_time_day_of_week', 'departure_time_day_of_month', 'departure_time_month', 'departure_time_hour_sin', 'departure_time_hour_cos', 'departure_time_day_of_week_sin', 'departure_time_day_of_week_cos', 'departure_time_month_sin', 'departure_time_month_cos', 'distance']

PIPELINE COMPLETED SUCCESSFULLY





In [11]:
# Preview only the first rows instead of rendering the whole DataFrame.
processed_trips_df.head(10)

Unnamed: 0,origin_lat,origin_lon,destination_lat,destination_lon,travel_time,departure_time_hour,departure_time_minute,departure_time_day_of_week,departure_time_day_of_month,departure_time_month,departure_time_hour_sin,departure_time_hour_cos,departure_time_day_of_week_sin,departure_time_day_of_week_cos,departure_time_month_sin,departure_time_month_cos,distance
0,47.928462,1.925677,47.924541,1.947182,616,7,11,0,1,1,0.965926,-0.258819,0.0,1.0,0.5,0.866025,2208.4
1,47.883374,2.035291,47.910543,1.970241,1324,7,18,0,1,1,0.965926,-0.258819,0.0,1.0,0.5,0.866025,6359.6
2,47.934959,1.966643,47.929945,1.926868,973,7,26,0,1,1,0.965926,-0.258819,0.0,1.0,0.5,0.866025,3773.8
3,47.925176,1.957858,47.929945,1.926868,753,7,30,0,1,1,0.965926,-0.258819,0.0,1.0,0.5,0.866025,3158.4
4,47.921676,1.99134,47.910543,1.970241,563,7,31,0,1,1,0.965926,-0.258819,0.0,1.0,0.5,0.866025,2418.0
5,47.920582,1.964986,47.910543,1.970241,319,7,33,0,1,1,0.965926,-0.258819,0.0,1.0,0.5,0.866025,1489.5
6,47.928462,1.925677,47.924778,1.953145,586,7,37,0,1,1,0.965926,-0.258819,0.0,1.0,0.5,0.866025,2644.6
7,47.927285,1.952031,47.929945,1.926868,143,7,53,0,1,1,0.965926,-0.258819,0.0,1.0,0.5,0.866025,2494.1
8,47.923362,1.970069,47.913873,1.978212,484,7,54,0,1,1,0.965926,-0.258819,0.0,1.0,0.5,0.866025,1514.2
9,47.919154,1.974255,47.910543,1.970241,891,7,56,0,1,1,0.965926,-0.258819,0.0,1.0,0.5,0.866025,1180.1


In [14]:
# Show the column labels of the processed trips DataFrame.
processed_trips_df.columns

Index(['origin_lat', 'origin_lon', 'destination_lat', 'destination_lon',
       'travel_time', 'departure_time_hour', 'departure_time_minute',
       'departure_time_day_of_week', 'departure_time_day_of_month',
       'departure_time_month', 'departure_time_hour_sin',
       'departure_time_hour_cos', 'departure_time_day_of_week_sin',
       'departure_time_day_of_week_cos', 'departure_time_month_sin',
       'departure_time_month_cos', 'distance'],
      dtype='object')

## Step 1 - Tessellate the study area with hexagons and create DRT virtual trips

In [12]:
# Read back the processed trips file written in Step 0.
drt_trips = pd.read_csv(trips_output_filename)

# Run the analysis with time features enabled.
# NOTE(review): the time windows and departure hours are spelled out inline here
# rather than taken from the configuration cell — keep the two in sync.
results = run_complete_analysis_with_time_features(
    # Study area and input data (only the first 10 trips are passed in)
    bbox=bbox,
    gtfs_path=gtfs_path,
    drt_trips_df=drt_trips.head(10),
    # Tessellation and walking settings
    hexagon_edge_km=hexagon_edge_km,
    max_walking_time_min=max_walking_time_min,
    # Temporal settings
    analysis_date=analysis_date,
    time_windows=[("07:00:00", "10:00:00"), ("10:00:00", "12:00:00")],
    base_departure_hours=[7, 10],
    add_time_features=True,
    # Routing backend and output location
    osrm_url=osrm_url_walk,
    batch_size=batch_size,
    output_dir=directory,
)

DRT ACCESSIBILITY ANALYSIS WITH MULTIPLE TIME WINDOWS
Analysis Date: 15/12/2024
Departure Hours: [7, 10]
Time Windows: [('07:00:00', '10:00:00'), ('10:00:00', '12:00:00')]
DEBUG - Raw inputs:
  time_windows type: <class 'list'>, content: [('07:00:00', '10:00:00'), ('10:00:00', '12:00:00')]
  base_departure_hours type: <class 'list'>, content: [7, 10]
Parsed date: 2024-12-15 (Sunday)

1. GENERATING HEXAGONAL GRID
------------------------------
Generating hexagonal grid for study_area...
Generated 1826 hexagons

2. CALCULATING BARYCENTERS
------------------------------
Calculating barycenters of DRT stops...
Calculated barycenters for 11 hexagons with DRT stops

3. LOADING CPT DATA FROM GTFS
------------------------------
Loaded 1395 CPT stops from GTFS
Loaded 429712 schedule entries from GTFS

4. COMPUTING ALL WALKING TIMES (OPTIMIZED)
------------------------------
Computing all walking times using optimized batch operation...
Processing 4521 coordinate pairs in batches...


Processing walking times: 100%|███████████████████████████████████████████████████████| 227/227 [00:09<00:00, 25.06it/s]


Computed 9042 walking time connections

5. GENERATING SYNTHETIC DRT TRIPS FOR MULTIPLE TIME WINDOWS
------------------------------

  Processing departure hour 7 with time window ('07:00:00', '10:00:00')
    Time window type: <class 'tuple'>, Content: ('07:00:00', '10:00:00')
Generating synthetic DRT trips...
  Generating first mile DRT trips...
  Generating last mile DRT trips...
Generated 25300 synthetic DRT trips
    Generated 25300 trips for departure hour 7

  Processing departure hour 10 with time window ('10:00:00', '12:00:00')
    Time window type: <class 'tuple'>, Content: ('10:00:00', '12:00:00')
Generating synthetic DRT trips...
  Generating first mile DRT trips...
  Generating last mile DRT trips...
Generated 15930 synthetic DRT trips
    Generated 15930 trips for departure hour 10

  Total synthetic trips generated: 41230
    Departure hour 7: 25300 trips
    Departure hour 10: 15930 trips

6. ENGINEERING TIME FEATURES
------------------------------
Engineering time-based 

In [4]:
# Pull the synthetic DRT trips table out of the analysis results and display it.
trips_df = results["synthetic_drt_trips"]
trips_df

Unnamed: 0,start_x,start_y,end_x,end_y,departure_time,road_distance,trip_type,from_hexagon_id,to_hexagon_id,via_cpt_stop,...,departure_time_day_of_week,departure_time_day_of_month,departure_time_month,departure_time_hour_sin,departure_time_hour_cos,departure_time_day_of_week_sin,departure_time_day_of_week_cos,departure_time_month_sin,departure_time_month_cos,analysis_date
0,1.926392,47.929352,1.950786,47.925535,07:05:41,408.1,first_mile_drt,857,981,,...,6,15,12,0.965926,-0.258819,-0.781831,0.62349,-2.449294e-16,1.0,15/12/2024
1,1.926392,47.929352,1.957858,47.925176,07:05:41,408.1,first_mile_drt,857,1023,,...,6,15,12,0.965926,-0.258819,-0.781831,0.62349,-2.449294e-16,1.0,15/12/2024
2,1.926392,47.929352,1.970241,47.910543,07:05:41,408.1,first_mile_drt,857,1062,,...,6,15,12,0.965926,-0.258819,-0.781831,0.62349,-2.449294e-16,1.0,15/12/2024
3,1.926392,47.929352,1.964986,47.920582,07:05:41,408.1,first_mile_drt,857,1063,,...,6,15,12,0.965926,-0.258819,-0.781831,0.62349,-2.449294e-16,1.0,15/12/2024
4,1.926392,47.929352,1.970069,47.923362,07:05:41,408.1,first_mile_drt,857,1064,,...,6,15,12,0.965926,-0.258819,-0.781831,0.62349,-2.449294e-16,1.0,15/12/2024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41225,1.991340,47.921676,2.035291,47.883374,11:21:32,903.1,last_mile_drt,1188,1431,ORLEANS:StopArea:00026810,...,6,15,12,0.258819,-0.965926,-0.781831,0.62349,-2.449294e-16,1.0,15/12/2024
41226,1.991340,47.921676,2.035291,47.883374,11:23:32,903.1,last_mile_drt,1188,1431,ORLEANS:StopArea:00026810,...,6,15,12,0.258819,-0.965926,-0.781831,0.62349,-2.449294e-16,1.0,15/12/2024
41227,1.991340,47.921676,2.035291,47.883374,11:27:32,903.1,last_mile_drt,1188,1431,ORLEANS:StopArea:00026810,...,6,15,12,0.258819,-0.965926,-0.781831,0.62349,-2.449294e-16,1.0,15/12/2024
41228,1.991340,47.921676,2.035291,47.883374,11:48:32,903.1,last_mile_drt,1188,1431,ORLEANS:StopArea:00026810,...,6,15,12,0.258819,-0.965926,-0.781831,0.62349,-2.449294e-16,1.0,15/12/2024


## Step 3 - Predict travel time percentile values
...
Run the next notebook
...