In [None]:
import pandas as pd
import numpy as np
import networkx as nx
import copy
import random
from src.util import *
from src.algorithm import *
from src.transport_network import *
from validation.graph_test import *
from validation.path_test import *

## Generating Test Paths

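We define a function `generate_paths_for_tests` that generates the paths used by the validation below. It runs two kinds of tests: random stop-pair tests in a peak window (10:00–12:00), and boundary tests in late-night (23:00–01:00) and early-morning (05:30–07:30) windows, restricted to stops whose name contains "Lausanne". The random module is seeded (default seed `42`) so that the sampled stop pairs are reproducible.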

Additionally, `graph_sanity_check` is run on every newly built graph and performs the following checks:
1. **No Zero Cycles**: Ensure there are no cycles with zero total weight.
2. **Edge Weight >= 0**: All edges should have non-negative weights (travel time).
3. **Walking Weight > 0**: Ensure that walking weights are positive.

In [20]:
def _sample_stop_pairs(stop_names, sample_size, label):
    """Draw up to ``sample_size`` disjoint (departure, destination) name pairs.

    Args:
        stop_names: List of stop names to sample from (without replacement).
        sample_size: Number of pairs requested.
        label: Short description of the population, used only in the note
            printed when fewer pairs than requested can be formed.

    Returns:
        A list of 2-tuples of stop names. It holds ``sample_size`` pairs when
        the population is large enough, otherwise as many as can be formed.
    """
    wanted = sample_size * 2
    available = len(stop_names)
    count = min(wanted, available)
    count -= count % 2  # an odd leftover name cannot form a pair
    if count < wanted:
        # random.sample would raise ValueError here; degrade gracefully instead.
        print(f" Only {available} {label} stops available; sampling {count // 2} pairs instead of {sample_size}")
    drawn = random.sample(stop_names, count)
    # Consecutive draws (0,1), (2,3), ... form the pairs.
    return list(zip(drawn[0::2], drawn[1::2]))


def _collect_paths(graph, window, pairs, stop_to_id):
    """Run ``yen_ksp`` on ``graph`` for every stop pair and gather the results.

    Args:
        graph: Graph returned by ``build_graph`` for ``window``.
        window: ``(start, end)`` time strings; ``start`` is passed to ``yen_ksp``.
        pairs: Iterable of ``(departure_name, destination_name)`` tuples.
        stop_to_id: Mapping from stop name to stop id.

    Returns:
        A flat list with every path returned by ``yen_ksp`` for the pairs.
        Pairs that fail are reported on stdout and skipped.
    """
    start_time, end_time = window
    collected = []
    for departure, destination in pairs:
        departure_id = stop_to_id.get(departure, None)
        destination_id = stop_to_id.get(destination, None)
        print(f"Testing path from {departure} (ID: {departure_id}) to {destination} (ID: {destination_id})")
        if not departure_id or not destination_id:
            print(f" Invalid departure or destination for test: {departure}, {destination}")
            continue
        try:
            paths = yen_ksp(graph, start_time, departure_id, destination_id)
            if not paths:
                print(f" No paths found for {departure} to {destination} from {start_time} to {end_time}")
                continue
            collected.extend(paths)
            print(f" Generated {len(paths)} paths for {departure} to {destination}")
        except KeyError as e:
            print(f" KeyError generating paths for {departure} to {destination}: {e}")
        except Exception as e:
            # Deliberate best effort: one failing pair must not abort the run.
            print(f" Error generating paths for {departure} to {destination}: {e}")
    return collected


def generate_paths_for_tests(seed=42, sample_size=10):
    """Generate paths for validation: a peak-time random test and boundary tests.

    A ``TransportNetwork`` is loaded from the files under ``data/``. Random
    stop pairs are routed in the 10:00-12:00 window, and pairs of stops whose
    name contains 'Lausanne' are routed in the 23:00-01:00 and 05:30-07:30
    windows. Each freshly built graph is passed to ``graph_sanity_check``.
    Progress and failures are printed to stdout.

    Args:
        seed: Seed for the global ``random`` module; ``None`` leaves the
            generator state untouched.
        sample_size: Number of stop pairs to draw for each of the two samples.
            If too few stops exist, fewer pairs are used and a note is printed.

    Returns:
        A flat list of every path returned by ``yen_ksp`` across all tests.
    """
    if seed is not None:
        random.seed(seed)

    sbb_network = TransportNetwork('data/sbb_timetable_stop_times.parquet', 'data/stop_to_stop.csv', 'data/stops.csv')
    stop_to_id = sbb_network.stops.set_index('stop_name')['stop_id'].to_dict()

    # High peak time for random test
    peak_time = ('10:00', '12:00')

    # late night and early morning for boundary test
    boundary_times = [('23:00', '01:00'), ('05:30', '07:30')]

    # Sample some random pairs of stops
    all_stop_names = list(stop_to_id.keys())
    sampled_pairs = _sample_stop_pairs(all_stop_names, sample_size, 'network')

    # Only do the boundary test in Lausanne area
    lausanne_stops = [stop for stop in all_stop_names if 'Lausanne' in stop]
    lausanne_pairs = _sample_stop_pairs(lausanne_stops, sample_size, 'Lausanne')

    all_generated_paths = []

    # Peak time random test
    G_peak = sbb_network.build_graph(peak_time[0], peak_time[1])
    print(f'-----------Peak time test during {peak_time}-----------')
    graph_sanity_check(G_peak)
    print('...')
    all_generated_paths.extend(_collect_paths(G_peak, peak_time, sampled_pairs, stop_to_id))

    # City center boundary test (contains 'Lausanne')
    for boundary_time in boundary_times:
        G_boundary = sbb_network.build_graph(boundary_time[0], boundary_time[1])
        print('\n')
        print(f'-----------Boundary time test during {boundary_time}-----------')
        graph_sanity_check(G_boundary)
        print('...')
        all_generated_paths.extend(_collect_paths(G_boundary, boundary_time, lausanne_pairs, stop_to_id))

    return all_generated_paths


In [21]:
# `seed` was not defined in any visible cell, so a fresh-kernel run could fail
# with NameError. Define it explicitly; 42 matches the function's default.
# NOTE(review): if a star import was expected to supply `seed`, confirm its value.
SEED = 42
generated_paths = generate_paths_for_tests(seed=SEED)

-----------Peak time test during ('10:00', '12:00')-----------
Graph with 504 nodes passed the sanity check!
...
Testing path from Lausanne, Bellerive (ID: 8591989) to Epalinges, Grand Chemin (ID: 8591968)
 Generated 5 paths for Lausanne, Bellerive to Epalinges, Grand Chemin
Testing path from Bussigny, Tatironne (ID: 8506905) to Lausanne, Praz-Berthoud (ID: 8592099)
 Generated 5 paths for Bussigny, Tatironne to Lausanne, Praz-Berthoud
Testing path from Lausanne, Riant-Cour (ID: 8592110) to Lausanne, Ours (ID: 8579237)
 Generated 5 paths for Lausanne, Riant-Cour to Lausanne, Ours
Testing path from Lausanne, Maladière-Figuiers (ID: 8593837) to Lausanne, Blécherette (ID: 8591994)
 Generated 5 paths for Lausanne, Maladière-Figuiers to Lausanne, Blécherette
Testing path from Lausanne, Petit Château (ID: 8592091) to Ecublens VD, Perrettes (ID: 8591954)
 Generated 5 paths for Lausanne, Petit Château to Ecublens VD, Perrettes
Testing path from Lausanne, EPSIC (ID: 8592041) to Pully, port (ID: 

## Path Sanity Check

We perform the following sanity checks on the generated paths:

1. **Departure Time >= Arrival Time of the Previous Stop**: Ensure that the departure time from each stop is not earlier than the arrival time at the previous stop.
2. **Transfer Time >= 2 Minutes**: Ensure that the departure time at any transfer stop is at least 2 minutes later than the last arrival time.
3. **Walking Time <= 10 Minutes**: Ensure that walking time between stops does not exceed 10 minutes.

These checks ensure that the generated paths are realistic and adhere to our assumptions.

In [19]:
# Validate every generated path against the checks listed in the markdown above.
# NOTE(review): `path_sanity_check` is not defined in this notebook; presumably it
# comes from the `validation.path_test` star import — confirm.
path_sanity_check(generated_paths)

All paths passed the sanity check.
