[wait_time_estimation] Consider time frame start/end when evaluating …

…boarding cost (#135) * Consider start and end times * Thread through the start and end times settings * linting * Update tests
kuanb · Apr 30, 2019 · a9ef0bd · a9ef0bd
1 parent e9b038f
commit a9ef0bd
Show file tree

Hide file tree

Showing 3 changed files with 35 additions and 8 deletions.
diff --git a/peartree/parallel.py b/peartree/parallel.py
@@ -85,7 +85,10 @@ def generate_route_costs(self, route_id: str):
                 trips_and_stop_times.drop('direction_id', axis=1, inplace=True)
 
         wait_times = generate_wait_times(
-            trips_and_stop_times, self.stop_cost_method)
+            self.target_time_start,
+            self.target_time_end,
+            trips_and_stop_times,
+            self.stop_cost_method)
 
         # Used in the next two steps
         stop_id_col = trips_and_stop_times['stop_id'].copy()
@@ -108,6 +111,8 @@ def generate_route_costs(self, route_id: str):
 
 
 def generate_wait_times(
+        target_time_start: float,
+        target_time_end: float,
         trips_and_stop_times: pd.DataFrame,
         stop_cost_method: Any) -> Dict[int, List[float]]:
     wait_times = {0: {}, 1: {}}
@@ -128,7 +133,10 @@ def generate_wait_times(
                 # values associated with the specified direction so default NaN
                 average_wait = np.nan
             else:
-                average_wait = stop_cost_method(direction_subset.arrival_time)
+                average_wait = stop_cost_method(
+                    target_time_start,
+                    target_time_end,
+                    direction_subset.arrival_time)
 
             # Add according to which direction we are working with
             wait_times[direction][stop_id] = average_wait

diff --git a/peartree/paths.py b/peartree/paths.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict
+from typing import Any, Dict, List
 
 import networkx as nx
 import numpy as np
@@ -22,16 +22,35 @@ class InvalidTimeBracket(Exception):
     pass
 
 
-def _calculate_means_default(arrival_times: np.array) -> float:
+def _calculate_means_default(
+        target_time_start: float,
+        target_time_end: float,
+        arrival_times: List) -> float:
     # This is the default method that is provided to the load feed operation
     # and applied to the observed arrival times at a given stop. From this
     # array of arrival times, the average delay between arrivals is calculated
     if len(arrival_times) < 2:
         return np.nan
 
-    first = arrival_times[1:].values
-    second = arrival_times[:-1].values
-    wait_seconds = (first - second)
+    # Make sure that values are in ascending order (also converts to an array)
+    arrival_times = np.array(arrival_times)
+    arrival_times.sort()
+
+    # Compute the gap between each pair of consecutive arrivals
+    first = arrival_times[1:]
+    second = arrival_times[:-1]
+    wait_seconds = list(first - second)
+
+    # Recast arrival times as just a python list
+    arrival_times = list(arrival_times)
+
+    # Also include the gap from the start of the evaluation time period to
+    # the first arrival, and from the last arrival to the end of the period
+    from_start_time_to_first_arrival = arrival_times[0] - target_time_start
+    wait_seconds.append(from_start_time_to_first_arrival)
+
+    from_last_arrival_to_end_time = target_time_end - arrival_times[-1]
+    wait_seconds.append(from_last_arrival_to_end_time)
 
     # Note: Can implement something more substantial here that takes into
     #       account divergent/erratic performance or intentional timing

diff --git a/tests/test_graph.py b/tests/test_graph.py
@@ -61,7 +61,7 @@ def test_generate_summary_graph_elements():
 
         # Just a heuristic for avg_cost mean
         avg_cost_mean = wait_times_by_stop.avg_cost.mean()
-        assert avg_cost_mean == pytest.approx(1109.380, abs=0.1)
+        assert avg_cost_mean == pytest.approx(1013.68, abs=0.1)
 
         # Make sure that there are stop ids unique
         u = wait_times_by_stop.stop_id.unique()