add test to check summary edge output, comments

kuanb · Dec 20, 2017 · 03f5ced · 03f5ced
1 parent c063f3e
commit 03f5ced
Show file tree

Hide file tree

Showing 3 changed files with 46 additions and 4 deletions.
diff --git a/peartree/summarizer.py b/peartree/summarizer.py
@@ -91,6 +91,9 @@ def generate_all_observed_edge_costs(trips_and_stop_times: pd.DataFrame
 
 
 def summarize_edge_costs(df: pd.DataFrame) -> pd.DataFrame:
+    # Used as a function applied to a grouping
+    # operation, pulls out the mean edge cost for each
+    # unique edge pair (from node and to node)
     from_stop_id = df.from_stop_id.values[0]
     results_mtx = []
     for to_stop_id in df.to_stop_id.unique():
@@ -103,18 +106,26 @@ def summarize_edge_costs(df: pd.DataFrame) -> pd.DataFrame:
 
 
 def generate_summary_edge_costs(all_edge_costs: pd.DataFrame) -> pd.DataFrame:
+    # Given a dataframe of edge costs, get the average for each
+    # from node - to node pair
     summary_groupings = all_edge_costs.groupby('from_stop_id')
     summary = summary_groupings.apply(summarize_edge_costs)
     summary = summary.reset_index(drop=True)
     return summary
 
 
 def summarize_waits_at_one_stop(stop_df: pd.DataFrame) -> float:
-    # Calculate average wait time at this stop, given all observed
-    # wait times
+    # Calculates the average of all observed wait times at this stop
+    # TODO: Simply dividing by two may not be appropriate - it is
+    #       good for estimation purposes, but I could introduce
+    #       more sophisticated wait time calculations here
     divide_by = (len(stop_df) * 2)
     dir_0_sum = stop_df.wait_dir_0.sum()
     dir_1_sum = stop_df.wait_dir_1.sum()
+
+    # A weighted average is performed, which could inaccurately
+    # portray a wait time at a given stop if one direction has
+    # significantly higher frequency than another
     calculated = ((dir_0_sum + dir_1_sum) / divide_by)
 
     return calculated

diff --git a/tests/test_graph.py b/tests/test_graph.py
@@ -1,7 +1,38 @@
-from peartree.graph import generate_empty_md_graph
+import os
+
+from peartree.graph import (generate_empty_md_graph,
+                            generate_summary_graph_elements)
+from peartree.paths import get_representative_feed
+
+
+def fixture(filename):
+    return os.path.join(os.path.dirname(__file__), 'fixtures', filename)
 
 
 def test_generate_empty_graph():
     G = generate_empty_md_graph('foo')
     assert len(G.edges()) == 0
     assert len(G.nodes()) == 0
+
+
+def test_generate_summary_graph_elements():
+    path_1 = fixture('caltrain-2017-07-24.zip')
+    feed_1 = get_representative_feed(path_1)
+
+    start = 7 * 60 * 60
+    end = 10 * 60 * 60
+
+    (summary_edge_costs,
+     wait_times_by_stop) = generate_summary_graph_elements(feed_1, start, end)
+
+    # Ensure that the summary edge cost dataframe looks as it should
+    ec_cols = ['edge_cost', 'from_stop_id', 'to_stop_id']
+    for c in ec_cols:
+        assert c in summary_edge_costs.columns
+
+    # Make sure that all edges are unique - there are no duplicates
+    # in the returned edge dataframe (each should be its own summary)
+    f = summary_edge_costs.from_stop_id
+    t = summary_edge_costs.to_stop_id
+    z = list(zip(f, t))
+    assert len(list(set(z))) == len(z)
diff --git a/tests/test_paths.py b/tests/test_paths.py
@@ -126,4 +126,4 @@ def test_feed_to_graph_path():
     for _, _, edge in G.edges(data=True):
         assert 'length' in edge.keys()
         assert isinstance(edge['length'], float)
-        assert edge['length'] > 0
+        assert edge['length'] >= 0