Skip to content

Commit

Permalink
add test to check summary edge output, comments
Browse files Browse the repository at this point in the history
  • Loading branch information
kuanb committed Dec 20, 2017
1 parent c063f3e commit 03f5ced
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 4 deletions.
15 changes: 13 additions & 2 deletions peartree/summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ def generate_all_observed_edge_costs(trips_and_stop_times: pd.DataFrame


def summarize_edge_costs(df: pd.DataFrame) -> pd.DataFrame:
# Used as a function applied to a grouping
# operation, pulls out the mean edge cost for each
# unique edge pair (from node and to node)
from_stop_id = df.from_stop_id.values[0]
results_mtx = []
for to_stop_id in df.to_stop_id.unique():
Expand All @@ -103,18 +106,26 @@ def summarize_edge_costs(df: pd.DataFrame) -> pd.DataFrame:


def generate_summary_edge_costs(all_edge_costs: pd.DataFrame) -> pd.DataFrame:
    # Given a dataframe of edge costs, group the rows by their origin
    # stop (from_stop_id) and let summarize_edge_costs reduce each group
    # to its per-destination summary rows
    per_origin_stop = all_edge_costs.groupby('from_stop_id')

    # The group keys are not needed in the index of the combined result,
    # so it is replaced with a fresh positional index
    return per_origin_stop.apply(summarize_edge_costs).reset_index(drop=True)


def summarize_waits_at_one_stop(stop_df: pd.DataFrame) -> float:
    # Calculates the average wait time at this stop, given all observed
    # wait times in both directions (wait_dir_0 and wait_dir_1 columns)
    total_wait = stop_df.wait_dir_0.sum() + stop_df.wait_dir_1.sum()

    # TODO: Simply dividing by two may not be appropriate - it is
    #       good for estimation purposes, but more sophisticated
    #       wait time calculations could be introduced here
    observation_count = len(stop_df) * 2

    # Both directions are pooled into a single average, which could
    # inaccurately portray the wait time at a given stop if one
    # direction has significantly higher frequency than the other
    return total_wait / observation_count
Expand Down
33 changes: 32 additions & 1 deletion tests/test_graph.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,38 @@
from peartree.graph import generate_empty_md_graph
import os

from peartree.graph import (generate_empty_md_graph,
generate_summary_graph_elements)
from peartree.paths import get_representative_feed


def fixture(filename):
    # Resolve a file name to its path inside the 'fixtures' directory
    # that sits alongside this test module
    tests_dir = os.path.dirname(__file__)
    return os.path.join(tests_dir, 'fixtures', filename)


def test_generate_empty_graph():
    # A freshly generated graph should contain no edges and no nodes
    empty_graph = generate_empty_md_graph('foo')

    edge_count = len(empty_graph.edges())
    assert edge_count == 0

    node_count = len(empty_graph.nodes())
    assert node_count == 0


def test_generate_summary_graph_elements():
    # Load the Caltrain fixture feed used as input for the summary
    feed_path = fixture('caltrain-2017-07-24.zip')
    feed = get_representative_feed(feed_path)

    # Analysis window bounds (presumably seconds after midnight, i.e.
    # 7 and 10 hours - confirm against generate_summary_graph_elements)
    window_start = 7 * 60 * 60
    window_end = 10 * 60 * 60

    # The function returns a pair; only the edge costs are checked here
    elements = generate_summary_graph_elements(feed, window_start, window_end)
    summary_edge_costs, _wait_times_by_stop = elements

    # Ensure that the summary edge cost dataframe looks as it should
    expected_columns = ['edge_cost', 'from_stop_id', 'to_stop_id']
    missing = [col for col in expected_columns
               if col not in summary_edge_costs.columns]
    assert not missing

    # Make sure that all edges are unique - there are no duplicates
    # in the returned edge dataframe (each should be its own summary)
    edge_pairs = list(zip(summary_edge_costs.from_stop_id,
                          summary_edge_costs.to_stop_id))
    assert len(set(edge_pairs)) == len(edge_pairs)
2 changes: 1 addition & 1 deletion tests/test_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,4 +126,4 @@ def test_feed_to_graph_path():
for _, _, edge in G.edges(data=True):
assert 'length' in edge.keys()
assert isinstance(edge['length'], float)
assert edge['length'] > 0
assert edge['length'] >= 0

0 comments on commit 03f5ced

Please sign in to comment.