Skip to content

Commit

Permalink
Refactor populate_graph and generate_cross_feed_edges (#7)
Browse files Browse the repository at this point in the history
* move generate_cross_feed_edges into populate_graph

* run _merge_stop_waits_and_attributes

* pass through new merged stops df

* add notes, remove hanging assert
  • Loading branch information
kuanb committed Dec 3, 2017
1 parent 6b4ad38 commit 8342d57
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 44 deletions.
64 changes: 28 additions & 36 deletions peartree/graph.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict
from typing import Dict, Union

import networkx as nx
import pandas as pd
Expand Down Expand Up @@ -38,8 +38,7 @@ def generate_summary_graph_elements(feed: ptg.gtfs.feed,


def generate_cross_feed_edges(G,
feed,
wait_times_by_stop,
stops_df,
connection_threshold):
# Terminate this process early if the graph is empty
if (G.number_of_nodes() == 0):
Expand All @@ -56,26 +55,13 @@ def generate_cross_feed_edges(G,

# TODO: Repeating this in populate_graph as well, there may
# be a way to condense these two steps well
for i, row in wait_times_by_stop.iterrows():
for i, row in stops_df.iterrows():
sid = str(row.stop_id)

# TODO: Join tables before hand to make
# this part go faster
id_mask = (feed.stops.stop_id == sid)
stop_data_head = feed.stops[id_mask].head(1)

# Handle the possibility that there are no values for that stop
# id in the feed subset of wait times
if not len(stop_data_head):
continue

# Once check has cleared, pull out the first row as a pd.Series
stop_data = stop_data_head.T.squeeze()

# Ensure that each value is typed correctly prior to being
# fed into the nearest node method
lat = float(stop_data.stop_lat)
lon = float(stop_data.stop_lon)
lat = float(row.stop_lat)
lon = float(row.stop_lon)
point = (lat, lon)
(nn, nn_dist) = get_nearest_node(node_df, point)

Expand All @@ -91,41 +77,42 @@ def generate_cross_feed_edges(G,
'distance': edge_costs})


def _merge_stop_waits_and_attributes(wait_times_by_stop: pd.DataFrame,
feed_stops: pd.DataFrame) -> pd.DataFrame:
wt_sub = wait_times_by_stop[['avg_cost', 'stop_id']]
fs_sub = feed_stops[['stop_lat', 'stop_lon', 'stop_id']]
mdf = pd.merge(wt_sub, fs_sub, on='stop_id', how='left')
return mdf[~mdf.isnull()]


def populate_graph(G: nx.MultiDiGraph,
name: str,
feed: ptg.gtfs.feed,
wait_times_by_stop: pd.DataFrame,
summary_edge_costs: pd.DataFrame,
cross_feed_edges: pd.DataFrame):
connection_threshold: Union[int, float]):
# As we convert stop ids to actual nodes, let's keep track of those names
# here so that we can reference them when we add connector edges across
# the various feeds loaded into the graph
sid_lookup = {}

for i, row in wait_times_by_stop.iterrows():
# Generate a merge of the wait time data and the feed stops data that will
# be used for both the addition of new stop nodes and the addition of
# cross feed edges later on (that join one feeds stops to the other if
# they are within the connection threshold).
stops_df = _merge_stop_waits_and_attributes(wait_times_by_stop, feed.stops)

for i, row in stops_df.iterrows():
sid = str(row.stop_id)
full_sid = nameify_stop_id(name, sid)

# TODO: Join tables before hand to make
# this part go faster
id_mask = (feed.stops.stop_id == sid)
stop_data_head = feed.stops[id_mask].head(1)

# Handle the possibility that there are no values for that stop
# id in the feed subset of wait times
if not len(stop_data_head):
continue

# Once check has cleared, pull out the first row as a pd.Series
stop_data = stop_data_head.T.squeeze()

# Add to the lookup crosswalk dictionary
sid_lookup[sid] = full_sid

G.add_node(full_sid,
boarding_cost=row.avg_cost,
y=stop_data.stop_lat,
x=stop_data.stop_lon)
y=row.stop_lat,
x=row.stop_lon)

for i, row in summary_edge_costs.iterrows():
sid_fr = nameify_stop_id(name, row.from_stop_id)
Expand All @@ -134,6 +121,11 @@ def populate_graph(G: nx.MultiDiGraph,
sid_to,
length=row.edge_cost)

# Generate cross feed edge values
cross_feed_edges = generate_cross_feed_edges(G,
stops_df,
connection_threshold)

# Now add the cross feed edge connectors to the graph to
# capture transfer points
for i, row in cross_feed_edges.iterrows():
Expand Down
11 changes: 3 additions & 8 deletions peartree/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import networkx as nx
import partridge as ptg

from .graph import (generate_cross_feed_edges, generate_empty_md_graph,
generate_summary_graph_elements, populate_graph)
from .graph import (generate_empty_md_graph, generate_summary_graph_elements,
populate_graph)
from .utilities import log


Expand Down Expand Up @@ -127,14 +127,9 @@ def load_feed_as_graph(feed: ptg.gtfs.feed,
else:
G = generate_empty_md_graph(name)

cross_feed_edges = generate_cross_feed_edges(G,
feed,
wait_times_by_stop,
connection_threshold)

return populate_graph(G,
name,
feed,
wait_times_by_stop,
summary_edge_costs,
cross_feed_edges)
connection_threshold)

0 comments on commit 8342d57

Please sign in to comment.