Skip to content

Commit

Permalink
Merge pull request #4 from kuanb/kuanb-join-on-new-points
Browse files Browse the repository at this point in the history
Identify and generate x-feed transfer edges based on proximity threshold
  • Loading branch information
kuanb committed Dec 3, 2017
2 parents 0592d75 + cad14ce commit 6b4ad38
Show file tree
Hide file tree
Showing 3 changed files with 216 additions and 6 deletions.
95 changes: 93 additions & 2 deletions peartree/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .summarizer import (generate_edge_and_wait_values,
generate_summary_edge_costs,
generate_summary_wait_times)
from .toolkit import generate_graph_node_dataframe, get_nearest_node


def generate_empty_md_graph(name: str,
Expand Down Expand Up @@ -36,19 +37,90 @@ def generate_summary_graph_elements(feed: ptg.gtfs.feed,
return (summary_edge_costs, wait_times_by_stop)


def generate_cross_feed_edges(G,
                              feed,
                              wait_times_by_stop,
                              connection_threshold) -> pd.DataFrame:
    """
    Find stops in the feed that sit close enough to a node already in the
    graph to warrant a connector (transfer) edge.

    Parameters
    ----------
    G
        Graph to connect to; must expose ``number_of_nodes()`` and be
        accepted by ``generate_graph_node_dataframe``
    feed
        Feed object whose ``stops`` table provides the ``stop_id``,
        ``stop_lat`` and ``stop_lon`` columns
    wait_times_by_stop
        DataFrame with a ``stop_id`` column; each row is one candidate stop
    connection_threshold
        Exclusive upper bound on the nearest-node distance returned by
        ``get_nearest_node`` for a connector edge to be created
        (presumably meters - confirm against the caller's documentation)

    Returns
    -------
    pd.DataFrame
        One row per qualifying stop with the columns ``stop_id``,
        ``to_node`` and ``distance``. The same columns are present when
        there are no rows, including when ``G`` has no nodes.
    """
    stop_ids = []
    to_nodes = []
    distances = []

    # An empty graph has nothing to connect to, so the scan is skipped and
    # the empty result is built below with the same columns as a populated
    # one (the column names used to differ between the two paths)
    if G.number_of_nodes() > 0:
        # DataFrame representation of the graph nodes, built once and
        # reused for every stop
        node_df = generate_graph_node_dataframe(G)

        # TODO: Repeating this in populate_graph as well, there may
        #       be a way to condense these two steps well
        for _, row in wait_times_by_stop.iterrows():
            sid = str(row.stop_id)

            # TODO: Join tables before hand to make
            #       this part go faster
            matches = feed.stops[feed.stops.stop_id == sid]

            # Handle the possibility that there are no values for that
            # stop id in the feed subset of wait times
            if matches.empty:
                continue

            # Only the first matching stop record is used
            stop_data = matches.iloc[0]

            # Ensure that each value is typed correctly prior to being
            # fed into the nearest node method; point is (lat, lon)
            point = (float(stop_data.stop_lat), float(stop_data.stop_lon))
            nn, nn_dist = get_nearest_node(node_df, point)

            # Only generate a connector edge if it satisfies the
            # distance threshold
            if nn_dist < connection_threshold:
                stop_ids.append(sid)
                to_nodes.append(nn)
                distances.append(nn_dist)

    return pd.DataFrame({'stop_id': stop_ids,
                         'to_node': to_nodes,
                         'distance': distances})


def populate_graph(G: nx.MultiDiGraph,
name: str,
feed: ptg.gtfs.feed,
wait_times_by_stop: pd.DataFrame,
summary_edge_costs: pd.DataFrame):
summary_edge_costs: pd.DataFrame,
cross_feed_edges: pd.DataFrame):
# As we convert stop ids to actual nodes, let's keep track of those names
# here so that we can reference them when we add connector edges across
# the various feeds loaded into the graph
sid_lookup = {}

for i, row in wait_times_by_stop.iterrows():
sid = str(row.stop_id)
full_sid = nameify_stop_id(name, sid)

# TODO: Join tables before hand to make
# this part go faster
id_mask = (feed.stops.stop_id == sid)
stop_data = feed.stops[id_mask].head(1).T.squeeze()
stop_data_head = feed.stops[id_mask].head(1)

# Handle the possibility that there are no values for that stop
# id in the feed subset of wait times
if not len(stop_data_head):
continue

# Once check has cleared, pull out the first row as a pd.Series
stop_data = stop_data_head.T.squeeze()

# Add to the lookup crosswalk dictionary
sid_lookup[sid] = full_sid

G.add_node(full_sid,
boarding_cost=row.avg_cost,
Expand All @@ -62,4 +134,23 @@ def populate_graph(G: nx.MultiDiGraph,
sid_to,
length=row.edge_cost)

# Now add the cross feed edge connectors to the graph to
# capture transfer points
for i, row in cross_feed_edges.iterrows():
# Extract the row column values as discrete variables
sid = row.stop_id
to = row.to_node
d = row.distance

# Use the lookup table to get converted stop id name
full_sid = sid_lookup[sid]

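# Convert the distance (meters) to walking time in hours, assuming a
# 4.5 km/h walk speed (note: despite its name, kmph holds hours)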
kmph = (d / 1000) / 4.5

# Convert to seconds
in_seconds = kmph * 60 * 60

G.add_edge(full_sid, to, length=in_seconds)

return G
51 changes: 47 additions & 4 deletions peartree/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import networkx as nx
import partridge as ptg

from .graph import (generate_empty_md_graph, generate_summary_graph_elements,
populate_graph)
from .graph import (generate_cross_feed_edges, generate_empty_md_graph,
generate_summary_graph_elements, populate_graph)
from .utilities import log


Expand Down Expand Up @@ -58,7 +58,44 @@ def load_feed_as_graph(feed: ptg.gtfs.feed,
start_time: int,
end_time: int,
name: str=None,
existing_graph: nx.MultiDiGraph=None):
existing_graph: nx.MultiDiGraph=None,
connection_threshold: float=50):
"""
Convert a feed object into a NetworkX Graph, connect to an existing
NetworkX graph if one is supplied
Parameters
----------
feed : partridge.feed
A feed object from Partridge holding a representation of the
desired schedule ids and their related schedule data from an
operator GTFS
start_time : int
Represented in seconds after midnight; indicates the start time
with which to take the subset of the target feed schedule
to be used to measure impedance between stops along
the route, as well as cost (wait time) to board at each stop
end_time : int
Represented in seconds after midnight; indicates the end time
with which to take the subset of the target feed schedule
to be used to measure impedance between stops along
the route, as well as cost (wait time) to board at each stop
name : str
Name of the operator, which is used to create a unique ID for each
of the stops, routes, etc. in the feed being supplied
existing_graph : networkx.Graph
An existing graph containing other operator or schedule data
connection_threshold : float
Threshold by which to create a connection with an existing stop
in the existing_graph graph, measured in meters
Returns
-------
G
networkx.Graph, the loaded, combined representation of the schedule
data from the feed subset by the time parameters provided
"""

# Generate a random name for name if it is None
if not name:
name = _generate_random_name()
Expand Down Expand Up @@ -90,8 +127,14 @@ def load_feed_as_graph(feed: ptg.gtfs.feed,
else:
G = generate_empty_md_graph(name)

cross_feed_edges = generate_cross_feed_edges(G,
feed,
wait_times_by_stop,
connection_threshold)

return populate_graph(G,
name,
feed,
wait_times_by_stop,
summary_edge_costs)
summary_edge_costs,
cross_feed_edges)
76 changes: 76 additions & 0 deletions peartree/toolkit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from typing import Tuple

import numpy as np
import osmnx as ox
import pandas as pd


def great_circle_vec(lat1: float,
                     lng1: float,
                     lat2: float,
                     lng2: float,
                     earth_radius: int=6371009):
    """
    Delegate a great-circle distance calculation to OSMnx.

    All arguments are forwarded, unchanged and in the same order, to
    ``ox.utils.great_circle_vec`` and its result is returned as is.

    Parameters
    ----------
    lat1, lng1
        Latitude and longitude of the first point(s)
    lat2, lng2
        Latitude and longitude of the second point(s)
    earth_radius
        Sphere radius handed to OSMnx (presumably meters, which would make
        the result meters as well - confirm against the OSMnx docs)
    """
    # Thin pass-through to the OSMnx implementation, source:
    # https://github.com/gboeing/osmnx/blob/
    # b32f8d333c6965a0d2f27c1f3224a29de2f08d55/osmnx/utils.py#L262
    coordinates = (lat1, lng1, lat2, lng2)
    return ox.utils.great_circle_vec(*coordinates, earth_radius)


def generate_graph_node_dataframe(G):
    """
    Tabulate the coordinates of every node in a graph.

    This method breaks out a portion of a similar method from
    OSMnx's get_nearest_node; source:
    https://github.com/gboeing/osmnx/blob/
    b32f8d333c6965a0d2f27c1f3224a29de2f08d55/osmnx/utils.py#L326

    Parameters
    ----------
    G
        Graph exposing ``number_of_nodes()`` and ``nodes(data=True)``,
        where every node's data mapping has ``x`` and ``y`` entries that
        can be converted to float

    Returns
    -------
    pd.DataFrame
        Indexed by node id (index name ``node``) with float columns ``x``
        and ``y``

    Raises
    ------
    ValueError
        If ``G`` is falsy or has no nodes
    """
    if not G or (G.number_of_nodes() == 0):
        raise ValueError('G argument must be not be empty or '
                         'should contain at least one node')

    # Collect ids and coordinates in separate, homogeneous lists. Packing
    # mixed [node, x, y] rows into a single NumPy array forces one shared
    # dtype, which turns the coordinates into strings when node ids are
    # strings and turns integer node ids into floats.
    nodes = []
    xs = []
    ys = []
    for node, data in G.nodes(data=True):
        nodes.append(node)
        # Ensure that each coordinate is cast as a float
        xs.append(float(data['x']))
        ys.append(float(data['y']))

    # Then make into a Pandas DataFrame, with the node as index
    return pd.DataFrame({'x': xs, 'y': ys},
                        index=pd.Index(nodes, name='node'),
                        columns=['x', 'y'])


def get_nearest_node(df_orig: pd.DataFrame,
                     point: Tuple[float, float]):
    """
    Find the node closest to a reference point.

    This method breaks out a portion of a similar method from
    OSMnx's get_nearest_node; source:
    https://github.com/gboeing/osmnx/blob/
    b32f8d333c6965a0d2f27c1f3224a29de2f08d55/osmnx/utils.py#L326

    Parameters
    ----------
    df_orig
        DataFrame indexed by node id with ``x`` and ``y`` columns
        convertible to float; it is not modified
    point
        Reference location; ``point[0]`` is paired with the ``y`` column
        and ``point[1]`` with the ``x`` column, i.e. (lat, lon) ordering

    Returns
    -------
    tuple
        ``(nearest_node, nn_dist)``: the index label of the closest node
        converted to ``str``, and the distance to it as computed by
        ``great_circle_vec``
    """
    # Reference coordinates repeated along the node index so that all four
    # inputs to the distance calculation are aligned float Series; this
    # avoids copying the input frame just to attach two constant columns
    ref_ys = pd.Series(float(point[0]), index=df_orig.index, dtype=float)
    ref_xs = pd.Series(float(point[1]), index=df_orig.index, dtype=float)

    # Ensure each vectorized series is typed correctly (astype returns new
    # Series, so the caller's frame is left untouched)
    ys = df_orig['y'].astype(float)
    xs = df_orig['x'].astype(float)

    # TODO: OSMnx supports euclidean as well, for now we have a reduced
    #       version of this same function

    # Calculate distance vector using great circle distances (ie, for
    # spherical lat-long geometries)
    distances = great_circle_vec(lat1=ref_ys,
                                 lng1=ref_xs,
                                 lat2=ys,
                                 lng2=xs)

    # Take the minimum directly rather than looking it up again with the
    # stringified label, which raised KeyError for any non-string index
    nearest_label = distances.idxmin()
    nn_dist = distances.min()

    # Return as tuple; the label is stringified to keep the return type
    # callers already rely on
    return (str(nearest_label), nn_dist)

0 comments on commit 6b4ad38

Please sign in to comment.