Commit

Merge 9ff5469 into 3363f63
kuanb committed Mar 3, 2018
2 parents 3363f63 + 9ff5469 commit d603b44
Showing 5 changed files with 317 additions and 0 deletions.
75 changes: 75 additions & 0 deletions peartree/graph.py
@@ -163,3 +163,78 @@ def populate_graph(G: nx.MultiDiGraph,
G.add_edge(full_sid, to, length=in_seconds)

return G


def make_synthetic_system_network(
G: nx.MultiDiGraph,
name: str,
reference_geojson: Dict,
connection_threshold: Union[int, float],
walk_speed_kmph: float=4.5):
graph_name = _generate_random_name(5)
sid_lookup = {}

for feat in reference_geojson['features']:
ref_shape = shape(feat['geometry'])

# Pull out required properties
props = feat['properties']
headway = props['headway']
avg_speed = props['average_speed']
stop_dist = props['stop_distance_distribution']

# Generate reference geometry data
chunks = generate_meter_projected_chunks(ref_shape, stop_dist)
all_pts = generate_stop_points(ref_shape, stop_dist)

# Give each stop a unique id
stop_ids = generate_stop_ids(len(all_pts))

# Produce graph components
nodes = generate_nodes_df(stop_ids, all_pts, headway)
edges = generate_edges_df(stop_ids, all_pts, chunks, avg_speed)

for i, row in nodes.iterrows():
sid = str(row.stop_id)
full_sid = nameify_stop_id(name, sid)

# Add to the lookup crosswalk dictionary
sid_lookup[sid] = full_sid

G.add_node(full_sid,
boarding_cost=row.avg_cost,
y=row.stop_lat,
x=row.stop_lon)

for i, row in edges.iterrows():
sid_fr = nameify_stop_id(name, row.from_stop_id)
sid_to = nameify_stop_id(name, row.to_stop_id)
G.add_edge(sid_fr,
sid_to,
length=row.edge_cost)

# Generate cross feed edge values, using the synthetic stops
# (the nodes DataFrame) in place of a GTFS stops table
cross_feed_edges = generate_cross_feed_edges(G,
nodes,
connection_threshold)

# Now add the cross feed edge connectors to the graph to
# capture transfer points
for i, row in cross_feed_edges.iterrows():
# Extract the row column values as discrete variables
sid = row.stop_id
to = row.to_node
d = row.distance

# Use the lookup table to get converted stop id name
full_sid = sid_lookup[sid]

# Convert meters to kilometers and divide by the walk speed
# (km/hour) to get the walk time in hours
hours = (d / 1000) / walk_speed_kmph

# Convert hours to seconds
in_seconds = hours * 60 * 60

G.add_edge(full_sid, to, length=in_seconds)

return G
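A quick worked example of the transfer-time conversion above: under the default 4.5 kmph walk speed, a 100 meter transfer distance works out to (100 / 1000) / 4.5 ≈ 0.022 hours, or roughly 80 seconds of added edge cost.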
38 changes: 38 additions & 0 deletions peartree/paths.py
@@ -137,3 +137,41 @@ def load_feed_as_graph(feed: ptg.gtfs.feed,
summary_edge_costs,
connection_threshold,
walk_speed_kmph)


def load_synthetic_network_as_graph(
geojson_path: str,
name: str=None,
existing_graph: nx.MultiDiGraph=None,
connection_threshold: float=50.0,
walk_speed_kmph: float=4.5,
interpolate_times: bool=True):

# Load in the GeoJSON as a JSON and convert to a dictionary
with open(geojson_path, 'r') as gjf:
reference_geojson = json.load(gjf)

# Generate a random name for name if it is None
if not name:
name = _generate_random_name()

# This is a flag used to check if we need to run any additional steps
# after the feed is returned to ensure that new nodes and edges can
# connect with existing ones (if they exist/a graph is passed in)
existing_graph_supplied = bool(existing_graph)

# G is either a new MultiDiGraph or one passed in from before
if existing_graph_supplied:
# TODO: If passed from before we should run some checks to ensure
# it is valid as well as set a flag to create join points with
# other feeds so that they can be linked when the next is added.
G = existing_graph
else:
G = generate_empty_md_graph(name)

return make_synthetic_system_network(
G,
name,
reference_geojson,
connection_threshold,
walk_speed_kmph)
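A minimal usage sketch of the new loader, assuming it is exported from the peartree package namespace (otherwise import it from peartree.paths); the fixture path below is the one added in this commit:

import peartree as pt

# Build a synthetic transit network straight from a GeoJSON route
# definition; each feature supplies headway, average_speed and
# stop_distance_distribution properties
G = pt.load_synthetic_network_as_graph(
    'tests/fixtures/synthetic_example.geojson')

# The result is a networkx MultiDiGraph, so the usual graph
# operations apply
print(len(G.nodes()), len(G.edges()))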
145 changes: 145 additions & 0 deletions peartree/synthetic.py
@@ -0,0 +1,145 @@
from functools import partial
from typing import List

import pandas as pd
import pyproj
from shapely.geometry import LineString, MultiPoint, Point, shape
from shapely.ops import linemerge, split, transform

from .paths import _generate_random_name


def generate_meter_projected_chunks(
route_shape: LineString,
stop_distance_distribution: int) -> List[LineString]:

# Reproject 4326 lat/lon coordinates to equal area
project = partial(
pyproj.transform,
pyproj.Proj(init='epsg:4326'), # source coordinate system
pyproj.Proj(init='epsg:2163')) # destination coordinate system

rs2 = transform(project, route_shape) # apply projection
stop_count = round(rs2.length / stop_distance_distribution)

# Create the array of break points/joints
mp_array = []
for i in range(1, stop_count):
fr = (i/stop_count)
mp_array.append(rs2.interpolate(fr, normalized=True))

# Cast array as a Shapely object
splitter = MultiPoint(mp_array)

# 1 meter buffer to address floating point discrepancies
chunks = split(rs2, splitter.buffer(1))

# TODO: Potential for length errors with this 1 meter
# threshold check

# Take chunks and merge in the small lines
# from intersection inside of the buffered circles
# and attach to nearest larger line
clean_chunks = [chunks[0]]
r = len(chunks)
for c in range(1, r):
latest = clean_chunks[-1]
current = chunks[c]
# Again, this is a weak point of the 1 meter
# buffer method
if latest.length <= 2:
# Merge in the small chunks with the larger chunks
clean_chunks[-1] = linemerge([latest, current])
else:
clean_chunks.append(current)

return clean_chunks
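A small standalone sketch of the buffered-split idea used above, with made-up coordinates rather than anything from the fixture (it assumes shapely's split accepts a polygon splitter, which the code above also relies on):

from shapely.geometry import LineString, MultiPoint
from shapely.ops import split

line = LineString([(0, 0), (100, 0)])
splitter = MultiPoint([(50, 0)])

# Splitting on the buffered point produces two long chunks plus a
# roughly 2 unit sliver inside the buffer; the loop above folds such
# slivers into their neighbouring chunk
pieces = list(split(line, splitter.buffer(1)).geoms)
print([round(p.length, 2) for p in pieces])  # roughly [49.0, 2.0, 49.0]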


def generate_stop_points(
route_shape: LineString,
stop_distance_distribution: int) -> List[Point]:
# Rename variable for brevity
rs = route_shape

# The raw route shape is in lat/lon (EPSG 4326), so measure its
# length in an equal area projection (mirroring the chunking step)
# to get a stop count that matches the meter-based stop distance
project = partial(
pyproj.transform,
pyproj.Proj(init='epsg:4326'), # source coordinate system
pyproj.Proj(init='epsg:2163')) # destination coordinate system
rs_m = transform(project, rs)
stop_count = round(rs_m.length / stop_distance_distribution)

# Create the array of break points/joints, interpolated along the
# original lat/lon shape so the stops stay in lat/lon coordinates
mp_array = []
for i in range(1, stop_count):
fr = (i/stop_count)
mp_array.append(rs.interpolate(fr, normalized=True))

# Resulting array is composed of first and last point, plus
# splitter points in the middle
all_points = [Point(rs.coords[0])]
all_points += mp_array
all_points += [Point(rs.coords[-1])]
return all_points
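As a worked example, a 4,020 meter route with the fixture's 402 meter stop_distance_distribution gives stop_count = 10: nine interior interpolated points plus the two endpoints, eleven stop points in total, which pair off into the ten chunks produced by generate_meter_projected_chunks.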


def generate_stop_ids(stops_count: int) -> List[str]:
shape_name = _generate_random_name(5)
stop_names = []
for i in range(stops_count):
stop_names.append('_'.join([shape_name, str(i)]))
return stop_names


def generate_nodes_df(
stop_ids: List[str],
all_points: List[Point],
headway: float) -> pd.DataFrame:
avg_costs = []
stop_lats = []
stop_lons = []

default_avg_cost = headway/2

for point in all_points:
avg_costs.append(default_avg_cost)
# Points were built from (lon, lat) GeoJSON coordinates, so
# latitude is the y value and longitude is the x value
stop_lats.append(point.y)
stop_lons.append(point.x)

nodes_df = pd.DataFrame({
'stop_id': stop_ids,
'avg_cost': avg_costs,
'stop_lat': stop_lats,
'stop_lon': stop_lons,
})

return nodes_df
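For instance, with the fixture's 900 second headway each synthetic stop receives a boarding cost of 450 seconds, i.e. the expected wait if riders arrive uniformly at random relative to the schedule.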


def generate_edges_df(
stop_ids: List[str],
all_points: List[Point],
chunks: List[LineString],
avg_speed: float) -> pd.DataFrame:
from_stop_ids = []
to_stop_ids = []
edge_costs = []

paired_nodes = list(zip(stop_ids[:-1], stop_ids[1:]))

# Sanity check
if not len(chunks) == len(paired_nodes):
raise Exception('Chunking operation did not result in '
'correct route shape subdivisions.')

for i, nodes in enumerate(paired_nodes):
point_a = nodes[0]
point_b = nodes[1]
from_stop_ids.append(point_a)
to_stop_ids.append(point_b)

# Estimate the amount of time it would
# take to traverse this portion of the
# route path given the default speed
length_km = chunks[i].length / 1000 # distance in km
# Note: Average speed is to be supplied in kmph
edge_costs.append(length_km / avg_speed)

edges_df = pd.DataFrame({
'from_stop_id': from_stop_ids,
'to_stop_id': to_stop_ids,
'edge_cost': edge_costs,
})

return edges_df
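As a worked example, a 402 meter chunk at the fixture's 16 kmph average speed yields an edge cost of 0.402 / 16 ≈ 0.025; since kilometers are divided by kilometers per hour, the value as written is an in-vehicle travel time expressed in hours.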
55 changes: 55 additions & 0 deletions tests/fixtures/synthetic_example.geojson
@@ -0,0 +1,55 @@
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": {
"headway": 15 * 60, # 15 min in seconds
"average_speed": 16, # 16 kmph, approx. 10 mph
"stop_distance_distribution": 402, # 1/4 mile in meters
},
"geometry": {
"type": "LineString",
"coordinates": [
[ -122.29469776153564, 37.8044860626114 ],
[ -122.29392528533934, 37.80426566213625 ],
[ -122.29358196258545, 37.80513878323706 ],
[ -122.29306697845459, 37.80623228927919 ],
[ -122.29246616363524, 37.807647889331456 ],
[ -122.29159712791443, 37.810165386451494 ],
[ -122.29047060012817, 37.81318287932694 ],
[ -122.28938698768616, 37.81620872446383 ],
[ -122.28916168212889, 37.816912194500276 ],
[ -122.28907585144043, 37.81802247694201 ],
[ -122.28902220726013, 37.819548028632376 ],
[ -122.28882908821106, 37.82224309289828 ],
[ -122.28875935077666, 37.823077864855556 ],
[ -122.28878617286682, 37.82341049978138 ],
[ -122.28884518146513, 37.823683810158975 ],
[ -122.2890356183052, 37.82415415588092 ],
[ -122.2892314195633, 37.82452068447801 ],
[ -122.28957742452623, 37.82532576980349 ],
[ -122.28974103927614, 37.82598889927767 ],
[ -122.28969007730483, 37.82657575329904 ],
[ -122.28945404291152, 37.82708209652004 ],
[ -122.28891491889954, 37.827615977659555 ],
[ -122.28801369667053, 37.82809477253903 ],
[ -122.28726267814636, 37.828234597006194 ],
[ -122.28492379188538, 37.82870491372413 ],
[ -122.28534221649169, 37.82985315185641 ],
[ -122.27834701538086, 37.83131066825093 ],
[ -122.27716684341429, 37.83116661268019 ],
[ -122.27407693862915, 37.83062428330309 ],
[ -122.27100849151611, 37.83016668979049 ],
[ -122.26765036582945, 37.829607404976734 ],
[ -122.26542949676514, 37.82929386466607 ],
[ -122.26442098617554, 37.82910743466058 ],
[ -122.26525783538818, 37.82648041632065 ],
[ -122.26620197296141, 37.826751596736095 ],
[ -122.26907730102539, 37.82719226278575 ],
[ -122.26847648620605, 37.82971756747229 ]
]
}
}
]
}
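The fixture describes a single synthetic route: a 900 second (15 minute) headway, a 16 kmph (roughly 10 mph) average speed, and stops distributed about every 402 meters (a quarter mile) along the LineString.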
4 changes: 4 additions & 0 deletions tests/test_paths.py
@@ -89,6 +89,10 @@ def test_loading_in_invalid_timeframes():
load_feed_as_graph(feed_1, start, end)


def test_synthetic_network():
path = fixture('synthetic_example.geojson')

# Minimal smoke check (assumes load_synthetic_network_as_graph is
# imported in this module alongside the other peartree loaders):
# the synthetic feed should produce a non-empty graph
G = load_synthetic_network_as_graph(path)
assert len(G.nodes()) > 0
assert len(G.edges()) > 0


def test_feed_to_graph_path():
path_1 = fixture('caltrain-2017-07-24.zip')
feed_1 = get_representative_feed(path_1)
