In [3]:
import re
import logging
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import geopandas as gpd
from shapely.geometry import Point, LineString
import networkx as nx
#import gurobipy as gb
#from keplergl import KeplerGl
from gtfs_functions import Feed
from sklearn.metrics.pairwise import manhattan_distances

In [4]:
# Pin the root logger to WARNING so loggers inheriting the root level only emit warnings and errors.
logging.getLogger().setLevel("WARNING")

In [5]:
# Load the route segments and the stop table (paths are relative to the working directory).
segments = pd.read_csv("segments.csv")
stops = pd.read_csv("stops.csv")

# Store the identifier columns as strings so they can be compared with string route/stop ids.
segments = segments.astype(
    {"route_id": str, "start_stop_id": str, "end_stop_id": str}
)

In [6]:
def get_segment_stops(segment_df, route_id, direction_id=0, stops_df=None):
    """Return the segments of one route/direction and the stops they touch.

    Parameters
    ----------
    segment_df : pandas.DataFrame
        Segment table with the columns ``route_id``, ``direction_id``,
        ``start_stop_id`` and ``end_stop_id``.
    route_id : str
        Route to select (compared with ``==`` against ``segment_df["route_id"]``).
    direction_id : int, default 0
        Direction to select.
    stops_df : pandas.DataFrame, optional
        Stop table with a ``stop_id`` column. When omitted, the notebook-level
        ``stops`` frame is used, which keeps existing calls working.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(segments_of_route, stops_of_route)``: the matching rows of
        ``segment_df`` and the rows of the stop table whose ``stop_id`` appears
        as a start or end stop of one of those segments. Both are empty when
        nothing matches.
    """
    if stops_df is None:
        # Backward-compatible fallback to the stop table loaded earlier in the notebook.
        stops_df = stops

    on_route = (segment_df["route_id"] == route_id) & (
        segment_df["direction_id"] == direction_id
    )
    segment_distance = segment_df[on_route]

    # Every stop id that starts or ends a segment of this route, de-duplicated.
    endpoint_ids = pd.unique(
        segment_distance[["start_stop_id", "end_stop_id"]].to_numpy().ravel()
    )
    stops_in_route = stops_df[stops_df["stop_id"].isin(endpoint_ids)]

    return segment_distance, stops_in_route

In [7]:
# Routes to analyse; ids are strings, matching the string cast applied to `segments`.
routes = ["35", "61", "15", "51", "18", "107", "144", "74", "12"]
route_details = {}

for r in routes:
    # Direction 0 only; `detail` is the (segments, stops) pair for this route.
    detail = get_segment_stops(segments, r, direction_id=0)
    route_details[r] = {"segment": detail[0], "stops": detail[1]}

In [8]:
# Union of the stops of all selected routes; fully identical rows are kept once.
stops_df = pd.concat(
    [entry["stops"] for entry in route_details.values()]
).drop_duplicates()
print(f"Number of total stops in routes: {len(stops_df)}")
stops_df.head()

Number of total stops in routes: 270


Unnamed: 0,stop_id,stop_code,stop_name,stop_lat,stop_lon,stop_url,location_type,parent_station,wheelchair_boarding,geometry
422,61743,61743,Notre-Dame / Guy,45.489959,-73.567233,https://www.stm.info/fr/recherche#stq=61743,0,,1,POINT (-73.567233 45.489959)
423,61744,61744,de la Montagne / Ottawa,45.492014,-73.561145,https://www.stm.info/fr/recherche#stq=61744,0,,1,POINT (-73.561145 45.492014)
424,61745,61745,de la Montagne / du Square-Gallery,45.491756,-73.558727,https://www.stm.info/fr/recherche#stq=61745,0,,1,POINT (-73.558727 45.491756)
438,61765,61765,Wellington / Prince,45.496656,-73.555554,https://www.stm.info/fr/recherche#stq=61765,0,,1,POINT (-73.555554 45.496656)
591,62063,62063,Notre-Dame / Bérard,45.480191,-73.579727,https://www.stm.info/fr/recherche#stq=62063,0,,1,POINT (-73.579727 45.480191)


## Visualize stops on a map

In [9]:
# Rebuild the geometry column from the numeric coordinates in one vectorised call
# (x = longitude, y = latitude) instead of constructing a Point per row with `apply`.
stops_df["geometry"] = gpd.points_from_xy(
    stops_df["stop_lon"].astype(float), stops_df["stop_lat"].astype(float)
)

# NOTE(review): no CRS is attached here; GTFS coordinates are presumably WGS84
# (EPSG:4326) — confirm and set it before any reprojection or spatial join.
stops_df_gpd = gpd.GeoDataFrame(stops_df, geometry="geometry").drop(
    columns=["location_type", "parent_station", "wheelchair_boarding"]
)

stops_df_gpd.head()

Unnamed: 0,stop_id,stop_code,stop_name,stop_lat,stop_lon,stop_url,geometry
422,61743,61743,Notre-Dame / Guy,45.489959,-73.567233,https://www.stm.info/fr/recherche#stq=61743,POINT (-73.56723 45.48996)
423,61744,61744,de la Montagne / Ottawa,45.492014,-73.561145,https://www.stm.info/fr/recherche#stq=61744,POINT (-73.56114 45.49201)
424,61745,61745,de la Montagne / du Square-Gallery,45.491756,-73.558727,https://www.stm.info/fr/recherche#stq=61745,POINT (-73.55873 45.49176)
438,61765,61765,Wellington / Prince,45.496656,-73.555554,https://www.stm.info/fr/recherche#stq=61765,POINT (-73.55555 45.49666)
591,62063,62063,Notre-Dame / Bérard,45.480191,-73.579727,https://www.stm.info/fr/recherche#stq=62063,POINT (-73.57973 45.48019)


In [10]:
# Reproducible 20 % random subsample of the stops (fixed seed).
random_stops_df_gpd = stops_df_gpd.sample(
    frac=0.2,
    random_state=421,
)

random_stops_df_gpd.head()

Unnamed: 0,stop_id,stop_code,stop_name,stop_lat,stop_lon,stop_url,geometry
423,61744,61744,de la Montagne / Ottawa,45.492014,-73.561145,https://www.stm.info/fr/recherche#stq=61744,POINT (-73.56114 45.49201)
2933,52517,52517,Station Place-des-Arts (Président-Kennedy / De...,45.507283,-73.569604,https://www.stm.info/fr/recherche#stq=52517,POINT (-73.56960 45.50728)
2931,52515,52515,Beaubien / de l'Assomption,45.576121,-73.567835,https://www.stm.info/fr/recherche#stq=52515,POINT (-73.56784 45.57612)
7030,56629,56629,Wellington / Rielle,45.459863,-73.567201,https://www.stm.info/fr/recherche#stq=56629,POINT (-73.56720 45.45986)
2823,52399,52399,Sainte-Catherine / Peel,45.499893,-73.572949,https://www.stm.info/fr/recherche#stq=52399,POINT (-73.57295 45.49989)


## Calculating the distance between stops using osmnx

### Importing libraries

In [11]:
import pandas as pd
import osmnx as ox
import networkx as nx
import numpy as np

### Downloading the street network of the area covered by the randomly sampled stops in Montreal

In [12]:
# Bounding box of the sampled stops: latitude extremes give north/south,
# longitude extremes give east/west.
north, south = random_stops_df_gpd["stop_lat"].max(), random_stops_df_gpd["stop_lat"].min()
east, west = random_stops_df_gpd["stop_lon"].max(), random_stops_df_gpd["stop_lon"].min()

In [13]:
# Create the drivable street graph for the bounding box.
# NOTE(review): separate north/south/east/west arguments look like the OSMnx 1.x
# signature; newer releases may expect a single `bbox` tuple — confirm the installed version.
G = ox.graph_from_bbox(
    north=north, south=south, east=east, west=west, network_type="drive"
)

### Calculating the distance between stops

In [35]:
# Snap every stop to its nearest graph node once, in a single vectorised call
# (X = longitudes, Y = latitudes), instead of re-snapping both endpoints for every pair.
stop_nodes = ox.distance.nearest_nodes(
    G,
    random_stops_df_gpd["stop_lon"].to_numpy(),
    random_stops_df_gpd["stop_lat"].to_numpy(),
)

# Start from "unreachable" everywhere; reachable pairs are overwritten below.
distance_matrix = np.full((len(stop_nodes), len(stop_nodes)), np.inf)

# One single-source Dijkstra per origin gives the distances to all destinations at once.
for i, orig_node in enumerate(stop_nodes):
    lengths_from_orig = nx.single_source_dijkstra_path_length(
        G, orig_node, weight="length"
    )
    for j, dest_node in enumerate(stop_nodes):
        # The origin itself has length 0, so the diagonal stays 0; nodes missing
        # from the result have no path from the origin and stay at infinity.
        distance_matrix[i, j] = lengths_from_orig.get(dest_node, np.inf)


In [37]:
# Scale the path lengths by 1/1000.
# NOTE(review): `distance_matrix` is rebound in place, so re-running this cell
# without re-running the previous one divides by 1000 again.
distance_matrix = distance_matrix / 1000

# Label rows and columns of the matrix with the stop ids.
distance_matrix_df = pd.DataFrame(
    data=distance_matrix,
    index=random_stops_df_gpd["stop_id"],
    columns=random_stops_df_gpd["stop_id"],
)
distance_matrix_df.head()

stop_id,61744,52517,52515,56629,52399,56627,51214,54136,56584,51146,...,52364,50799,56184,51805,54638,52186,52197,52887,60748,54398
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
61744,0.0,2.44811,11.182902,3.904204,1.547278,4.093335,5.643907,8.685075,4.691903,5.678729,...,3.747441,7.520025,6.357288,8.507897,13.825617,10.325461,2.054784,1.781254,5.5027,6.981556
52517,2.444694,0.0,8.824372,6.178968,1.023427,6.368099,5.306529,6.326545,6.591552,5.341351,...,1.388911,7.182647,4.231152,6.149367,11.467087,7.966931,1.873402,4.056018,7.777464,4.623026
52515,11.193669,8.780232,0.0,14.945875,9.753825,15.135006,10.300127,3.374647,15.287781,11.837043,...,8.112645,13.803061,9.132763,3.551825,2.735666,1.474324,10.602744,12.822925,16.544371,6.337
56629,3.912054,6.237851,14.972643,0.0,5.342794,0.189131,8.104213,12.474816,0.788,6.573098,...,7.537182,7.131141,9.377308,12.297638,17.615358,14.115202,4.429139,2.120063,4.147337,10.712529
52399,1.880621,1.23666,9.774052,5.524021,0.0,5.713152,4.669068,7.276225,5.817379,4.70389,...,2.338591,6.545186,4.830314,7.099047,12.416767,8.916611,1.099229,3.651045,7.288926,5.472739


## FINAL FUNCTION TO GET THE DISTANCE BETWEEN THE RANDOMLY SAMPLED STOPS

In [41]:
## Define the function that builds the distance matrix; it takes the random stops DataFrame as input
def build_distance_matrix(random_stops_df_gpd):
    """Build the pairwise street-network distance matrix between stops, in kilometres.

    Downloads the OSM "drive" network for the bounding box of the stops, snaps
    every stop to its nearest graph node and computes shortest-path lengths
    (edge attribute ``length``) between every ordered pair of stops.

    Parameters
    ----------
    random_stops_df_gpd : pandas.DataFrame or geopandas.GeoDataFrame
        Stops to route between. Must contain the columns ``stop_id``,
        ``stop_lat`` and ``stop_lon``.

    Returns
    -------
    pandas.DataFrame
        Square frame whose index and columns are ``stop_id``. Entry
        ``[a, b]`` is the shortest-path length from stop ``a`` to stop ``b``
        converted from metres to kilometres. The diagonal is 0 and pairs with
        no path are ``inf``. An empty input yields an empty frame without
        any download.

    Notes
    -----
    The graph is directed, so the matrix is generally not symmetric.
    NOTE(review): the graph is clipped to the tight bounding box of the stops,
    so routes that would leave that box are not available — confirm whether a
    margin is needed.
    """
    stop_ids = random_stops_df_gpd["stop_id"]
    n_stops = len(random_stops_df_gpd)
    if n_stops == 0:
        # Nothing to route: skip the network download entirely.
        return pd.DataFrame(np.zeros((0, 0)), index=stop_ids, columns=stop_ids)

    lats = random_stops_df_gpd["stop_lat"].to_numpy()
    lons = random_stops_df_gpd["stop_lon"].to_numpy()

    # Street graph for the bounding box of the stops.
    # NOTE(review): separate north/south/east/west arguments look like the OSMnx 1.x
    # signature; newer releases may expect a single `bbox` tuple — confirm the installed version.
    G = ox.graph_from_bbox(
        north=lats.max(),
        south=lats.min(),
        east=lons.max(),
        west=lons.min(),
        network_type="drive",
    )

    # Snap every stop to its nearest node once (X = longitudes, Y = latitudes)
    # instead of re-snapping both endpoints for each of the n^2 pairs.
    stop_nodes = ox.distance.nearest_nodes(G, lons, lats)

    # Start from "unreachable" everywhere; reachable pairs are overwritten below.
    distance_matrix = np.full((n_stops, n_stops), np.inf)

    # One single-source Dijkstra per distinct origin node covers a whole matrix row;
    # stops that snap to the same node reuse the cached result.
    lengths_by_origin = {}
    for i, orig_node in enumerate(stop_nodes):
        if orig_node not in lengths_by_origin:
            lengths_by_origin[orig_node] = nx.single_source_dijkstra_path_length(
                G, orig_node, weight="length"
            )
        lengths_from_orig = lengths_by_origin[orig_node]
        for j, dest_node in enumerate(stop_nodes):
            # The origin itself has length 0 (diagonal); nodes missing from the
            # result have no path from the origin and stay at infinity.
            distance_matrix[i, j] = lengths_from_orig.get(dest_node, np.inf)

    # Convert the distance matrix from metres to kilometres.
    distance_matrix = distance_matrix / 1000

    # Label rows and columns with the stop ids.
    distance_matrix_df = pd.DataFrame(distance_matrix, index=stop_ids, columns=stop_ids)

    return distance_matrix_df