### Getting MTA station names, longitude, latitude

In [7]:
import pandas as pd
from sodapy import Socrata

# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata("data.ny.gov", None)

# Example authenticated client (needed for non-public datasets):
# client = Socrata(data.ny.gov,
#                  MyAppToken,
#                  username="user@example.com",
#                  password="AFakePassword")

# First 2000 results, returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("39hk-dx4f", limit=2000)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)

for index, row in results_df.iterrows():
    station_name = row['stop_name']
    longitude = row['gtfs_longitude']
    latitude = row['gtfs_latitude']
    station_id = row['gtfs_stop_id']

    # print(f"Station: {station_name}, Longitude: {longitude}, Latitude: {latitude}")

stations_df = results_df[['stop_name', 'gtfs_longitude', 'gtfs_latitude','gtfs_stop_id']]
print(stations_df)

stations_dict = stations_df.to_dict(orient='records')
print(stations_dict)



                stop_name gtfs_longitude gtfs_latitude gtfs_stop_id
0    Astoria-Ditmars Blvd     -73.912034     40.775036          R01
1            Astoria Blvd     -73.917843     40.770258          R03
2                   30 Av     -73.921479     40.766779          R04
3                Broadway     -73.925508      40.76182          R05
4                   36 Av     -73.929575     40.756804          R06
..                    ...            ...           ...          ...
491          Prince's Bay     -74.200064     40.525507          S15
492       Pleasant Plains     -74.217847      40.52241          S14
493       Richmond Valley     -74.229141     40.519631          S13
494           Tottenville     -74.251961     40.512764          S09
495           Arthur Kill     -74.242096     40.516578          S11

[496 rows x 4 columns]
[{'stop_name': 'Astoria-Ditmars Blvd', 'gtfs_longitude': '-73.912034', 'gtfs_latitude': '40.775036', 'gtfs_stop_id': 'R01'}, {'stop_name': 'Astoria Blvd', 'gtfs

### Optimize the path we take for objective of minimizing time traveled

In [18]:
import networkx as nx
from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp
from scipy.optimize import minimize
import numpy as np


def haversine(lat1, lon1, lat2, lon2):
    """
    Calculates great circle distance in meters
    between two points on Earth in decimal degrees
    """
    lat1 = float(lat1)
    lon1 = float(lon1)
    lat2 = float(lat2)
    lon2 = float(lon2)
    R = 6371000
    phi1, phi2 = np.radians(lat1), np.radians(lat2)
    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lon2 - lon1)
    a = np.sin(delta_phi/2.0) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda/2.0) ** 2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1-a))
    meters = R*c
    miles = meters * 0.000621371  # Convert meters to miles
    return miles

def calculate_travel_time(distance):
    speed_mph = 17.4 # Average MTA subway speed
    travel_time_hours = distance / speed_mph
    return travel_time_hours * 60.0 # Hours to minutes

def create_distance_matrix(stations_data):
    num_stations = len(stations_data)
    stations_data = pd.DataFrame(stations_data)
    distance_matrix = np.zeros((num_stations,num_stations))

    for i in range(num_stations):
        # Access data using column names
        from_lat = stations_data.loc[i, 'gtfs_latitude']
        from_lng = stations_data.loc[i, 'gtfs_longitude']
        for j in range(num_stations):
            if i != j:
                to_lat = stations_data.loc[j, 'gtfs_latitude']
                to_lng = stations_data.loc[j, 'gtfs_longitude']
                distance = haversine(from_lat, from_lng, to_lat, to_lng)
                distance_matrix[i, j] = distance

    return distance_matrix
                
    return distance_matrix

def find_nearest_neighbor(distance_matrix,current_index,visited):
    min_distance = float('inf')
    nearest_index = -1
    for i, distance in enumerate(distance_matrix[current_index]):
        if i not in visited and distance < min_distance:
            min_distance = distance
            nearest_index = i
    return nearest_index

def solve_tsp_knn(stations_data):
    distance_matrix = create_distance_matrix(stations_data)
    num_stations = len(stations_data)

    start_index = 0
    visited = set()
    route = [start_index]
    visited.add(start_index)

    current_index = start_index
    while len(visited) < num_stations:
        nearest_index = find_nearest_neighbor(distance_matrix,current_index,visited)
        route.append(nearest_index)
        visited.add(nearest_index)
        current_index = nearest_index

    return route

def print_route_with_distances(stations_data,optimized_route):
    total_distance = 0
    total_time = 0
    output_lines = []
    output_lines.append("Optimized Route with Distances and Times in Miles and Minutes: ")

    for i in range(len(optimized_route)-1):
        from_station = stations_data[optimized_route[i]]['stop_name']
        to_station = stations_data[optimized_route[i+1]]['stop_name']

        from_lat, from_lng = stations_data[optimized_route[i]]['gtfs_latitude'],stations_data[optimized_route[i]]['gtfs_longitude']
        to_lat, to_lng = stations_data[optimized_route[i+1]]['gtfs_latitude'],stations_data[optimized_route[i+1]]['gtfs_longitude']

        distance = haversine(from_lat,from_lng,to_lat,to_lng)

        travel_time = calculate_travel_time(distance)

        total_distance += distance
        total_time += travel_time

        output_lines.append(f"From {from_station} to {to_station}: {distance:.2f} miles, {travel_time:.2f} minutes")

    output_lines.append(f"Total distance traveled: {total_distance:.2f} miles")
    output_lines.append(f"Total time traveled: {total_time:.2f} minutes")

    for line in output_lines:
        print(line)

def main_knn():
    stations_data_list = [dict(row) for _,row in pd.DataFrame(stations_df).iterrows()]
    optimized_route_indices = solve_tsp_knn(stations_data_list)
    print_route_with_distances(stations_data_list,optimized_route_indices)

if __name__ == "__main__":
    main_knn()

    




Optimized Route with Distances and Times in Miles and Minutes: 
From Astoria-Ditmars Blvd to Astoria Blvd: 0.45 miles, 1.55 minutes
From Astoria Blvd to 30 Av: 0.31 miles, 1.06 minutes
From 30 Av to Broadway: 0.40 miles, 1.39 minutes
From Broadway to 36 Av: 0.41 miles, 1.40 minutes
From 36 Av to 39 Av-Dutch Kills: 0.32 miles, 1.10 minutes
From 39 Av-Dutch Kills to 36 St: 0.22 miles, 0.74 minutes
From 36 St to Queens Plaza: 0.49 miles, 1.69 minutes
From Queens Plaza to Queensboro Plaza: 0.19 miles, 0.66 minutes
From Queensboro Plaza to Queensboro Plaza: 0.00 miles, 0.00 minutes
From Queensboro Plaza to 21 St-Queensbridge: 0.29 miles, 0.99 minutes
From 21 St-Queensbridge to Court Sq-23 St: 0.47 miles, 1.62 minutes
From Court Sq-23 St to Court Sq: 0.07 miles, 0.24 minutes
From Court Sq to Court Sq: 0.08 miles, 0.28 minutes
From Court Sq to 21 St: 0.35 miles, 1.22 minutes
From 21 St to Hunters Point Av: 0.13 miles, 0.46 minutes
From Hunters Point Av to Vernon Blvd-Jackson Av: 0.25 miles, 0

In [22]:
import googlemaps
import pandas as pd
import folium
import polyline
from IPython.display import display

# Your Google Maps API key
API_KEY = 'AIzaSyDmno08HDL11dIRev5_tGNjLUfoOGKfB3g'

# Initialize the Google Maps client
gmaps = googlemaps.Client(key=API_KEY)

def convert_to_optimized_route_format(stations_dict):
    optimized_route = []
    for station in stations_dict:
        optimized_route.append({'name': station['stop_name'], 'lat': station['gtfs_latitude'], 'lng': station['gtfs_longitude']})
    return optimized_route

optimized_route = convert_to_optimized_route_format(stations_dict)
print(optimized_route)

# Function to get the polyline for the path between two points
def get_polyline(start, end, api_key):
    directions_result = gmaps.directions(start, end, mode="transit", transit_mode="subway")
    if directions_result:
        polyline_str = directions_result[0]['overview_polyline']['points']
        return polyline.decode(polyline_str)
    else:
        return []

# Function to plot the entire route
def plot_optimized_route(optimized_route, api_key):
    # Create a Folium map centered around the first station
    first_station = optimized_route[0]
    map_center = (first_station['lat'], first_station['lng'])
    my_map = folium.Map(location=map_center, zoom_start=13)

    # Iterate through the optimized route and plot the path
    for i in range(len(optimized_route) - 1):
        start = (optimized_route[i]['lat'], optimized_route[i]['lng'])
        end = (optimized_route[i + 1]['lat'], optimized_route[i + 1]['lng'])
        path = get_polyline(start, end, api_key)
        
        if path:
            folium.PolyLine(locations=path, color='blue', weight=5, opacity=0.7).add_to(my_map)
        
        # Add markers for the start and end stations
        folium.Marker(location=start, popup=optimized_route[i]['name']).add_to(my_map)
        folium.Marker(location=end, popup=optimized_route[i + 1]['name']).add_to(my_map)

    # Display the map
    display(my_map)

# Plot the optimized route
plot_optimized_route(optimized_route, API_KEY)

[{'name': 'Astoria-Ditmars Blvd', 'lat': '40.775036', 'lng': '-73.912034'}, {'name': 'Astoria Blvd', 'lat': '40.770258', 'lng': '-73.917843'}, {'name': '30 Av', 'lat': '40.766779', 'lng': '-73.921479'}, {'name': 'Broadway', 'lat': '40.76182', 'lng': '-73.925508'}, {'name': '36 Av', 'lat': '40.756804', 'lng': '-73.929575'}, {'name': '39 Av-Dutch Kills', 'lat': '40.752882', 'lng': '-73.932755'}, {'name': 'Lexington Av/59 St', 'lat': '40.76266', 'lng': '-73.967258'}, {'name': '5 Av/59 St', 'lat': '40.764811', 'lng': '-73.973347'}, {'name': '57 St-7 Av', 'lat': '40.764664', 'lng': '-73.980658'}, {'name': '49 St', 'lat': '40.759901', 'lng': '-73.984139'}, {'name': 'Times Sq-42 St', 'lat': '40.754672', 'lng': '-73.986754'}, {'name': '34 St-Herald Sq', 'lat': '40.749567', 'lng': '-73.98795'}, {'name': '28 St', 'lat': '40.745494', 'lng': '-73.988691'}, {'name': '23 St', 'lat': '40.741303', 'lng': '-73.989344'}, {'name': '14 St-Union Sq', 'lat': '40.735736', 'lng': '-73.990568'}, {'name': '8 St