In [244]:
import geopandas as gp
import pandas as pd
import folium as fol
import matplotlib.pyplot as plt
import seaborn as sns
from folium.plugins import MarkerCluster
from shapely.geometry import Point, LineString, MultiLineString
from shapely import Geometry
import numpy as np
import pyproj as prj

#### Preprocessing:

##### MBTA Arc Shape File:

In [80]:
# Load the MBTA arc shapefile into a GeoDataFrame.
arc_gdf = gp.read_file('geodata/MBTA_ARC.shp')

# Remove Silver Line (rows are dropped by index label; the index is not reset)
arc_gdf = arc_gdf.drop(index=arc_gdf.index[arc_gdf['LINE'] == 'SILVER'])
arc_gdf

Unnamed: 0,LINE,ROUTE,GRADE,SHAPE_LEN,geometry
0,ORANGE,Forest Hills to Oak Grove,1,1342.326419,"LINESTRING (234806.058 905421.773, 234805.768 ..."
1,ORANGE,Forest Hills to Oak Grove,4,368.215569,"LINESTRING (234806.833 905053.560, 234806.833 ..."
2,ORANGE,Forest Hills to Oak Grove,1,3010.706909,"LINESTRING (235735.315 902429.206, 235694.225 ..."
3,GREEN,C - Cleveland Circle,2,3803.501983,"LINESTRING (228902.422 898464.344, 229190.733 ..."
4,GREEN,D E,4,1621.644873,"LINESTRING (235839.891 901789.977, 235797.490 ..."
...,...,...,...,...,...
119,RED,B - Braintree C - Alewife,4,356.328195,"LINESTRING (236938.312 896104.302, 236939.741 ..."
137,GREEN,D - Union Square,4,906.641859,"LINESTRING (234518.634 902706.022, 234485.866 ..."
138,GREEN,E - Medford/Tufts,1,4710.683462,"MULTILINESTRING ((234241.720 903021.703, 23424..."
139,GREEN,E - Medford/Tufts,4,422.992560,"LINESTRING (234518.634 902706.022, 234518.353 ..."


##### MBTA Node Shape File:

In [235]:
# Load the MBTA node shapefile and discard the Silver Line rows.
node_gdf = gp.read_file('geodata/MBTA_NODE.shp')
node_gdf = node_gdf.drop(index=node_gdf.index[node_gdf['LINE'] == 'SILVER'])

# Lower-cased station names, used for case-insensitive name lookups.
node_gdf['STATION_lower'] = node_gdf['STATION'].map(str.lower)

# Distinct station names, printed for inspection.
stop_names = set(node_gdf['STATION'])
print(stop_names)
node_gdf

{'Kenmore', 'Dean Road', 'Eliot', 'Jackson Square', 'Butler', 'Roxbury Crossing', 'Central', 'Boylston', 'Waban', 'Summit Avenue', 'Woodland', 'Hawes Street', 'Malden Center', 'Longwood Medical Area', 'Union Square', 'Wonderland', 'Science Park/West End', 'Wellington', 'Massachusetts Ave', 'Orient Heights', 'South Street', 'Griggs Street', 'Bowdoin', 'Charles/MGH', 'Tappan Street', 'Beachmont', 'Stony Brook', 'Washington Square', 'Riverside', 'State', 'North Quincy', 'Copley', 'Downtown Crossing', 'Fenway', 'Saint Paul Street', 'Aquarium', 'Medford/Tufts', 'Fairbanks Street', 'Porter', 'Quincy Adams', 'Boston College', 'JFK/UMass', 'Babcock Street', 'Maverick', 'Northeastern', 'Braintree', 'Blandford Street', 'Community College', 'Shawmut', 'Harvard', 'South Station', 'Haymarket', 'Fenwood Road', 'East Somerville', 'Amory Street', 'Park Street', 'Airport', 'Ruggles', 'Longwood', 'Chestnut Hill', 'Heath Street', 'Central Avenue', 'Wood Island', 'Chiswick Road', 'Back Of The Hill', 'Cool

Unnamed: 0,STATION,LINE,TERMINUS,ROUTE,geometry,STATION_lower
0,Park Street,GREEN/RED,N,GREEN B C D E / RED A - Ashmont B - Braintree...,POINT (236064.005 900737.761),park street
1,JFK/UMass,RED,N,A - Ashmont B - Braintree C - Alewife,POINT (236899.089 896780.048),jfk/umass
2,State,BLUE/ORANGE,N,BLUE Bowdoin to Wonderland / ORANGE Forest Hil...,POINT (236427.189 901016.630),state
3,Roxbury Crossing,ORANGE,N,Forest Hills to Oak Grove,POINT (233318.703 897936.590),roxbury crossing
42,Hynes Convention Center,GREEN,N,B C D,POINT (234003.161 899802.946),hynes convention center
...,...,...,...,...,...,...
165,Newton Highlands,GREEN,N,D - Riverside,POINT (224275.009 896915.911),newton highlands
166,Eliot,GREEN,N,D - Riverside,POINT (223362.442 896537.835),eliot
167,Woodland,GREEN,N,D - Riverside,POINT (221158.522 898069.784),woodland
168,Riverside,GREEN,Y,D - Riverside,POINT (220391.713 898574.647),riverside


In [268]:
# Build the source projection from the node shapefile's .prj sidecar file.
# The file is opened in a `with` block so the handle is closed deterministically
# instead of being left for the garbage collector.
with open('geodata/MBTA_NODE.prj') as prj_file:
    input_prj = prj.Proj(prj_file.read())

# Transformer from the shapefile's coordinates to the geographic version of the
# same projection (as produced by Proj.to_latlong()).
transformer = prj.Transformer.from_proj(input_prj, input_prj.to_latlong())

def to_lat_long(x_in, y_in):
    """Convert shapefile coordinates with the module-level ``transformer``.

    Args:
        x_in: x coordinate(s) in the source projection.
        y_in: y coordinate(s) in the source projection.

    Returns:
        The two-element result of ``transformer.transform(x_in, y_in)``.
        Despite this function's name, the caller in this notebook unpacks
        the result as ``(longitude, latitude)``.
    """
    first, second = transformer.transform(x_in, y_in)
    return first, second

In [140]:
# GTFS stops table. `stop_id` is looked up with string ids elsewhere in this
# notebook (ids are split out of 'A | B' strings), so the column is read as text
# rather than letting pandas infer a numeric dtype for numeric-looking ids.
stops_df = pd.read_csv('mbta_gtfs/stops.txt', dtype={'stop_id': str})

In [262]:
def find_line_from_points(point1, point2):
    """Return the arc geometry that lies closest to both points.

    Each geometry in ``arc_gdf`` is scored by the sum of its distances to
    ``point1`` and ``point2``; the geometry with the smallest score wins.

    Args:
        point1: first shapely geometry (a station point in this notebook).
        point2: second shapely geometry.

    Returns:
        The ``geometry`` value of the best-scoring row of ``arc_gdf``.
    """
    def combined_distance(segment):
        return segment.distance(point1) + segment.distance(point2)

    scores = arc_gdf.geometry.apply(combined_distance)
    # argmin is positional, so the row is fetched with iloc rather than loc.
    best_position = scores.argmin()
    return arc_gdf['geometry'].iloc[best_position]

def get_line_location_from_stop_id(stop_id:str):
    """Pick a random point on the track between two stations.

    Args:
        stop_id: two stop ids joined as ``'<id1> | <id2>'``.

    Returns:
        A point on the arc chosen by ``find_line_from_points``, located at a
        uniformly random distance along that arc between the projections of
        the two station points.

    Raises:
        ValueError: if ``stop_id`` does not split into exactly two parts
            on ``' | '``.
    """
    station_1_id, station_2_id = stop_id.split(' | ')
    station_1_point = get_location_from_stop_id(station_1_id)
    station_2_point = get_location_from_stop_id(station_2_id)

    line = find_line_from_points(station_1_point, station_2_point)
    # Distances along the line of each station's projection, in ascending order.
    low, high = sorted((
        line.line_locate_point(station_1_point),
        line.line_locate_point(station_2_point),
    ))

    # The distances are floats. randint would raise when both stations project
    # to (nearly) the same distance and could only return whole units; uniform
    # accepts low == high and samples the full interval.
    random_dist = np.random.uniform(low, high)

    return line.line_interpolate_point(random_dist)

def get_location_from_stop_id(stop_id):
    """Return the geometry for a stop id.

    Ids containing ``'|'`` describe a stretch between two stations and are
    delegated to ``get_line_location_from_stop_id``. Any other id is resolved
    to a stop name via ``stop_name_from_stop_id`` and matched
    case-insensitively against ``node_gdf['STATION_lower']``.

    Args:
        stop_id: a single stop id, or two ids joined by ``' | '``.

    Returns:
        The geometry of the first matching row of ``node_gdf`` (or the point
        produced by ``get_line_location_from_stop_id``).

    Raises:
        IndexError: if no station in ``node_gdf`` matches the stop name, even
            after the name adjustments below.
    """
    if '|' in stop_id:
        return get_line_location_from_stop_id(stop_id)

    name = stop_name_from_stop_id(stop_id)
    station_keys = node_gdf['STATION_lower']
    if not (station_keys == name.lower()).any():
        # Fallback adjustments for names that differ between the two sources.
        # Only the first applicable rule is applied.
        if "'" in name:
            name = name.replace("'", '')
        elif 'Ave' in name:
            # Drops the last three characters — presumably turning a trailing
            # "Avenue" into "Ave"; TODO confirm against the data.
            name = name[:-3]
        elif name == 'Northeastern University':
            name = 'northeastern'

    matches = node_gdf.geometry[station_keys == name.lower()]
    if matches.empty:
        # Same exception type as indexing an empty result, but with context.
        raise IndexError(
            f'No station in node_gdf matches stop name {name!r} (stop_id {stop_id!r})'
        )
    return matches.values[0]

def stop_name_from_stop_id(stop_id):
    """Look up the ``stop_name`` stored in ``stops_df`` for ``stop_id``.

    Returns the first matching name; raises ``IndexError`` when no row of
    ``stops_df`` has that ``stop_id``.
    """
    id_matches = stops_df['stop_id'] == stop_id
    matching_names = stops_df.loc[id_matches, 'stop_name']
    return matching_names.values[0]

In [269]:
# Draw the arcs first, coloured by their LINE value; the result becomes the base map.
base_map = arc_gdf.explore(color=arc_gdf['LINE'], popup=['LINE', 'ROUTE'], tooltip='LINE')

# Overlay the stations on the same map (passed via m=); as the cell's last
# expression, the result of this call is what the cell displays.
node_gdf.explore(m=base_map, popup=['STATION', 'LINE', 'ROUTE'], tooltip='STATION', color='black')

In [284]:
sr_df = pd.read_csv('speed_restrictions.csv')

# NOTE(review): never populated in this cell; kept in case other cells reference it.
speed_restriction_markers = {}

# One MarkerCluster per distinct stop id, so restrictions that share a
# location are grouped under a single cluster on the map.
cluster_dict = {}
for stop_id in sr_df['Loc_GTFS_Stop_ID'].unique():
    cluster_dict[stop_id] = MarkerCluster(options={'singleMarkerMode': True}).add_to(base_map)

# iterrows builds each row once instead of re-slicing sr_df.iloc[i] per field.
for _, restriction in sr_df.iterrows():
    # Attributes for tags
    speed = restriction['Restriction_Speed_MPH']
    distance = restriction['Restriction_Distance_Feet']
    pct = restriction['Line_Restricted_Track_Pct']
    length = restriction['Restriction_Length_Days']
    start_date = restriction['start_date']
    end_date = restriction['end_date']
    loc_type = restriction['Location_Type']
    line = str(restriction['Line'])
    stop_id = restriction['Loc_GTFS_Stop_ID']

    # Determine Location for marker: resolve the stop id to a point in the
    # shapefile's coordinates, then convert it for folium.
    location_point = get_location_from_stop_id(stop_id)
    converted_long, converted_lat = to_lat_long(location_point.x, location_point.y)

    # Popup content is rendered as HTML, where '\n' collapses to whitespace,
    # so fields are separated with <br>. The percentage is shown with one
    # decimal instead of the raw float representation.
    popup_html = (
        f'Restriction Speed: {speed}<br>'
        f'Restriction Distance: {distance}<br>'
        f'Percent of Line Restricted: {pct * 100:.1f}%<br>'
        f'Restriction Length: {length} ({start_date} to {end_date})'
    )

    # Tags are stringified because the TagFilterButton in this notebook is
    # configured with string tags ('10', '11', ...).
    # NOTE(review): assumes the plugin matches tags by strict equality — confirm.
    tags = [str(value) for value in (speed, distance, pct, length, start_date, end_date, loc_type)]

    # Create Marker; the icon colour is the first word of the Line value, lower-cased.
    marker = fol.Marker(
        location=[converted_lat, converted_long],
        popup=fol.Popup(popup_html),
        tags=tags,
        icon=fol.Icon(
            color=line.split()[0].lower(),
            icon='triangle-exclamation',
            prefix='fa'
        )
    )

    cluster = cluster_dict[stop_id]
    marker.add_to(cluster)

In [286]:
# Add a tag-filter control for the listed tag values to the map.
# NOTE(review): the name `filter` shadows the Python builtin for the rest of the
# notebook; it is kept as-is here because later cells may rely on it.
filter = (
    fol.plugins.TagFilterButton(['10', '11', '12', '13'])
    .add_to(base_map)
)

In [287]:

# Display the assembled map (arcs, stations, marker clusters and tag filter) as the cell output.
base_map