In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns 
import plotly.graph_objects as go
import folium
from folium.plugins import FastMarkerCluster
from folium.plugins import HeatMap
from matplotlib.sankey import Sankey
import utm 
import os
import requests
import json
from scipy.spatial.distance import cdist
import matplotlib.cm as cm
import matplotlib.colors as colors


In [2]:
folder_path = os.path.join(os.getcwd(), "data", "Subway_Capacities.xlsx")
subway= pd.read_excel(folder_path)
print('subway available')

%run functions.ipynb

folder_path = os.path.join(os.getcwd(), "data", "2019")
file_names = os.listdir(folder_path)
chunks = []
for file_name in file_names:
    if file_name.endswith(".pkl.gz"):
        file_path = os.path.join(folder_path, file_name)
        chunk = pd.read_pickle(file_path, compression='gzip')
        chunks.append(chunk)

# Concatenate all DataFrame chunks into a single DataFrame
data_2019 = pd.concat(chunks, ignore_index=True)

print("Accessible dataframes    Description                            # of columns  ")
print("data_2019                Gives data for 2019-Baseline           15")


url = 'https://gbfs.lyft.com/gbfs/2.3/bkn/en/station_information.json'
response = requests.get(url)
if response.status_code == 200:
    # Parse the JSON response
    json_data = response.json()
    station_data = pd.DataFrame(json_data['data']['stations'])
    bike_stations = station_data[['short_name', 'name', 'region_id', 'lat', 'lon', 'capacity']]
    print('bike_stations Available')
else:
    print('Failed to retrieve data:', response.status_code)





subway available
Accessible dataframes    Description                            # of columns  
data_2019                Gives data for 2019-Baseline           15
bike_stations Available


In [3]:
April=data_2019[(data_2019['starttime'].dt.month == 4)]
September=data_2019[(data_2019['starttime'].dt.month == 9)]

cd = data_2019

In [4]:
s_stat = data_2019.iloc[:,3:7].drop_duplicates().rename(columns={'start station id': "ID", 'start station name': "Name", 'start station latitude': "Latitude", 'start station longitude': "Longitude"})
e_stat = data_2019.iloc[:,7:11].drop_duplicates().rename(columns={'end station id': "ID", 'end station name': "Name", 'end station latitude': "Latitude", 'end station longitude': "Longitude"})
stations = pd.concat([s_stat,e_stat]).drop_duplicates()
stations = stations[stations['Latitude'] != 0]
print(len(stations))
stations.tail()

1107


Unnamed: 0,ID,Name,Latitude,Longitude
17111921,3272.0,Jersey & 3rd,40.723332,-74.045953
17190239,3273.0,Manila & 1st,40.721651,-74.042884
19032094,3187.0,Warren St,40.721124,-74.038051
20208636,212.0,W 16 St & The High Line,40.743,-74.007
20548266,3062.0,Myrtle Ave & Marcy Ave,40.695,-73.95


In [17]:
def flow_data(cd,stations,Start,End):
    station_start_count = []
    station_end_count = []
    for station_name in stations['Name']:
        s_count = len(cd[(cd['start station name'] == station_name) & (cd['starttime'].dt.hour >=Start) & (cd['starttime'].dt.hour <=End)] )
        station_start_count.append(s_count)
        e_count = len(cd[(cd['end station name'] == station_name) & (cd['starttime'].dt.hour >=Start) & (cd['starttime'].dt.hour <=End)])
        station_end_count.append(e_count)
    stations['Start'] = station_start_count
    stations['End'] = station_end_count
    return stations
    

In [20]:
cd = September
station_start_count = []
station_end_count = []
for station_name in stations['Name']:
    s_count = len(cd[(cd['start station name'] == station_name) & (cd['starttime'].dt.hour >=7) & (cd['starttime'].dt.hour <=9)] )
    station_start_count.append(s_count)
    e_count = len(cd[(cd['end station name'] == station_name) & (cd['starttime'].dt.hour >=7) & (cd['starttime'].dt.hour <=9)])
    station_end_count.append(e_count)
stations['AM Start'] = station_start_count
stations['AM End'] = station_end_count


In [19]:
#flow_data(September,0,23)
#stations['Start'] = station_start_count
#stations['End'] = station_end_count

#flow_data(September,stations,7,9)
#stations['AM Start'] = station_start_count
#stations['AM End'] = station_end_count

#flow_data(September,17,19)
#stations['PM Start'] = station_start_count
#stations['PM End'] = station_end_count

In [None]:
from math import sin, cos, sqrt, atan2, radians
def distance_lat_long(lat1, lon1, lat2, lon2):
    R = 6371000.0

    lat1_rad = radians(lat1)
    lon1_rad = radians(lon1)
    lat2_rad = radians(lat2)
    lon2_rad = radians(lon2)

    dlat = lat2_rad - lat1_rad
    dlon = lon2_rad - lon1_rad

    a = sin(dlat / 2)**2 + cos(lat1_rad) * cos(lat2_rad) * sin(dlon / 2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    distance = R * c

    return distance


In [5]:

subway.head(2)


Unnamed: 0,Station,Lat,Long,Footfall
0,Far Rockaway-Mott Av,40.603995,-73.755405,10118.5748
1,Beach 25th St,40.600066,-73.761353,5846.7638


In [6]:
#station_data.head()
bike_stations.sort_values('capacity')

Unnamed: 0,short_name,name,region_id,lat,lon,capacity
86,4920.13,South St & Broad St,71,40.701889,-74.010899,3
2181,HB409,Clinton St & Newark St,311,40.737430,-74.035710,14
2184,JC008,Newport Pkwy,70,40.728745,-74.032108,14
2133,JC099,Montgomery St,70,40.719420,-74.050990,14
2187,JC080,Leonard Gordon Park,70,40.745910,-74.057271,14
...,...,...,...,...,...,...
1290,6847.02,Broadway & W 56 St,71,40.765265,-73.981923,117
1131,5279.03,Centre St & Worth St,71,40.714948,-74.002345,120
820,6072.16,Gansevoort St & Hudson St,,40.739376,-74.005208,120
80,6432.11,E 40 St & Park Ave,71,40.750756,-73.978326,123


In [7]:
def euclid_distance(lat1, lon1, lat2, lon2):
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    distance = (np.sqrt(dlat**2 + dlon**2) )* 6371000 # Euclidean distance formula
    #distance_in_meters = distance * 6371000  # Convert distance to meters
    return distance

In [8]:
total_capacities = []
for _, subway_station in subway.iterrows():
    distances = bike_stations.apply(lambda row: euclid_distance(subway_station['Lat'], subway_station['Long'], row['lat'], row['lon']), axis=1)
    nearby_bike_stations = bike_stations[distances <= 100]  # 100m = 0.1 km
    total_capacity = nearby_bike_stations['capacity'].sum()
    total_capacities.append(total_capacity)

subway['Bike_Capacity_100m'] = total_capacities
print(subway)

                  Station        Lat       Long    Footfall  \
0    Far Rockaway-Mott Av  40.603995 -73.755405  10118.5748   
1           Beach 25th St  40.600066 -73.761353   5846.7638   
2           Beach 36th St  40.595398 -73.768175   5846.7638   
3           Beach 44th St  40.592943 -73.776013   5846.7638   
4        Jamaica-179th St  40.712646 -73.783817  11237.5827   
..                    ...        ...        ...         ...   
468               59th St  40.641362 -74.017881  22166.8032   
469          Bay Ridge Av  40.634967 -74.023377   6564.7205   
470               77th St  40.629742 -74.025510  22166.8032   
471               86th St  40.622687 -74.028398  22166.8032   
472               95th St  40.616622 -74.030876  22166.8032   

     Bike_Capacity_100m  
0                     0  
1                     0  
2                     0  
3                     0  
4                     0  
..                  ...  
468                  35  
469                   0  
470      

In [9]:
subway.sort_values('Bike_Capacity_100m')

Unnamed: 0,Station,Lat,Long,Footfall,Bike_Capacity_100m
0,Far Rockaway-Mott Av,40.603995,-73.755405,10118.5748,0
283,Eastern Parkway-Brooklyn Museum,40.671987,-73.964375,3366.5787,0
280,68th St-Hunter College,40.768141,-73.963870,22166.8032,0
279,Church Av,40.650527,-73.962982,3246.2165,0
278,Newkirk Av,40.635082,-73.962793,4735.5788,0
...,...,...,...,...,...
454,Chambers St,40.715478,-74.009266,4472.6496,126
456,World Trade Center,40.712582,-74.009781,16683.8937,139
370,34th St,40.749567,-73.987950,22166.8032,145
329,Grand Central,40.752769,-73.979189,7348.8583,148


In [11]:
subway['footfall_norm'] =0.5 + (subway['Footfall']/2*subway['Footfall'].max())
subway['capacity_norm'] = subway['Bike_Capacity_100m'] / subway['Bike_Capacity_100m'].max()

mymap = folium.Map(location=[40.730610, -73.935242], zoom_start=12)
colormap = cm.magma

for idx, station in subway.iterrows():
    # Extract latitude and longitude values for each subway station
    lat = station['Lat']
    lon = station['Long']
    color = colors.rgb2hex(colormap(station['capacity_norm']))
    folium.CircleMarker(location=[lat, lon],
                        radius=15 * station['footfall_norm'],  # Adjust the size of each marker
                        fill=True,
                        fill_opacity=0.8,  # Using 'footfall_norm' for opacity
                        fill_color=color,  # Marker color without outline
                        color=None,  # Remove the marker outline
                        ).add_to(mymap)
    

# Display the map
mymap