In [47]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns 

import cartopy.crs as ccrs
import cartopy.feature as cfeature

import folium
from folium.plugins import FastMarkerCluster
from folium.plugins import HeatMap

import utm 

import requests
import json


In [21]:
# Execute the companion notebook so the names it defines land in this kernel.
# NOTE(review): combine_csv_files and combine_csv_files_in_years are used further down but
# are not defined in this notebook -- presumably they come from functions.ipynb; confirm.
%run functions.ipynb

In [71]:
def Generate_basemap(location=None, **map_kwargs):
    """Create a fresh folium map, using the notebook's default centre unless told otherwise.

    Parameters
    ----------
    location : sequence of two floats, optional
        ``[latitude, longitude]`` to centre the map on. When omitted, the
        previously hard-coded centre ``[40.730610, -73.935242]`` is used, so
        existing ``Generate_basemap()`` calls behave exactly as before.
    **map_kwargs
        Forwarded unchanged to ``folium.Map`` (for example ``zoom_start`` or
        ``tiles``).

    Returns
    -------
    folium.Map
        A new map object with no layers added to it yet.
    """
    if location is None:
        # Default centre kept from the original implementation.
        location = [40.730610, -73.935242]
    return folium.Map(location=location, **map_kwargs)

# 1 Data Preparation

#### 1.1 List of Stations (Lat, Lon, Capacity)

In [None]:
# URL of the JSON station-information feed
url = 'https://gbfs.lyft.com/gbfs/2.3/bkn/en/station_information.json'

# Fetch the feed; the timeout stops the cell from hanging forever on a stalled connection
response = requests.get(url, timeout=30)

# Stop here with an HTTPError on a failed request. Previously a failure only printed a
# message and the cell then crashed with a NameError on the undefined `json_data`.
response.raise_for_status()

# Parse the JSON response (the full payload is no longer printed; it floods the output)
json_data = response.json()

# Build one row per entry of the 'stations' list and keep only the required columns.
# .copy() makes station_data an independent frame rather than a slice of a temporary.
station_columns = ['short_name', 'name', 'region_id', 'lat', 'lon', 'capacity']
station_data = pd.DataFrame(json_data['data']['stations'])[station_columns].copy()

In [74]:
# Preview the first three stations
station_data.head(3)

Unnamed: 0,short_name,name,region_id,lat,lon,capacity
0,6215.04,W 25 St & 6 Ave,71,40.743954,-73.991449,51
1,5216.06,Vesey St & Church St,71,40.71222,-74.010472,48
2,6621.06,31 Ave & 57 St,71,40.757357,-73.904726,23


##### 1.1.1 Station Visualisation

In [72]:
# Start from a fresh base map
basemap = Generate_basemap()

# Both layers are built from the same lat / lon / capacity columns
station_points = station_data[['lat', 'lon', 'capacity']]
for layer_cls in (FastMarkerCluster, HeatMap):
    layer_cls(station_points).add_to(basemap)

# Last expression of the cell: render the map inline
basemap

In [79]:
# Per-station columns used to draw one circle per station
locations = station_data['name']
latitudes = station_data['lat']
longitudes = station_data['lon']
sizes = station_data['capacity']

# Create the map and zoom it to the extent of the stations. A bare folium.Map() is given
# no location, so without this the saved page opens on a zoomed-out world view.
mymap = folium.Map()
if not station_data.empty:
    mymap.fit_bounds([
        [float(latitudes.min()), float(longitudes.min())],   # south-west corner
        [float(latitudes.max()), float(longitudes.max())],   # north-east corner
    ])

# Draw one circle per station
for lat, lon, size, location in zip(latitudes, longitudes, sizes, locations):
    folium.CircleMarker(
        location=[lat, lon],
        radius=size / 10,  # Scale capacity down so the circles stay readable
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.6,
        popup=location  # Station name shown when the circle is clicked
    ).add_to(mymap)

# Save the map to an HTML file
mymap.save("nyc_bike_stations.html")

#### 1.2 Ride Data

In [62]:
# Folder holding one sub-folder of trip CSVs per year.
# TODO: this is an absolute path on one machine; move it to a configurable data directory.
base_folder_path = 'C:\\Users\\lukas\\OneDrive - Imperial College London\\0_Data\\nyc_bike_rental\\data'
start_year = 2015
end_year = 2024

# Opening all files is compute-intensive, so only do it when `combined_data` is not already
# in the kernel. The load used to be commented out entirely, which left the following
# `combined_data.describe()` cell failing with a NameError after Restart & Run All.
if 'combined_data' not in globals():
    combined_data = combine_csv_files_in_years(base_folder_path, start_year, end_year)

In [63]:
# Summary statistics for every numeric column of the combined trips.
# NOTE(review): the output lists the same field under several header spellings
# (e.g. 'End Station Latitude', 'end station latitude', 'end_lat') with different counts,
# so the yearly files appear to be concatenated without harmonising their column names.
combined_data.describe()

Unnamed: 0,Trip Duration,Start Station ID,Start Station Latitude,Start Station Longitude,End Station ID,End Station Latitude,End Station Longitude,Bike ID,Birth Year,Gender,...,end station id,end station latitude,end station longitude,bikeid,birth year,gender,start_lat,start_lng,end_lat,end_lng
count,339620.0,339620.0,339620.0,339620.0,339620.0,339620.0,339620.0,339620.0,313288.0,339620.0,...,1329984.0,1329984.0,1329984.0,1329984.0,1312074.0,1329984.0,2479054.0,2479054.0,2474097.0,2474097.0
mean,962.9629,3206.818214,40.723126,-74.046422,3203.450032,40.722469,-74.045612,24911.866754,1979.186748,1.12272,...,3272.43,40.69809,-74.00202,31984.55,1980.866,1.137917,40.7317,-74.04052,40.73164,-74.04025
std,48685.7,26.610566,0.008139,0.011194,62.60846,0.070325,0.127556,743.745408,9.652899,0.519344,...,165.6609,0.9896964,1.799568,6323.833,10.48134,0.5445743,0.01216913,0.0121306,0.01229517,0.0121282
min,61.0,3183.0,40.69264,-74.096937,147.0,0.0,-74.096937,14552.0,1900.0,0.0,...,79.0,0.0,-74.09694,14529.0,1887.0,0.0,40.67833,-74.08896,40.64,-74.19
25%,247.0,3186.0,40.717732,-74.050656,3186.0,40.71654,-74.050444,24486.0,1974.0,1.0,...,3191.0,40.71773,-74.05039,29198.0,1973.0,1.0,40.7198,-74.04629,40.71959,-74.04595
50%,384.0,3202.0,40.721525,-74.044247,3199.0,40.721124,-74.043845,24602.0,1981.0,1.0,...,3205.0,40.71959,-74.04312,29532.0,1983.0,1.0,40.73117,-74.03798,40.73101,-74.03798
75%,656.0,3211.0,40.727596,-74.038051,3211.0,40.727224,-74.036486,24711.0,1986.0,1.0,...,3273.0,40.72722,-74.03805,33612.0,1989.0,1.0,40.74097,-74.0316,40.74097,-74.0316
max,20260210.0,3426.0,40.752559,-74.032108,3442.0,40.801343,0.0,29296.0,2000.0,2.0,...,4071.0,40.84828,0.0,49985.0,2004.0,2.0,40.86394,-73.94117,40.87241,-73.88827


In [64]:
# Load just the 2024 trip files and parse the ride start times into datetimes
folder_path = 'C:\\Users\\lukas\\OneDrive - Imperial College London\\0_Data\\nyc_bike_rental\\data\\2024'
data_2024 = combine_csv_files(folder_path).assign(
    started_at=lambda rides: pd.to_datetime(rides['started_at'])
)

In [67]:
# One row per (start station name, start timestamp): keep the first coordinates seen in the
# group and count how many rides share that station and that exact start time.
# NOTE(review): 'count' is therefore the number of rides starting at the same timestamp,
# not the number of bikes standing at the station.
grouped_data = (
    data_2024
    .groupby(['start_station_name', 'started_at'])
    .agg(
        start_lat=('start_lat', 'first'),
        start_lng=('start_lng', 'first'),
        count=('ride_id', 'count'),
    )
    .reset_index()
)

# Largest per-timestamp ride count observed for each station
max_objects_count = grouped_data.groupby('start_station_name')['count'].max()
max_objects_count

start_station_name
11 St & Washington St                2
12 St & Sinatra Dr N                 1
14 St Ferry - 14 St & Shipyard Ln    2
2 St & Park Ave                      2
4 St & Grand St                      1
                                    ..
Van Vorst Park                       2
Warren St                            2
Washington St                        1
Willow Ave & 12 St                   1
York St & Marin Blvd                 2
Name: count, Length: 89, dtype: int64

# 2 Data Analysis