In [6]:
import pandas as pd
import numpy  as np
import matplotlib.pyplot as plt
import statistics as st
import seaborn as sns
import datetime 
from geopy import distance
import folium
from folium.plugins import MarkerCluster
from folium.features import GeoJsonTooltip
from branca.colormap import LinearColormap
from collections import Counter
import json
from shapely.geometry import Point
import geopandas as gpd
from shapely.geometry import shape

In [3]:
# Column -> dtype mapping handed to the CSV reader; every listed column is
# loaded as a pandas "category".
# ("ride_id" was once considered for "uint32" but is left disabled.)
data_types = {
    column: "category"
    for column in (
        "rideable_type",
        "start_station_name",
        "end_station_name",
        "member_casual",
        "time_of_day",
        "trip_type",
    )
}


In [21]:
# Load the cleaned ride table: categorical dtypes come from `data_types`, and
# the two ride timestamps are parsed into datetimes at read time.
bikes = pd.read_csv(
    "bikes_clean.csv",
    dtype=data_types,
    parse_dates=["started_at", "ended_at"],
    low_memory=False,
)


In [22]:
# Load the Maryland county boundaries. GeoJSON is UTF-8 by specification, so
# the encoding is stated explicitly instead of relying on the platform default.
with open(
    "Maryland_Physical_Boundaries_-_County_Boundaries_(Detailed).geojson",
    encoding="utf-8",
) as geojson_file:
    maryland = json.load(geojson_file)

features = maryland["features"]

# Build a GeoDataFrame with one row per county feature: the feature
# properties become attribute columns and the GeoJSON geometry becomes a
# shapely geometry, in WGS84 (EPSG:4326).
maryland_gdf = gpd.GeoDataFrame(
    pd.DataFrame([feature["properties"] for feature in features]),
    geometry=[shape(feature["geometry"]) for feature in features],
    crs="EPSG:4326",
)

# join gdf and filter maryland rides

In [57]:
# Build one point per ride from its start coordinates (x = longitude,
# y = latitude). The list is built once; the original computed the same
# list twice in a row.
geometry = [Point(xy) for xy in zip(bikes['start_lng'], bikes['start_lat'])]
bikes_gdf = gpd.GeoDataFrame(bikes, geometry=geometry, crs="EPSG:4326")

# Left spatial join against the county polygons: every ride is kept, and the
# county attributes are filled in only where the start point lies within a
# Maryland county.
bikes_maryland_join = gpd.sjoin(bikes_gdf, maryland_gdf, how="left", predicate="within")

In [58]:
# Keep only rides that matched a county in the left spatial join; rows with
# no county have a missing COUNTY value and are dropped.
rides_maryland = bikes_maryland_join.dropna(
    subset=["COUNTY"],
)


In [None]:
# Derive calendar features from the ride start timestamp.
# `.assign` returns a new frame instead of writing columns into the result of
# `dropna`, which avoids pandas' SettingWithCopyWarning on item assignment.
rides_maryland = rides_maryland.assign(
    year=rides_maryland['started_at'].dt.year,
    dow=rides_maryland['started_at'].dt.dayofweek,  # pandas convention: Monday=0 ... Sunday=6
)

In [60]:
# Drop the raw timestamps and the join/boundary bookkeeping columns that are
# not needed downstream.
# NOTE: re-running this cell on the already-trimmed frame raises KeyError,
# because the columns are gone.
rides_maryland = rides_maryland.drop(
    columns=[
        "started_at",
        "ended_at",
        "index_right",
        "OBJECTID",
        "DISTRICT",
        "COUNTY_FIP",
        "COUNTYNUM",
        "CREATION_D",
        "LAST_UPDAT",
    ]
)

In [61]:
# Number of rides per county, as a two-column frame (COUNTY, ride_count).
rides_per_county = (
    rides_maryland
    .groupby("COUNTY")
    .size()
    .rename("ride_count")
    .reset_index()
)


In [62]:
# Persist the Maryland-only rides without the row index.
rides_maryland.to_csv(
    path_or_buf="cabi_maryland.csv",
    index=False,
)

# filtering per year

In [63]:
# One subset of the Maryland rides per calendar year.
# (The variable names are inconsistent but kept, since other cells use them.)
year_2021 = rides_maryland.loc[rides_maryland["year"].eq(2021)]
year_2022 = rides_maryland.loc[rides_maryland["year"].eq(2022)]
rides_maryland_2023 = rides_maryland.loc[rides_maryland["year"].eq(2023)]

In [64]:
# Ride counts per year in the Maryland subset (most frequent year first).
rides_maryland["year"].value_counts()

year
2023    98071
2021    72587
2022    72504
Name: count, dtype: int64

# Separating unique stations (classic and docked e-bikes) from undocked e-bikes

In [65]:
# Mean start coordinates per station name. Rides record slightly different
# coordinates for the same station, so the mean gives one location per station.
# observed=False keeps every station category, including stations with no
# Maryland rides; those get NaN means and are removed by the dropna below.
_by_station = rides_maryland.groupby("start_station_name", as_index=False, observed=False)
avg_lat_per_station = _by_station["start_lat"].mean()
avg_lng_per_station = _by_station["start_lng"].mean()

# Join explicitly on the station name rather than relying on merge's default
# of "all shared columns".
maryland_unique_bike_stations = (
    avg_lat_per_station
    .merge(avg_lng_per_station, on="start_station_name")
    .dropna(subset="start_lat")
)
maryland_unique_bike_stations

Unnamed: 0,start_station_name,start_lat,start_lng
5,10th & Monroe St NE,38.973495,-77.020706
8,10th & U St NW,38.983053,-76.998549
14,11th & C St SE,39.000126,-77.079726
15,11th & Clifton St NW,39.012635,-77.066359
21,11th & O St NW,39.012635,-77.066359
...,...,...,...
850,Wisconsin Ave & Ingomar St NW,39.012635,-77.066359
851,Wisconsin Ave & Newark St NW,38.931240,-76.951919
853,Wisconsin Ave & Rodman St NW,38.989793,-77.028654
855,Woodglen Dr & Executive Blvd,39.043182,-77.113494


In [66]:
# Breakdown of the 2023 Maryland rides by bike type.
rides_maryland_2023["rideable_type"].value_counts()

rideable_type
electric_bike    48641
classic_bike     46467
docked_bike       2963
Name: count, dtype: int64

In [67]:
# Define the frame before displaying it: this cell used to read
# `undocked_bikes` before any earlier cell had created it, so a fresh
# "Restart & Run All" failed here with NameError.
undocked_bikes = rides_maryland_2023[rides_maryland_2023["rideable_type"] == "electric_bike"]
undocked_bikes

Unnamed: 0,ride_id,rideable_type,start_station_name,start_station_id,end_station_name,end_station_id,member_casual,start_lat,start_lng,end_lat,...,end_time,start_hour,end_hour,trip_duration_minutes,time_of_day,geometry,COUNTY,Shape_Length,Shape_Area,year
6226845,6226846,electric_bike,,32082,,32082,casual,39.040000,-77.050000,39.030000,...,01:17:28,1,1,4.0,night,POINT (-77.05 39.04),Montgomery,4.104648,0.134082,2023
6226851,6226852,electric_bike,Monroe St & Monroe Pl,32017,,32045,casual,39.083673,-77.149178,39.120000,...,01:35:45,1,1,19.0,night,POINT (-77.14918 39.08367),Montgomery,4.104648,0.134082,2023
6227060,6227061,electric_bike,,31707,,31613,casual,38.860000,-76.950000,38.860000,...,02:29:32,2,2,8.0,night,POINT (-76.95 38.86),Prince George's,3.427406,0.130280,2023
6227345,6227346,electric_bike,,32402,,32421,casual,38.960000,-76.940000,38.960000,...,07:24:40,7,7,12.0,morning,POINT (-76.94 38.96),Prince George's,3.427406,0.130280,2023
6227554,6227555,electric_bike,,32418,New Hampshire Ave & East-West Hwy,32412,casual,38.990000,-76.990000,38.974205,...,09:41:15,9,9,6.0,morning,POINT (-76.99 38.99),Montgomery,4.104648,0.134082,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10693841,10693842,electric_bike,Friendship Hts Metro / Wisconsin Ave & Wiscons...,32014,Connecticut Ave & McKinley St NW,31315,member,38.961758,-77.085997,38.964544,...,23:18:43,23,23,5.0,night,POINT (-77.086 38.96176),Montgomery,4.104648,0.134082,2023
10693865,10693866,electric_bike,,31310,River Rd & Landy Ln,32046,member,38.960000,-77.090000,38.964992,...,23:30:39,23,23,9.0,night,POINT (-77.09 38.96),Montgomery,4.104648,0.134082,2023
10693883,10693884,electric_bike,,32418,,32418,member,39.000000,-76.980000,38.980000,...,23:34:50,23,23,8.0,night,POINT (-76.98 39),Prince George's,3.427406,0.130280,2023
10693917,10693918,electric_bike,,32418,,31515,member,38.980000,-77.000000,38.960000,...,23:44:35,23,23,9.0,night,POINT (-77 38.98),Montgomery,4.104648,0.134082,2023


In [68]:
# 2023 Maryland rides made on electric bikes.
# NOTE(review): this keeps every e-bike ride, including ones that have a
# start_station_name; confirm whether "undocked" should also require a
# missing start station.
undocked_bikes = rides_maryland_2023.loc[
    rides_maryland_2023["rideable_type"].eq("electric_bike")
]

In [70]:
# E-bike ride counts per day of week (`dow` comes from `.dt.dayofweek`).
undocked_bikes["dow"].value_counts()

dow
1    7179
3    7122
2    7088
4    6978
5    6956
6    6727
0    6591
Name: count, dtype: int64

`undocked_bikes` contains rides, not stations. One option is an hourly time-lapse showing where the bikes are at a given time. Mapping all the undocked e-bikes from a whole year at once would give the impression that there are many more in circulation than there actually are.