In [124]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
import folium
from folium import plugins
from folium.plugins import MarkerCluster
from geopy.distance import geodesic
from geopy.geocoders import Nominatim
from folium.plugins import HeatMap
import tabulate as tabulate

In [3]:
# Read the columns at positions 5 and 7 as strings instead of letting pandas infer a dtype
# (by the column listing these are start_station_id and end_station_id).
string_column_positions = (5, 7)
bike_master = pd.read_csv('bike_full.csv', dtype=dict.fromkeys(string_column_positions, str))

In [4]:
# before_cleaning = bike_master.memory_usage(deep=True).sum() / 1024 ** 2
# print(f"Memory usage before cleaning: {before_cleaning:.2f} MB")

In [5]:
# Missing values per column in the raw frame
bike_master.isna().sum()

ride_id                    0
rideable_type              0
started_at                 0
ended_at                   0
start_station_name    742776
start_station_id      742776
end_station_name      811282
end_station_id        811282
start_lat                  2
start_lng                  2
end_lat                16765
end_lng                16765
member_casual              0
dtype: int64

In [6]:
# Columns cast to the pandas category dtype
category_columns = ['rideable_type', 'start_station_name', 'end_station_name', 'member_casual']

# ID columns cast to the pandas string dtype
string_columns = ['ride_id', 'start_station_id', 'end_station_id']

# Build the cleaned frame as a new object (bike_master is left untouched):
# parse both timestamps first, then apply all dtype casts in one astype call.
bike_master_clean = bike_master.assign(
    started_at=pd.to_datetime(bike_master['started_at']),
    ended_at=pd.to_datetime(bike_master['ended_at']),
).astype({
    **dict.fromkeys(category_columns, 'category'),
    **dict.fromkeys(string_columns, 'string'),
})



In [7]:
# Deep memory footprint of the cleaned frame, converted from bytes (divided by 1024 * 1024)
total_bytes = bike_master_clean.memory_usage(deep=True).sum()
after_cleaning = total_bytes / (1024 * 1024)
print(f"Memory usage after cleaning: {after_cleaning:.2f} MB")

Memory usage after cleaning: 2355.93 MB


In [8]:
# Preview the first five rows of the cleaned frame
bike_master_clean.head(n=5)

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual
0,5CB9DFCECF79AF84,classic_bike,2021-01-01 00:08:33,2021-01-01 00:33:53,Maine Ave & 9th St SW,31646.0,Rosslyn Metro / Wilson Blvd & Ft Myer Dr,31015.0,38.88044,-77.025236,38.8946,-77.072305,member
1,629E059504606547,electric_bike,2021-01-01 00:13:43,2021-01-01 00:29:34,10th & U St NW,31111.0,,,38.917193,-77.025894,38.96,-77.02,casual
2,E74069873161EE33,electric_bike,2021-01-01 00:14:32,2021-01-01 00:28:45,17th & Corcoran St NW,31214.0,14th & Belmont St NW,31119.0,38.912138,-77.038568,38.92087,-77.031691,member
3,91F95E512CABC46A,classic_bike,2021-01-01 00:15:45,2021-01-01 00:21:20,Wilson Blvd. & N. Vermont St.,31926.0,Wilson Blvd. & N. Vermont St.,31926.0,38.879477,-77.114563,38.879477,-77.114563,member
4,DA46A05139C0EA2F,classic_bike,2021-01-01 00:17:46,2021-01-01 00:21:00,11th & Park Rd NW,31651.0,14th & Newton St NW,31649.0,38.931322,-77.028247,38.931991,-77.032956,member


In [9]:
# Missing values per column after the dtype conversion
bike_master_clean.isna().sum()

ride_id                    0
rideable_type              0
started_at                 0
ended_at                   0
start_station_name    742776
start_station_id      742776
end_station_name      811282
end_station_id        811282
start_lat                  2
start_lng                  2
end_lat                16765
end_lng                16765
member_casual              0
dtype: int64

In [10]:
# Number of distinct values in the station name/ID columns and in start_lat
cardinality_columns = [
    'start_station_name',
    'end_station_name',
    'start_station_id',
    'end_station_id',
    'start_lat',
]
bike_master_clean[cardinality_columns].nunique()

start_station_name       860
end_station_name         865
start_station_id        1321
end_station_id          1338
start_lat             570110
dtype: int64

In [11]:
# # Find and export matching stations
# matching_station = bike_master_clean[
#   (bike_master_clean['start_lat'].round(2) == 38.93) & 
#   (bike_master_clean['start_lng'].round(2) == -77.02)
# ][['start_station_name', 'start_lat', 'start_lng']].dropna().drop_duplicates()

# matching_station.to_csv('matching_stations1.csv', index=False)

In [12]:
# Re-check the per-column missing-value counts of the cleaned frame
bike_master_clean.isna().sum()

ride_id                    0
rideable_type              0
started_at                 0
ended_at                   0
start_station_name    742776
start_station_id      742776
end_station_name      811282
end_station_id        811282
start_lat                  2
start_lng                  2
end_lat                16765
end_lng                16765
member_casual              0
dtype: int64

In [None]:
# Monthly period (year-month) of each trip's start time
bike_master_clean['month_year'] = bike_master_clean['started_at'].dt.to_period('M')

# Calendar year of each trip's start time
bike_master_clean['year'] = bike_master_clean['started_at'].dt.year

# Keep the trips that started in 2023 from October onwards (i.e. October-December 2023).
# The lower bound is an explicit monthly Period rather than a string that pandas has to parse.
first_month = pd.Period('2023-10', freq='M')
in_window = (bike_master_clean['year'] == 2023) & (bike_master_clean['month_year'] >= first_month)

# .copy() makes the subset an independent frame, so later column assignments on it
# do not raise SettingWithCopyWarning.
bike_master_clean_2023 = bike_master_clean[in_window].copy()

row_count = len(bike_master_clean_2023)
print(f"Number of rows in the dataset: {row_count}")



Number of rows in the dataset: 1126727


In [34]:
# Report missing values per column before removing them. A bare expression in the middle
# of a cell is not displayed, so the counts are printed explicitly.
missing_per_column = bike_master_clean_2023.isna().sum()
print(missing_per_column)

# Drop every trip that has a missing value in any column
bike_master_clean_2023 = bike_master_clean_2023.dropna()


In [41]:
# Ten start stations with the most trips, counting ride_id per start_station_name
start_trip_counts = bike_master_clean_2023.groupby('start_station_name', observed=False).agg(
    trip_count=('ride_id', 'count')
)
top_stations = start_trip_counts.nlargest(10, 'trip_count').reset_index()

top_stations

Unnamed: 0,start_station_name,trip_count
0,Columbus Circle / Union Station,11813
1,New Hampshire Ave & T St NW,9614
2,15th & P St NW,8504
3,Eastern Market Metro / Pennsylvania Ave & 8th ...,8108
4,5th & K St NW,7944
5,1st & M St NE,7636
6,14th & V St NW,7559
7,M St & Delaware Ave NE,7402
8,Massachusetts Ave & Dupont Circle NW,7316
9,8th & O St NW,6407


In [70]:
# Ten end stations with the most trips, counting ride_id per end_station_name
end_trip_counts = bike_master_clean_2023.groupby('end_station_name', observed=False).agg(
    trip_count=('ride_id', 'count')
)
end_stations = end_trip_counts.nlargest(10, 'trip_count').reset_index()

end_stations

Unnamed: 0,end_station_name,trip_count
0,Columbus Circle / Union Station,11804
1,New Hampshire Ave & T St NW,9297
2,15th & P St NW,8598
3,Eastern Market Metro / Pennsylvania Ave & 8th ...,8137
4,5th & K St NW,8079
5,1st & M St NE,7747
6,Massachusetts Ave & Dupont Circle NW,7573
7,M St & Delaware Ave NE,7516
8,14th & V St NW,7127
9,14th & R St NW,6414


In [72]:
# Side-by-side table: top start stations on the left, top end stations on the right
# (rows are aligned on the 0-9 rank index of both frames).
popular_stations = pd.concat((top_stations, end_stations), axis='columns')
popular_stations

Unnamed: 0,start_station_name,trip_count,end_station_name,trip_count.1
0,Columbus Circle / Union Station,11813,Columbus Circle / Union Station,11804
1,New Hampshire Ave & T St NW,9614,New Hampshire Ave & T St NW,9297
2,15th & P St NW,8504,15th & P St NW,8598
3,Eastern Market Metro / Pennsylvania Ave & 8th ...,8108,Eastern Market Metro / Pennsylvania Ave & 8th ...,8137
4,5th & K St NW,7944,5th & K St NW,8079
5,1st & M St NE,7636,1st & M St NE,7747
6,14th & V St NW,7559,Massachusetts Ave & Dupont Circle NW,7573
7,M St & Delaware Ave NE,7402,M St & Delaware Ave NE,7516
8,Massachusetts Ave & Dupont Circle NW,7316,14th & V St NW,7127
9,8th & O St NW,6407,14th & R St NW,6414


In [60]:
# Trips that started at one specific station
station_name = 'Columbus Circle / Union Station'
started_at_station = bike_master_clean_2023['start_station_name'] == station_name
df_filtered = bike_master_clean_2023[started_at_station]

# Number of trips recorded for that station
df_filtered_count = len(df_filtered)

# Distinct (start_lat, start_lng) pairs recorded for those trips, and how many there are
unique_lat_lng = df_filtered[['start_lat', 'start_lng']].drop_duplicates()
num_unique_lat_lng = len(unique_lat_lng)

unique_lat_lng



Unnamed: 0,start_lat,start_lng
9567489,38.897,-77.005
9812020,38.887,-77.002
10009125,38.897,-77.004
10052987,38.964,-77.01
10109678,38.898,-77.005
10411046,38.955,-77.009
10426656,38.896,-77.005
10478355,38.888,-77.008
10518933,38.939,-77.037
10580481,38.881,-76.993


In [99]:
# Number of missing end_station_id values for each (member type, bike type) combination
end_ids_by_group = bike_master_clean.groupby(
    ['member_casual', 'rideable_type'], observed=False
)['end_station_id']
end_ids_by_group.apply(lambda ids: ids.isna().sum())

member_casual  rideable_type
casual         classic_bike      11554
               docked_bike        5136
               electric_bike    303090
member         classic_bike       8341
               docked_bike           0
               electric_bike    483161
Name: end_station_id, dtype: int64

In [101]:
# Trips per (member type, bike type) pair, counted by ride_id
rider_bike_groups = bike_master_clean.groupby(['member_casual', 'rideable_type'], observed=False)
member_rideable = rider_bike_groups.agg(trip_count=('ride_id', 'count')).reset_index()

member_rideable

Unnamed: 0,member_casual,rideable_type,trip_count
0,casual,classic_bike,2702259
1,casual,docked_bike,539470
2,casual,electric_bike,959474
3,member,classic_bike,5008113
4,member,docked_bike,4
5,member,electric_bike,1484677


In [111]:
# Number of trips for each member type
trip_counts = bike_master_clean['member_casual'].value_counts()

# Each member type's share of all trips, in percent
trip_percent = trip_counts.div(trip_counts.sum()).mul(100)

trip_percent

member_casual
member    60.71438
casual    39.28562
Name: count, dtype: float64

In [107]:
# Share of each bike type within its own member type, in percent:
# every row's trip_count is divided by the total trip_count of its member_casual group.
trips_per_member_type = (
    member_rideable
    .groupby('member_casual', observed=False)['trip_count']
    .transform('sum')
)
member_rideable['percentage'] = member_rideable['trip_count'] / trips_per_member_type * 100
member_rideable

Unnamed: 0,member_casual,rideable_type,trip_count,percentage
0,casual,classic_bike,2702259,64.321077
1,casual,docked_bike,539470,12.840846
2,casual,electric_bike,959474,22.838078
3,member,classic_bike,5008113,77.133404
4,member,docked_bike,4,6.2e-05
5,member,electric_bike,1484677,22.866535


In [128]:
# Top one-way routes (start station != end station) for members and for casual riders.
# Requires bike_master_clean_2023 from the earlier cells.

# Station names are compared as plain strings. Rebinding the frame through astype (instead of
# assigning columns on a filtered frame) avoids SettingWithCopyWarning and makes the cell
# safe to re-run.
bike_master_clean_2023 = bike_master_clean_2023.astype(
    {'start_station_name': str, 'end_station_name': str}
)


def _one_way_routes(trips, rider_type):
    """Select one rider type's one-way trips and count them per route.

    Args:
        trips: trip frame with member_casual, start_station_name and end_station_name columns.
        rider_type: value of member_casual to keep ('member' or 'casual').

    Returns:
        (routes, route_counts): the matching trips whose start and end station differ
        (round trips are excluded), and a frame with one row per
        (start_station_name, end_station_name) pair and its trip_count.
    """
    is_rider = trips['member_casual'] == rider_type
    is_one_way = trips['start_station_name'] != trips['end_station_name']
    routes = trips[is_rider & is_one_way]
    route_counts = (
        routes
        .groupby(['start_station_name', 'end_station_name'])
        .size()
        .reset_index(name='trip_count')
    )
    return routes, route_counts


member_routes, member_route_counts = _one_way_routes(bike_master_clean_2023, 'member')
casual_routes, casual_route_counts = _one_way_routes(bike_master_clean_2023, 'casual')

# Get top 20 routes
top_member = member_route_counts.sort_values('trip_count', ascending=False).head(20)
top_casual = casual_route_counts.sort_values('trip_count', ascending=False).head(20)

# Print results
print("=== TOP MEMBER ROUTES ===")
print(top_member.to_markdown(index=False, tablefmt="grid"))

print("\n\n=== TOP CASUAL ROUTES ===")
print(top_casual.to_markdown(index=False, tablefmt="grid"))

# Summary statistics. 'Total Trips' counts only the one-way trips selected above,
# so round trips are not included in these totals.
summary = pd.DataFrame({
    'User Type': ['Member', 'Casual'],
    'Total Trips': [len(member_routes), len(casual_routes)],
    'Unique Routes': [member_route_counts.shape[0], casual_route_counts.shape[0]]
})

print("\n=== SUMMARY STATISTICS ===")
print(summary.to_markdown(index=False, tablefmt="grid"))

=== TOP MEMBER ROUTES ===
+-----------------------------------------------------+-----------------------------------------------------+--------------+
| start_station_name                                  | end_station_name                                    |   trip_count |
| Columbus Circle / Union Station                     | 8th & F St NE                                       |          737 |
+-----------------------------------------------------+-----------------------------------------------------+--------------+
| 8th & F St NE                                       | Columbus Circle / Union Station                     |          686 |
+-----------------------------------------------------+-----------------------------------------------------+--------------+
| Columbus Circle / Union Station                     | 6th & H St NE                                       |          565 |
+-----------------------------------------------------+--------------------------------------------

In [121]:
# Export the 20 most-travelled routes per rider type to CSV.
# NOTE: unlike a one-way-only analysis, this cell filters on member_casual alone, so routes
# that start and end at the same station are included. It also rebinds member_routes,
# casual_routes, member_route_counts and casual_route_counts.


def _ranked_top_routes(trips, rider_type, n=20):
    """Count trips per route for one rider type and rank the busiest ones.

    Args:
        trips: trip frame with member_casual, start_station_name and end_station_name columns.
        rider_type: value of member_casual to keep ('member' or 'casual').
        n: maximum number of routes to return.

    Returns:
        (rider_trips, route_counts, top_routes): the filtered trips, the per-route counts,
        and the n busiest routes with a 1-based 'rank' column in front.
    """
    rider_trips = trips[trips['member_casual'] == rider_type]
    # observed=True counts only station pairs that actually occur. With categorical station
    # columns the default would emit a FutureWarning and build a row for every category pair.
    route_counts = (
        rider_trips
        .groupby(['start_station_name', 'end_station_name'], observed=True)
        .size()
        .reset_index(name='trip_count')
    )
    top_routes = route_counts.sort_values('trip_count', ascending=False).head(n).copy()
    # Rank by actual length so the insert still works when fewer than n routes exist
    top_routes.insert(0, 'rank', range(1, len(top_routes) + 1))
    return rider_trips, route_counts, top_routes


# For Members
member_routes, member_route_counts, top_20_member_routes = _ranked_top_routes(
    bike_master_clean_2023, 'member'
)

# For Casual Riders
casual_routes, casual_route_counts, top_20_casual_routes = _ranked_top_routes(
    bike_master_clean_2023, 'casual'
)

# Export to CSV (these two files do not contain the rider_type column added below)
top_20_member_routes.to_csv('top_20_member_routes.csv', index=False)
top_20_casual_routes.to_csv('top_20_casual_routes.csv', index=False)

# Optional: Export both to a single CSV
# First, add a type column to distinguish between member and casual
top_20_member_routes['rider_type'] = 'member'
top_20_casual_routes['rider_type'] = 'casual'

# Combine both dataframes
combined_routes = pd.concat([top_20_member_routes, top_20_casual_routes])

# Export combined data
combined_routes.to_csv('top_20_all_routes.csv', index=False)

print("Files exported successfully:")
print("1. top_20_member_routes.csv")
print("2. top_20_casual_routes.csv")
print("3. top_20_all_routes.csv")

Files exported successfully:
1. top_20_member_routes.csv
2. top_20_casual_routes.csv
3. top_20_all_routes.csv


  member_route_counts = member_routes.groupby(['start_station_name', 'end_station_name']).size().reset_index(name='trip_count')
  casual_route_counts = casual_routes.groupby(['start_station_name', 'end_station_name']).size().reset_index(name='trip_count')


In [69]:
# Calculate the mean latitude and longitude for each start station
mean_coords = (
    bike_master_clean_2023
    .groupby('start_station_name', observed=False)[['start_lat', 'start_lng']]
    .mean()
    .reset_index()
)

# Count trips per start station and keep the 10 most frequent start locations
top_10 = (
    bike_master_clean_2023
    .groupby('start_station_name', observed=False)
    .size()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
    .head(10)
)

# Merge the mean coordinates with the top 10 locations
top_10_coords = top_10.merge(mean_coords, on='start_station_name')

# Create a base map centered around the mean coordinates of the top 10 locations.
# The map is bound to top_10_map rather than `map`, which would shadow the Python builtin.
map_center = [top_10_coords['start_lat'].mean(), top_10_coords['start_lng'].mean()]
top_10_map = folium.Map(location=map_center, zoom_start=13)

# Add a marker for each of the top 10 locations
for station in top_10_coords.itertuples(index=False):
    folium.Marker(
        location=[station.start_lat, station.start_lng],
        popup=station.start_station_name,
        icon=folium.Icon(color='blue', icon='bicycle', prefix='fa')
    ).add_to(top_10_map)

top_10_map


**Most Popular Rides**

In [85]:
# Count occurrences of each start station and get the top 10
top_10_start = bike_master_clean_2023['start_station_name'].value_counts().head(10).reset_index()
top_10_start.columns = ['start_station_name', 'start_count']

# Filter for the top 10 start stations
from_top_10 = bike_master_clean_2023['start_station_name'].isin(top_10_start['start_station_name'])
top_10_routes = bike_master_clean_2023[from_top_10]

# Count how often each end station appears for these top 10 start stations.
# observed=True keeps only pairs that actually occur; with categorical station columns,
# observed=False would add a zero-count row for every category pair, including start
# stations outside the top 10.
popular_routes = (
    top_10_routes
    .groupby(['start_station_name', 'end_station_name'], observed=True)
    .size()
    .reset_index(name='route_count')
    # Sort by start station and route popularity
    .sort_values(by=['start_station_name', 'route_count'], ascending=[True, False])
)

# Find the 10 most common start-end pairs across all trips, most popular first
top_routes_summary = (
    bike_master_clean_2023
    .groupby(['start_station_name', 'end_station_name'], observed=True)
    .size()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
    .head(10)
)

top_routes_summary



                                       start_station_name  \
278756                    Columbus Circle / Union Station   
394897                          Jefferson Dr & 14th St SW   
278741                    Columbus Circle / Union Station   
195812                                      8th & F St NE   
633915  Smithsonian-National Mall / Jefferson Dr & 12t...   
182837                                      6th & H St NE   
378443                                     Gravelly Point   
442527                                   Lincoln Memorial   
318833  Eastern Market Metro / Pennsylvania Ave & 8th ...   
105269                                      1st & M St NE   

                                         end_station_name  count  
278756                                      8th & F St NE    929  
394897                          Jefferson Dr & 14th St SW    867  
278741                                      6th & H St NE    827  
195812                    Columbus Circle / Union Station   

In [84]:
# First, find the top 10 most popular starting stations
top_10_starts = bike_master_clean_2023['start_station_name'].value_counts().head(10)
print("Top 10 Starting Stations:")
print(top_10_starts)
print("\n")

# Create a map centered around the data
map_center = [bike_master_clean_2023['start_lat'].mean(), bike_master_clean_2023['start_lng'].mean()]
popular = folium.Map(location=map_center, zoom_start=12)

# Colors for different starting stations
colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF', '#FFA500', '#800080', '#008000', '#FFC0CB']

# One representative coordinate per station, computed once up front. Trips from the same
# station are recorded with differing coordinates, so the median over all of a station's
# trips is used instead of whichever trip row happens to come first.
start_coords = (
    bike_master_clean_2023
    .groupby('start_station_name', observed=True)[['start_lat', 'start_lng']]
    .median()
)
end_coords = (
    bike_master_clean_2023
    .groupby('end_station_name', observed=True)[['end_lat', 'end_lng']]
    .median()
)

# For each top starting station, find its most popular destinations
for idx, (start_station, start_count) in enumerate(top_10_starts.items()):
    # Filter trips from this starting station
    station_trips = bike_master_clean_2023[bike_master_clean_2023['start_station_name'] == start_station]

    # Get top 3 destinations for this starting point. A categorical column also lists
    # unused categories with a count of 0, so those are dropped.
    top_destinations = station_trips['end_station_name'].value_counts().head(3)
    top_destinations = top_destinations[top_destinations > 0]

    print(f"\nFrom {start_station} ({start_count} total starts):")
    print("Top 3 destinations:")
    print(top_destinations)

    # Plot on map
    start_point = start_coords.loc[start_station]

    # Add marker for starting station
    folium.Marker(
        [start_point['start_lat'], start_point['start_lng']],
        popup=f"Start Station: {start_station}<br>Total Trips: {start_count}",
        icon=folium.Icon(color='red', icon='info-sign')
    ).add_to(popular)

    # Add lines to the top destinations
    for end_station, end_count in top_destinations.items():
        # Lookup in the precomputed table instead of rescanning the whole trip frame
        end_point = end_coords.loc[end_station]

        # Draw line
        folium.PolyLine(
            [(start_point['start_lat'], start_point['start_lng']),
             (end_point['end_lat'], end_point['end_lng'])],
            color=colors[idx],
            weight=end_count/100,  # Line thickness based on number of trips
            opacity=0.8,
            popup=f"From: {start_station}<br>To: {end_station}<br>Trips: {end_count}"
        ).add_to(popular)

        # Add destination marker
        folium.Marker(
            [end_point['end_lat'], end_point['end_lng']],
            popup=f"End Station: {end_station}<br>Trips from {start_station}: {end_count}",
            icon=folium.Icon(color='lightgray', icon='info-sign')
        ).add_to(popular)

# Add legend
legend_html = '''
<div style="position: fixed; 
            bottom: 50px; left: 50px; width: 180px;
            border:2px solid grey; z-index:9999; 
            background-color:white;
            opacity:0.8;
            font-size:12px;
            padding:10px">
<p><strong>Legend</strong></p>
<p>🔴 Starting Station</p>
<p>⚪ Destination Station</p>
<p>Line thickness indicates number of trips</p>
</div>
'''
popular.get_root().html.add_child(folium.Element(legend_html))

popular

Top 10 Starting Stations:
start_station_name
Columbus Circle / Union Station                        11813
New Hampshire Ave & T St NW                             9614
15th & P St NW                                          8504
Eastern Market Metro / Pennsylvania Ave & 8th St SE     8108
5th & K St NW                                           7944
1st & M St NE                                           7636
14th & V St NW                                          7559
M St & Delaware Ave NE                                  7402
Massachusetts Ave & Dupont Circle NW                    7316
8th & O St NW                                           6407
Name: count, dtype: int64



From Columbus Circle / Union Station (11813 total starts):
Top 3 destinations:
end_station_name
8th & F St NE             929
6th & H St NE             827
Maryland Ave & E St NE    476
Name: count, dtype: int64

From New Hampshire Ave & T St NW (9614 total starts):
Top 3 destinations:
end_station_name
15th & P St 

In [122]:
# Base map centred on the mean start coordinate of the subset
map_center = [
    bike_master_clean_2023['start_lat'].mean(),
    bike_master_clean_2023['start_lng'].mean(),
]
heat_map = folium.Map(location=map_center, zoom_start=12)

# Number of trips per distinct start coordinate, as (lat, lng, count)
start_locations = (
    bike_master_clean_2023
    .groupby(['start_lat', 'start_lng'])
    .size()
    .reset_index(name='count')
    .rename(columns={'start_lat': 'lat', 'start_lng': 'lng'})
)

# Number of trips per distinct end coordinate, in the same shape
end_locations = (
    bike_master_clean_2023
    .groupby(['end_lat', 'end_lng'])
    .size()
    .reset_index(name='count')
    .rename(columns={'end_lat': 'lat', 'end_lng': 'lng'})
)

# Stack start and end points into one table
all_locations = pd.concat([start_locations, end_locations])

# Drop rows without coordinates and convert to a list of [lat, lng, count] float rows
located = all_locations.dropna(subset=['lat', 'lng'])
heat_data = located.astype(float).to_numpy().tolist()

# Add the heatmap layer
heat_layer = HeatMap(
    data=heat_data,
    radius=8,
    max_zoom=13,
    min_opacity=0.5,
    blur=5
)
heat_layer.add_to(heat_map)

# Display the map
heat_map


In [129]:

# Create base map
map_center = [bike_master_clean_2023['start_lat'].mean(),
              bike_master_clean_2023['start_lng'].mean()]
bike_map = folium.Map(location=map_center, zoom_start=12, tiles='CartoDB positron')

# Every recorded trip endpoint as (station, lat, lng), starts and ends stacked
trip_endpoints = pd.concat([
    bike_master_clean_2023[['start_station_name', 'start_lat', 'start_lng']]
    .rename(columns={'start_station_name': 'station', 'start_lat': 'lat', 'start_lng': 'lng'}),
    bike_master_clean_2023[['end_station_name', 'end_lat', 'end_lng']]
    .rename(columns={'end_station_name': 'station', 'end_lat': 'lat', 'end_lng': 'lng'})
])

# Station coordinate lookup: one row per station. Trips from the same station are recorded
# with differing coordinates, so the median is used rather than the first row encountered.
stations = (
    trip_endpoints
    .groupby('station', observed=True)[['lat', 'lng']]
    .median()
    .reset_index()
)


def _with_station_coords(routes):
    """Attach start/end station coordinates to a route-count frame.

    Only the three route columns are taken from `routes` before merging, so re-running
    the cell on an already-merged frame does not collide with the suffixed columns
    (station_start, lat_start, lng_start, station_end, lat_end, lng_end).
    """
    return (
        routes[['start_station_name', 'end_station_name', 'trip_count']]
        .merge(stations, left_on='start_station_name', right_on='station')
        .merge(stations, left_on='end_station_name', right_on='station',
               suffixes=('_start', '_end'))
    )


# Merge coordinates with route data
top_member = _with_station_coords(top_member)
top_casual = _with_station_coords(top_casual)

# Create feature groups
member_layer = folium.FeatureGroup(name='Member Routes', show=True)
casual_layer = folium.FeatureGroup(name='Casual Routes', show=True)
stations_layer = folium.FeatureGroup(name='Stations', show=False)

# Add stations
station_cluster = MarkerCluster().add_to(stations_layer)
for station in stations.itertuples(index=False):
    folium.Marker(
        [station.lat, station.lng],
        icon=folium.Icon(icon='bicycle', prefix='fa', color='beige'),
        popup=f"Station: {station.station}",
        tooltip=station.station
    ).add_to(station_cluster)

# Function to add routes
def add_routes(data, color, layer):
    """Draw every route in `data` onto `layer` as a line with a circle at each end.

    Args:
        data: frame read row by row; uses the columns trip_count, lat_start, lng_start,
            lat_end, lng_end, start_station_name and end_station_name.
        color: colour of the route line and of the start marker.
        layer: folium object that each element is attached to via ``add_to``.
    """

    def _endpoint(position, dot_color, label):
        # Filled radius-5 circle marker with a popup, attached to the target layer
        folium.CircleMarker(
            position,
            radius=5,
            color=dot_color,
            fill=True,
            fill_color=dot_color,
            popup=label
        ).add_to(layer)

    busiest = data['trip_count'].max()
    for _, route in data.iterrows():
        origin = [route['lat_start'], route['lng_start']]
        destination = [route['lat_end'], route['lng_end']]

        # Line weight grows linearly with the route's share of the busiest route:
        # 2 plus up to 6, so the busiest route is drawn at weight 8
        line_weight = 2 + (6 * (route['trip_count'] / busiest))

        route_popup = f"""
            <b>Route:</b> {route['start_station_name']} → {route['end_station_name']}<br>
            <b>Trips:</b> {route['trip_count']:,}
            """

        folium.PolyLine(
            locations=[origin, destination],
            color=color,
            weight=line_weight,
            opacity=0.7,
            popup=route_popup
        ).add_to(layer)

        # Start marker in the route colour, end marker in gray
        _endpoint(origin, color, f"Start: {route['start_station_name']}")
        _endpoint(destination, 'gray', f"End: {route['end_station_name']}")

# Draw each rider type's routes onto its own layer
route_layers = (
    (top_member, '#1f77b4', member_layer),  # Blue for members
    (top_casual, '#ff7f0e', casual_layer),  # Orange for casual
)
for routes, route_color, route_layer in route_layers:
    add_routes(routes, route_color, route_layer)

# Attach the layers to the map in order, then the layer toggle control
for map_layer in (member_layer, casual_layer, stations_layer):
    map_layer.add_to(bike_map)

folium.LayerControl().add_to(bike_map)

# Fixed-position HTML legend injected into the map's root document
legend_html = '''
<div style="position: fixed; 
            bottom: 50px; left: 50px; width: 220px;
            border:2px solid grey; z-index:9999; 
            background-color:white;
            opacity:0.85;
            font-size:12px;
            padding:10px">
    <p style="margin:0"><strong>Map Legend</strong></p>
    <div style="margin: 5px 0;">
        <span style="color: #1f77b4;">⬤</span> Member Routes
    </div>
    <div style="margin: 5px 0;">
        <span style="color: #ff7f0e;">⬤</span> Casual Routes
    </div>
    <div style="margin: 5px 0;">
        <span style="color: gray;">⬤</span> Destination Stations
    </div>
    <div style="margin: 5px 0;">
        <i class="fa fa-bicycle" style="color: beige;"></i> All Stations
    </div>
    <div style="margin: 5px 0;">
        Line thickness = Trip volume
    </div>
</div>
'''
legend_element = folium.Element(legend_html)
bike_map.get_root().html.add_child(legend_element)

# Display map
bike_map