In [1]:
%reset -f

In [2]:
import pandas as pd
import numpy as np
import os
import sys

In [3]:
# Set display options
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

In [4]:
os.getcwd()

'C:\\Users\\rajpprit\\Documents\\UK I&I\\Trainings\\Vizcon 2025\\New York city bike data\\notebooks'

In [5]:
os.chdir("..")

In [6]:
os.getcwd()

'C:\\Users\\rajpprit\\Documents\\UK I&I\\Trainings\\Vizcon 2025\\New York city bike data'

In [7]:
os.listdir()

['.virtual_documents',
 'Community',
 'data_processed',
 'data_raw',
 'desktop.ini',
 'notebooks',
 'site']

In [8]:
os.getcwd()

'C:\\Users\\rajpprit\\Documents\\UK I&I\\Trainings\\Vizcon 2025\\New York city bike data'

In [9]:
# -----------------------------
# 2. Sample Data Preparation
# -----------------------------
os.chdir("C:\\Users\\rajpprit\\Documents\\UK I&I\\Trainings\\Vizcon 2025\\New York city bike data\\data_processed\\Pickel files")
df_2021 = pd.read_pickle("df_2021.pkl")
df_2024 = pd.read_pickle("df_2024.pkl")

In [10]:
os.chdir("C:\\Users\\rajpprit\\Documents\\UK I&I\\Trainings\\Vizcon 2025\\New York city bike data")

In [11]:
os.getcwd()

'C:\\Users\\rajpprit\\Documents\\UK I&I\\Trainings\\Vizcon 2025\\New York city bike data'

#### Data import

In [12]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
from datetime import datetime

#### Delta on map - Option1

In [13]:
import plotly.graph_objects as go
import pandas as pd
import numpy as np

def calculate_station_changes(df_2021, df_2024):
    # Function to combine start and end counts for a year
    def get_station_counts(df_year):
        start_counts = df_year.groupby('start_station_name').agg({
            'start_lat': 'first',
            'start_lng': 'first',
            'ride_id': 'count'
        }).rename(columns={'start_lat': 'lat', 'start_lng': 'lng', 'ride_id': 'count'})
        
        end_counts = df_year.groupby('end_station_name').agg({
            'end_lat': 'first',
            'end_lng': 'first',
            'ride_id': 'count'
        }).rename(columns={'end_lat': 'lat', 'end_lng': 'lng', 'ride_id': 'count'})
        
        combined = pd.concat([start_counts, end_counts]).groupby(level=0).agg({
            'lat': 'first',
            'lng': 'first',
            'count': 'sum'
        })
        
        return combined
    
    counts_2021 = get_station_counts(df_2021)
    counts_2024 = get_station_counts(df_2024)
    
    # Calculate changes
    stations = pd.merge(
        counts_2021, 
        counts_2024, 
        left_index=True, 
        right_index=True, 
        suffixes=('_2021', '_2024'),
        how='outer'
    ).fillna(0)
    
    stations['change'] = stations['count_2024'] - stations['count_2021']
    stations['pct_change'] = (stations['change'] / stations['count_2021']) * 100
    
    return stations

# Calculate changes using the two separate datasets
stations = calculate_station_changes(df_2021, df_2024)




Total stations analyzed: 2471
Stations with increased usage: 2162
Stations with decreased usage: 306
Maximum increase: 232,655
Maximum decrease: -190,120


In [14]:
# Create figure
fig = go.Figure()

# Add base map layer with station dots
fig.add_trace(go.Scattermapbox(
    lat=stations['lat_2021'],
    lon=stations['lng_2021'],
    mode='markers',
    marker=dict(
        size=4,
        color='white',
        opacity=0.8
    ),
    showlegend=False,
    hoverinfo='none'
))

# Add change visualization layer
def get_color(change):
    if change > 0:
        return f'rgba(0, 255, 0, 0.6)'  # Brighter green for positive
    return f'rgba(255, 0, 0, 0.6)'  # Brighter red for negative

# Calculate size scaling factor
max_change = stations['change'].abs().max()
size_scale = 30  # Adjust this value to make circles larger or smaller

fig.add_trace(go.Scattermapbox(
    lat=stations['lat_2021'],
    lon=stations['lng_2021'],
    mode='markers',
    marker=dict(
        size=stations['change'].abs().apply(lambda x: np.sqrt(abs(x)/max_change) * size_scale),
        color=[get_color(c) for c in stations['change']],
        opacity=0.6
    ),
    text=stations.apply(
        lambda x: f"Station: {x.name}<br>" +
                 f"2021 Rides: {int(x['count_2021']):,}<br>" +
                 f"2024 Rides: {int(x['count_2024']):,}<br>" +
                 f"Change: {int(x['change']):,} ({x['pct_change']:.1f}%)",
        axis=1
    ),
    hovertemplate="%{text}<extra></extra>"
))

# Update layout
fig.update_layout(
    mapbox_style="carto-darkmatter",
    mapbox=dict(
        center=dict(lat=40.7505, lon=-73.9934),  # Manhattan center
        zoom=11.5
    ),
    margin={"r":0,"t":50,"l":0,"b":0},
    title={
        'text': "Change in Citibike Station Usage (2021 vs 2024)<br>" +
               "<sup>Circle size = magnitude of change | Green = increase, Red = decrease</sup>",
        'y':0.98,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': dict(color='white')
    },
    paper_bgcolor='#1a1f35',
    plot_bgcolor='#1a1f35'
)

# Save the map
fig.write_html("site/charts/station_changes_map.html")

print(f"Total stations analyzed: {len(stations)}")
print(f"Stations with increased usage: {(stations['change'] > 0).sum()}")
print(f"Stations with decreased usage: {(stations['change'] < 0).sum()}")
print(f"Maximum increase: {stations['change'].max():,.0f}")
print(f"Maximum decrease: {stations['change'].min():,.0f}")

Total stations analyzed: 2471
Stations with increased usage: 2162
Stations with decreased usage: 306
Maximum increase: 232,655
Maximum decrease: -190,120


#### Delta on map - Option 2

In [16]:
import plotly.graph_objects as go
import pandas as pd
import numpy as np

def calculate_station_changes(df_2021, df_2024):
    def get_station_counts(df_year):
        start_counts = df_year.groupby('start_station_name').agg({
            'start_lat': 'first',
            'start_lng': 'first',
            'ride_id': 'count'
        }).rename(columns={'start_lat': 'lat', 'start_lng': 'lng', 'ride_id': 'count'})
        
        end_counts = df_year.groupby('end_station_name').agg({
            'end_lat': 'first',
            'end_lng': 'first',
            'ride_id': 'count'
        }).rename(columns={'end_lat': 'lat', 'end_lng': 'lng', 'ride_id': 'count'})
        
        combined = pd.concat([start_counts, end_counts]).groupby(level=0).agg({
            'lat': 'first',
            'lng': 'first',
            'count': 'sum'
        })
        
        return combined
    
    counts_2021 = get_station_counts(df_2021)
    counts_2024 = get_station_counts(df_2024)
    
    stations = pd.merge(
        counts_2021, 
        counts_2024, 
        left_index=True, 
        right_index=True, 
        suffixes=('_2021', '_2024'),
        how='outer'
    ).fillna(0)
    
    stations['total_rides'] = stations['count_2021'] + stations['count_2024']
    stations['pct_change'] = ((stations['count_2024'] - stations['count_2021']) / stations['count_2021'] * 100)
    
    return stations

# Calculate changes
stations = calculate_station_changes(df_2021, df_2024)




In [17]:
# Create figure
fig = go.Figure()

# Add base map layer with station dots
fig.add_trace(go.Scattermapbox(
    lat=stations['lat_2021'],
    lon=stations['lng_2021'],
    mode='markers',
    marker=dict(
        size=3,
        color='white',
        opacity=0.6
    ),
    showlegend=False,
    hoverinfo='none'
))

# Color function using a more subtle palette
def get_color(pct_change):
    if pct_change > 0:
        # Subtle blue gradient for positive change
        intensity = min(abs(pct_change) / 100, 1)
        return f'rgba(99, 171, 255, {intensity * 0.7})'  # Uber-style blue
    else:
        # Subtle coral/orange gradient for negative change
        intensity = min(abs(pct_change) / 100, 1)
        return f'rgba(255, 126, 103, {intensity * 0.7})'  # Warm coral

# Calculate size scaling factor based on total rides
max_total_rides = stations['total_rides'].max()
size_scale = 40  # Adjust this value to make circles larger or smaller

fig.add_trace(go.Scattermapbox(
    lat=stations['lat_2021'],
    lon=stations['lng_2021'],
    mode='markers',
    marker=dict(
        size=stations['total_rides'].apply(lambda x: np.sqrt(x/max_total_rides) * size_scale),
        color=[get_color(c) for c in stations['pct_change']],
        opacity=0.7
    ),
    text=stations.apply(
        lambda x: f"Station: {x.name}<br>" +
                 f"2021 Rides: {int(x['count_2021']):,}<br>" +
                 f"2024 Rides: {int(x['count_2024']):,}<br>" +
                 f"Total Rides: {int(x['total_rides']):,}<br>" +
                 f"Change: {x['pct_change']:.1f}%",
        axis=1
    ),
    hovertemplate="%{text}<extra></extra>"
))

# Update layout
fig.update_layout(
    mapbox_style="carto-darkmatter",
    mapbox=dict(
        center=dict(lat=40.7505, lon=-73.9934),  # Manhattan center
        zoom=11.5
    ),
    margin={"r":0,"t":50,"l":0,"b":0},
    title={
        'text': "Citibike Station Usage Changes (2021 vs 2024)<br>" +
               "<sup>Circle size = total rides | Color intensity = % change (Blue +ve, Coral -ve)</sup>",
        'y':0.98,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': dict(color='white', size=16)
    },
    paper_bgcolor='#1a1f35',
    plot_bgcolor='#1a1f35'
)

# Save the map
fig.write_html("site/charts/station_changes_map1.html")

# Print summary statistics
print(f"Total stations analyzed: {len(stations)}")
print(f"Stations with increased usage: {(stations['pct_change'] > 0).sum()}")
print(f"Stations with decreased usage: {(stations['pct_change'] < 0).sum()}")
print(f"Maximum % increase: {stations['pct_change'].max():.1f}%")
print(f"Maximum % decrease: {stations['pct_change'].min():.1f}%")
print(f"Median % change: {stations['pct_change'].median():.1f}%")

Total stations analyzed: 2471
Stations with increased usage: 2162
Stations with decreased usage: 306
Maximum % increase: inf%
Maximum % decrease: -100.0%
Median % change: 100.0%


#### Station changes - option 3

In [21]:
# Update layout
fig.update_layout(
    mapbox_style="carto-darkmatter",
    mapbox=dict(
        center=dict(lat=40.7505, lon=-73.9934),  # Manhattan center
        zoom=11.5
    ),
    margin=dict(r=0, t=80, l=0, b=0),  # Increased top margin to 80
    title=dict(
        text="Change in Citibike Station Usage (2021 vs 2024)<br>" +
             "<sup>Circle size = magnitude of change | Green = increase, Red = decrease</sup>",
        y=0.95,  # Adjusted y position
        x=0.5,
        xanchor='center',
        yanchor='top',
        font=dict(
            color='#00fff2',
            size=24,
            family='Arial, sans-serif'
        ),
        pad=dict(b=20)  # Added padding below title
    ),
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)'
)

# Save the map with config options
fig.write_html("site/charts/station_changes_map2.html",
               config={
                   'displayModeBar': True,
                   'modeBarButtonsToAdd': ['drawrect', 'eraseshape'],
                   'modeBarButtonsToRemove': ['lasso2d']
               })
