In [18]:
# Import required libraries
import pandas as pd
import plotly.express as px



In [19]:


# 0. LOAD & PREP DATA
# Read the hourly records from the CSV that sits next to the notebook.
df = pd.read_csv('hour.csv')

# Human-readable labels for the integer-coded columns.
# Codes missing from a mapping become NaN (standard Series.map behaviour).
season_names = {1: 'Spring', 2: 'Summer', 3: 'Fall', 4: 'Winter'}
weather_names = {1: 'Clear', 2: 'Mist', 3: 'Light Snow/Rain', 4: 'Severe'}
df['season_label'] = df['season'].map(season_names)
df['weather_label'] = df['weathersit'].map(weather_names)

# Weekday codes 0 and 6 are treated as the weekend; everything else is a weekday.
is_weekend = df['weekday'].isin([0, 6])
df['day_type'] = is_weekend.map({True: 'Weekend', False: 'Weekday'})

# Zero-padded "HH:00" strings so the hour labels sort correctly as text.
df['hour_str'] = df['hr'].astype(int).map("{:02d}:00".format)



In [20]:

# 1. AVERAGE HOURLY DEMAND (ALL DAYS)
# Mean of `cnt` for every `hour_str` label, as a flat two-column frame.
hourly_df = df.groupby('hour_str', as_index=False)['cnt'].mean()

hourly_axis_labels = {'cnt': 'Average Bike Count', 'hour_str': 'Hour of Day'}
hourly_hover = "Hour: %{x}<br>Average Count: %{y}<extra></extra>"

# Bars are coloured by their own height, so the colour scale repeats the y-axis.
fig_hourly = px.bar(
    hourly_df,
    x='hour_str',
    y='cnt',
    color='cnt',
    color_continuous_scale='Viridis',
    labels=hourly_axis_labels,
    title="Average Hourly Demand (All Days)",
)
# The empty <extra></extra> tag removes the secondary hover box.
fig_hourly.update_traces(hovertemplate=hourly_hover)
fig_hourly.show()

# Optional export. Disabled: it needs `import os` and an `output_dir`,
# neither of which is defined in the visible cells.
#fig_hourly.write_html(os.path.join(output_dir,"average_hourly_demand.html"))
#fig_hourly.write_image(os.path.join(output_dir,"average_hourly_demand.png"))

# CONCLUSION: Shows overall usage pattern and peak hours.


In [21]:


# 2. AVERAGE HOURLY DEMAND (WEEKDAY vs WEEKEND)
# Mean of `cnt` for every (hour, day type) pair, in long format so that
# plotly express can draw one line per day type.
line_df = df.groupby(['hour_str', 'day_type']).agg({'cnt': 'mean'}).reset_index()

fig_line = px.line(
    line_df,
    x='hour_str',
    y='cnt',
    color='day_type',
    markers=True,
    title="Average Hourly Demand: Weekday vs Weekend",
    labels={'cnt': 'Average Bike Count', 'hour_str': 'Hour of Day'},
    color_discrete_map={'Weekday': '#1f77b4', 'Weekend': '#ff7f0e'}
)

# One trace is produced per `day_type` value and named after it. The day type
# is written into each trace's own template because `legendgroup` is a
# trace-level attribute, not a documented hovertemplate variable, so
# `%{legendgroup}` is not guaranteed to resolve. The mean is rounded to one
# decimal for readability.
fig_line.for_each_trace(
    lambda trace: trace.update(
        hovertemplate=(
            "Hour: %{x}<br>Average Count: %{y:.1f}"
            f"<br>Day Type: {trace.name}<extra></extra>"
        )
    )
)
fig_line.show()

# Optional export. Disabled: it needs `import os` and an `output_dir`,
# neither of which is defined in the visible cells.
#fig_line.write_html(os.path.join(output_dir,"average_hourly_weekday_weekend.html"))
#fig_line.write_image(os.path.join(output_dir,"average_hourly_weekday_weekend.png"))

# CONCLUSION: Commute peaks on weekdays, broader spread on weekends.



In [22]:

# 3. HEATMAPS (REGISTERED vs CASUAL)
# Create weekday labels for heatmaps
weekday_map = {0:'Sunday',1:'Monday',2:'Tuesday',3:'Wednesday',4:'Thursday',5:'Friday',6:'Saturday'}
df['weekday_label'] = df['weekday'].map(weekday_map)

# Row order for both heatmaps: Monday first, weekend last.
weekday_order = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']


def _weekday_hour_heatmap(frame, value_col, title, color_label, hover_label):
    """Build a day-of-week x hour heatmap of the mean of ``value_col``.

    Parameters
    ----------
    frame : DataFrame with `weekday_label`, `hour_str` and ``value_col`` columns.
    value_col : name of the column to average in each (day, hour) cell.
    title : figure title.
    color_label : caption for the colour bar.
    hover_label : prefix shown before the cell value in the hover box.

    Returns
    -------
    (grid, fig) : the pivoted mean table (rows in `weekday_order`) and the
        plotly figure drawn from it.
    """
    grid = frame.pivot_table(
        values=value_col, index='weekday_label', columns='hour_str', aggfunc='mean'
    ).reindex(weekday_order)
    fig = px.imshow(
        grid,
        color_continuous_scale='Plasma',
        title=title,
        labels=dict(x="Hour of Day", y="Day of Week", color=color_label)
    )
    # Doubled braces keep plotly's %{...} placeholders literal inside the f-string.
    fig.update_traces(
        hovertemplate=f"Day: %{{y}}<br>Hour: %{{x}}<br>{hover_label}: %{{z}}<extra></extra>"
    )
    return grid, fig


# Registered
heat_reg, fig_heat_reg = _weekday_hour_heatmap(
    df,
    value_col='registered',
    title="Registered Demand (Hour vs Day of Week)",
    color_label="Avg Registered Demand",
    hover_label="Registered Avg",
)
fig_heat_reg.show()
# Optional export. Disabled: it needs `import os` and an `output_dir`,
# neither of which is defined in the visible cells.
#fig_heat_reg.write_html(os.path.join(output_dir,"heatmap_registered_demand.html"))
#fig_heat_reg.write_image(os.path.join(output_dir,"heatmap_registered_demand.png"))

# Casual
heat_casual, fig_heat_casual = _weekday_hour_heatmap(
    df,
    value_col='casual',
    title="Casual Demand (Hour vs Day of Week)",
    color_label="Avg Casual Demand",
    hover_label="Casual Avg",
)
fig_heat_casual.show()
#fig_heat_casual.write_html(os.path.join(output_dir,"heatmap_casual_demand.html"))
#fig_heat_casual.write_image(os.path.join(output_dir,"heatmap_casual_demand.png"))

# CONCLUSION: Clear separation of user behavior by time/day.


In [23]:


# 4. BOXPLOTS: SEASONS & TEMPERATURE LEVELS

#  Seasons
# One box per season label, drawn from the raw hourly `cnt` values (no aggregation).
season_palette = px.colors.qualitative.Set2
season_hover = "Season: %{x}<br>Demand: %{y}<extra></extra>"

fig_season = px.box(
    df,
    x='season_label',
    y='cnt',
    color='season_label',
    color_discrete_sequence=season_palette,
    labels={'cnt': 'Bike Count', 'season_label': 'Season'},
    title="Bike Demand Across Seasons",
)
fig_season.update_traces(hovertemplate=season_hover)
fig_season.show()

# Optional export. Disabled: it needs `import os` and an `output_dir`,
# neither of which is defined in the visible cells.
#fig_season.write_html(os.path.join(output_dir,"boxplot_seasonal_demand.html"))
#fig_season.write_image(os.path.join(output_dir,"boxplot_seasonal_demand.png"))

#  Temperature
# Bucket `temp` into three bands: [0, 0.3], (0.3, 0.6], (0.6, 1.0].
# `include_lowest=True` closes the first interval so a value of exactly 0 is kept.
# NOTE(review): the bin edges assume `temp` is normalised to 0..1; values outside
# that range become NaN in `temp_range` -- confirm against the dataset.
bins = [0, 0.3, 0.6, 1.0]
labels = ['Low Temp', 'Medium Temp', 'High Temp']
df['temp_range'] = pd.cut(df['temp'], bins=bins, labels=labels, include_lowest=True)

fig_temp = px.box(
    df,
    x='temp_range',
    y='cnt',
    color='temp_range',
    # Pin the axis/legend order to Low -> Medium -> High instead of relying on
    # the order in which the bands first appear in the rows.
    category_orders={'temp_range': labels},
    title="Bike Demand by Temperature Levels",
    labels={'cnt': 'Bike Count', 'temp_range': 'Temperature Level'},
    color_discrete_sequence=px.colors.qualitative.Pastel
)
fig_temp.update_traces(
    hovertemplate="Temp Level: %{x}<br>Demand: %{y}<extra></extra>"
)
fig_temp.show()
# Optional export. Disabled: it needs `import os` and an `output_dir`,
# neither of which is defined in the visible cells.
#fig_temp.write_html(os.path.join(output_dir,"boxplot_temperature_demand.html"))
#fig_temp.write_image(os.path.join(output_dir,"boxplot_temperature_demand.png"))

# CONCLUSION: Warm seasons and higher temps = higher usage.


In [24]:


# 5. GROUPED BAR: PEAK vs OFF-PEAK by WEATHER

# Hours counted as "Peak"; every other hour is "Off-Peak".
peak_hours = [7, 8, 9, 17, 18, 19]

# Create hour categories (vectorised membership test instead of a row-wise apply)
df['hour_cat'] = df['hr'].isin(peak_hours).map({True: 'Peak', False: 'Off-Peak'})
grouped_weather = df.groupby(['hour_cat','weather_label'])['cnt'].mean().reset_index()

fig_group = px.bar(
    grouped_weather,
    x='hour_cat',
    y='cnt',
    color='weather_label',
    barmode='group',
    text_auto='.2s',
    title="Average Demand: Peak vs Off-Peak by Weather",
    labels={
        'cnt': 'Average Bike Count',
        'hour_cat': 'Hour Category',
        # Gives the legend a readable title instead of the raw column name.
        'weather_label': 'Weather',
    },
    color_discrete_sequence=px.colors.qualitative.Bold
)

# One trace is produced per weather label and named after it. The label is
# written into each trace's own template because `legendgroup` is a trace-level
# attribute, not a documented hovertemplate variable, so `%{legendgroup}` is not
# guaranteed to resolve. The mean is rounded to one decimal for readability.
fig_group.for_each_trace(
    lambda trace: trace.update(
        hovertemplate=(
            "Hour Cat: %{x}"
            f"<br>Weather: {trace.name}"
            "<br>Avg Demand: %{y:.1f}<extra></extra>"
        )
    )
)
fig_group.show()
# Optional export. Disabled: it needs `import os` and an `output_dir`,
# neither of which is defined in the visible cells.
#fig_group.write_html(os.path.join(output_dir,"grouped_bar_peak_offpeak_weather.html"))
#fig_group.write_image(os.path.join(output_dir,"grouped_bar_peak_offpeak_weather.png"))

# CONCLUSION: Peak periods dominate demand regardless of weather, but clear weather leads.



In [25]:

# 6. TREEMAP (FINAL SUMMARY)
# Summarise total demand by season, day type, and user type.

# Sum registered and casual rides for every (season, day type) pair.
treemap_keys = ['season_label', 'day_type']
user_columns = ['registered', 'casual']
agg_treemap = df.groupby(treemap_keys)[user_columns].sum().reset_index()

# Reshape wide -> long: one row per (season, day type, user type) with its total,
# which is the layout the treemap's hierarchical `path` expects.
treemap_df = pd.melt(
    agg_treemap,
    id_vars=treemap_keys,
    value_vars=user_columns,
    var_name='user_type',
    value_name='total_demand',
)

# Build treemap
# Two-stop colour ramp: lowest totals render white, highest render dark blue.
white_to_blue = [
    (0.0, 'white'),
    (1.0, '#0033cc'),
]

# Hierarchy runs season -> day type -> user type; tile area and tile colour
# both encode `total_demand`.
fig_treemap = px.treemap(
    treemap_df,
    path=['season_label', 'day_type', 'user_type'],
    values='total_demand',
    color='total_demand',
    color_continuous_scale=white_to_blue,
    title="Treemap: Season → Day Type → User Type",
)

# Styling: thin white borders between tiles, label plus value printed on each
# tile, and a thousands separator in the hover value.
fig_treemap.update_traces(
    marker_line_width=1,
    marker_line_color='white',
    textinfo='label+value',
    hovertemplate="<b>%{label}</b><br>Total Demand: %{value:,}<extra></extra>",
)
fig_treemap.update_layout(
    font={'size': 14},
    margin={'t': 60, 'l': 25, 'r': 25, 'b': 25},
    paper_bgcolor='white',
    plot_bgcolor='white',
    coloraxis_colorbar={'title': "Total Demand", 'tickformat': ','},
)

# Show treemap
fig_treemap.show()
# Optional export. Disabled: it needs `import os` and an `output_dir`,
# neither of which is defined in the visible cells.
#fig_treemap.write_html(os.path.join(output_dir,"treemap_blue_white.html"))
#fig_treemap.write_image(os.path.join(output_dir,"treemap_blue_white.png"))