# In [6]:
# ============================================================
# 1. Install & Import
# ============================================================
# !pip install plotly pandas numpy
import pandas as pd
import plotly.express as px

# ============================================================
# 2. Load Data
# ============================================================
# Daily and hourly tables are read from CSV files given by relative path.
day_df = pd.read_csv(filepath_or_buffer="day.csv")
hour_df = pd.read_csv(filepath_or_buffer="hour.csv")

# ============================================================
# 3. Feature Engineering
# ============================================================
# Season code -> season name (codes start at 1); labelled on both frames
season_map = dict(enumerate(['Winter', 'Spring', 'Summer', 'Fall'], start=1))
hour_df['season_label'] = hour_df['season'].map(season_map)
day_df['season_label'] = day_df['season'].map(season_map)

# Weekday code -> day name (code 0 is Sunday)
weekday_map = dict(enumerate([
    'Sunday',
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
]))
hour_df['weekday_label'] = hour_df['weekday'].map(weekday_map)

# Weather situation code -> human-readable description (codes start at 1)
weather_map = dict(enumerate([
    'Clear / Few Clouds / Partly Cloudy',
    'Mist / Cloudy / Broken Clouds',
    'Light Snow / Light Rain / Thunderstorm',
    'Heavy Rain / Ice Pellets / Snow / Fog',
], start=1))
hour_df['weather_label'] = hour_df['weathersit'].map(weather_map)

# Day category (holiday/working/weekend)
def classify_day(row):
    """Return the day category for one record.

    Args:
        row: Mapping-like record exposing 'holiday' and 'workingday' flags
            (a DataFrame row when used with ``DataFrame.apply``).

    Returns:
        'Holiday' when the holiday flag equals 1, otherwise 'Working Day'
        when the workingday flag equals 1, otherwise 'Weekend'.
    """
    # The holiday flag takes precedence; 'workingday' is only consulted
    # when the record is not a holiday.
    if row['holiday'] == 1:
        return 'Holiday'
    return 'Working Day' if row['workingday'] == 1 else 'Weekend'
# Row-wise pass: classify_day inspects each row's 'holiday' / 'workingday' flags.
hour_df['day_category'] = hour_df.apply(classify_day, axis='columns')

# Peak vs Off-Peak
def peak_flag(hr, peak_hours=frozenset({7, 8, 9, 16, 17, 18, 19})):
    """Label an hour of the day as 'Peak' or 'Off-Peak'.

    Args:
        hr: Hour value to classify (the values of the ``hr`` column).
        peak_hours: Collection of hour values treated as peak. Defaults to
            hours 7-9 and 16-19; pass another collection to analyse a
            different peak window. The default is an immutable frozenset,
            so it is safe to share between calls.

    Returns:
        'Peak' if ``hr`` is in ``peak_hours``, otherwise 'Off-Peak'.
    """
    return 'Peak' if hr in peak_hours else 'Off-Peak'
# Element-wise peak / off-peak label for every hourly record.
hour_df['peak_offpeak'] = hour_df['hr'].map(peak_flag)

# Temperature bins
def temp_bin(temp):
    """Bucket a temperature reading into a labelled range.

    Args:
        temp: Temperature value on the scale of the ``temp`` column; the
            bin edges are 0.33 and 0.66 on that scale.

    Returns:
        One of the three temperature-bin labels, or ``None`` when ``temp``
        is missing (``None`` / NaN).
    """
    # NOTE(review): the °C figures in the labels presumably assume the column
    # stores Celsius divided by ~41 -- confirm against the dataset docs.

    # NaN fails every ordered comparison, so without this guard a missing
    # reading would fall through to the last branch and be reported as
    # 'High Temp'.
    if pd.isna(temp):
        return None
    if temp < 0.33:
        return 'Low Temp (<13°C)'
    if temp < 0.66:
        return 'Moderate Temp (13–27°C)'
    return 'High Temp (>27°C)'
hour_df['temp_bin'] = hour_df['temp'].map(temp_bin)

# Zero-padded "HH:00" text label for each record's hour
hour_df['hour_label'] = hour_df['hr'].map(lambda hr: f"{hr:02d}:00")

# Explicit category orders for the heatmap axes (week listed Monday first)
weekday_order = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
]
hour_order = list(map(lambda h: f"{h:02d}:00", range(24)))

# ============================================================
# 4. VISUALIZATIONS
# ============================================================

# ---------- A. Average Hourly Demand (Casual vs Registered) ----------
# Mean casual / registered counts per hour, with a zero-padded "HH:00" label.
hourly_all = (
    hour_df.groupby('hr')[['casual', 'registered']]
    .mean()
    .reset_index()
    .assign(hour_label=lambda frame: frame['hr'].map(lambda hr: f"{hr:02d}:00"))
)

# Long format: one row per (hour label, user type) so the bars can be grouped.
hourly_all_melt = pd.melt(
    hourly_all,
    id_vars=['hour_label'],
    value_vars=['casual', 'registered'],
    var_name='user_type',
    value_name='avg_count',
)

# update_layout returns the figure, so the styled figure is bound directly.
fig_avg_hour = px.bar(
    data_frame=hourly_all_melt,
    x='hour_label',
    y='avg_count',
    color='user_type',
    barmode='group',
    color_discrete_sequence=px.colors.qualitative.Set2,
    title='Average Hourly Demand (Casual vs Registered)',
    labels={
        'hour_label': 'Hour of Day',
        'avg_count': 'Average Bike Count',
        'user_type': 'User Type',
    },
).update_layout(
    plot_bgcolor='#F9F9F9',
    paper_bgcolor='white',
    font={'size': 12},
    xaxis_tickangle=45,
)
fig_avg_hour.show()

# ---------- B. Hourly Usage by Day Category (Holiday / Working / Weekend) ----------
# Mean counts for every (hour, day category) pair.
hourly_group_cat = (
    hour_df.groupby(['hr', 'day_category'])[['casual', 'registered']]
    .agg('mean')
    .reset_index()
)

# Long format so each user type becomes its own facet column.
hourly_melt_cat = pd.melt(
    hourly_group_cat,
    id_vars=['hr', 'day_category'],
    value_vars=['casual', 'registered'],
    var_name='user_type',
    value_name='count',
)

fig_hourly_combined = px.line(
    data_frame=hourly_melt_cat,
    x='hr',
    y='count',
    color='day_category',
    facet_col='user_type',
    markers=True,
    title='Hourly Usage by Day Category (Casual & Registered)',
    labels={'hr': 'Hour of Day', 'count': 'Average Count'},
)
fig_hourly_combined.show()

# ---------- C. Seasonal Usage (Casual vs Registered) ----------
# Daily records in long format: one row per (day, user type).
melt_season = pd.melt(
    day_df,
    id_vars=['season_label'],
    value_vars=['casual', 'registered'],
    var_name='user_type',
    value_name='count',
)

fig_season = px.box(
    data_frame=melt_season,
    x='season_label',
    y='count',
    color='user_type',
    title='Seasonal Usage Variation: Casual vs Registered',
    labels={'season_label': 'Season', 'count': 'Daily Count'},
)
fig_season.show()

# ---------- D. Temperature Bin Boxplots by Day Category ----------
# Hourly records in long format, keeping the temperature bin and day category.
melt_temp_daycat = pd.melt(
    hour_df,
    id_vars=['temp_bin', 'day_category'],
    value_vars=['casual', 'registered'],
    var_name='user_type',
    value_name='count',
)

fig_temp_daycat = px.box(
    data_frame=melt_temp_daycat,
    x='temp_bin',
    y='count',
    color='day_category',
    facet_col='user_type',
    title='Usage by Temperature Bin and Day Category (Casual & Registered)',
    labels={'temp_bin': 'Temperature Level', 'count': 'Hourly Count'},
)
# Tilt the bin labels slightly on every facet's x-axis.
fig_temp_daycat.update_xaxes(tickangle=20)
fig_temp_daycat.show()

# ---------- E. Weather Impact (X = Peak/Off-Peak, Pattern = User Type, Color = Weather) ----------
# Mean counts for every (weather description, peak flag) pair.
weather_group = (
    hour_df.groupby(['weather_label', 'peak_offpeak'])[['casual', 'registered']]
    .agg('mean')
    .reset_index()
)

# Long format so user type can drive the bar pattern.
weather_melt = pd.melt(
    weather_group,
    id_vars=['weather_label', 'peak_offpeak'],
    value_vars=['casual', 'registered'],
    var_name='user_type',
    value_name='count',
)

# update_layout returns the figure, so the styled figure is bound directly.
fig_weather = px.bar(
    data_frame=weather_melt,
    x='peak_offpeak',
    y='count',
    color='weather_label',
    pattern_shape='user_type',
    barmode='group',
    title='Weather Impact on Usage (Peak vs Off-Peak, Pattern=User Type)',
    labels={'peak_offpeak': 'Peak vs Off-Peak', 'count': 'Average Count'},
).update_layout(
    plot_bgcolor='#F9F9F9',
    paper_bgcolor='white',
    font={'size': 12},
)
fig_weather.show()

# ---------- F. Heatmaps (Hour vs Day of Week) ----------
def _weekday_hour_heatmap(user_col, title, colorbar_label):
    """Build the hour-by-weekday heatmap for one user-type column.

    Args:
        user_col: Count column of ``hour_df`` to average ('casual' or
            'registered').
        title: Figure title.
        colorbar_label: Display label substituted for ``user_col``.

    Returns:
        Tuple ``(cell_means, figure)``: the per-(weekday, hour) mean table
        and the heatmap figure built from it.
    """
    # One row per (weekday, hour) cell holding the mean of the chosen column.
    cell_means = (
        hour_df.groupby(['weekday_label', 'hour_label'])[user_col]
        .mean()
        .reset_index()
    )
    figure = px.density_heatmap(
        data_frame=cell_means,
        x='hour_label',
        y='weekday_label',
        z=user_col,
        color_continuous_scale='Plasma',
        title=title,
        labels={
            'hour_label': 'Hour of Day',
            'weekday_label': 'Day of Week',
            user_col: colorbar_label,
        },
        category_orders={'hour_label': hour_order, 'weekday_label': weekday_order},
    )
    return cell_means, figure


# Casual heatmap
heatmap_casual, fig_heat_casual = _weekday_hour_heatmap(
    'casual',
    'Casual Demand (Hour vs Day of Week)',
    'Avg Casual Demand',
)
fig_heat_casual.show()

# Registered heatmap
heatmap_registered, fig_heat_registered = _weekday_hour_heatmap(
    'registered',
    'Registered Demand (Hour vs Day of Week)',
    'Avg Registered Demand',
)
fig_heat_registered.show()

# ---------- G. Faceted Boxplots (Season × Day Category × User Type) ----------
# Hourly records in long format, keeping season, temperature bin and day category.
melt_facet = pd.melt(
    hour_df,
    id_vars=['season_label', 'temp_bin', 'day_category'],
    value_vars=['casual', 'registered'],
    var_name='user_type',
    value_name='count',
)

# Seasons across the columns, user types down the rows.
fig_facet = px.box(
    data_frame=melt_facet,
    x='temp_bin',
    y='count',
    color='day_category',
    facet_col='season_label',
    facet_row='user_type',
    title='Usage Faceted by Season, Day Category, and User Type',
    labels={'temp_bin': 'Temperature Level', 'count': 'Bike Count'},
)
# Tilt the bin labels on every facet's x-axis.
fig_facet.update_xaxes(tickangle=25)
fig_facet.show()

