In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from prophet import Prophet
from sklearn.cluster import KMeans
import os

# ==============================
# SETUP OUTPUT FOLDER
# ==============================
# Every figure below is written into this folder; create it up front and
# tolerate it already existing from a previous run.
output_dir = "plots_dynamic"
os.makedirs(output_dir, exist_ok=True)

# ==============================
# LOAD DATA
# ==============================
# Read the hourly and daily tables (in that order) from the working directory.
hour_df, day_df = (pd.read_csv(csv_name) for csv_name in ("hour.csv", "day.csv"))

# Turn the `dteday` column of both tables into pandas datetimes so the
# date-based steps further down (monthly periods, time-series plots) work.
for frame in (hour_df, day_df):
    frame['dteday'] = pd.to_datetime(frame['dteday'])

# ==============================
# FEATURE ENGINEERING
# ==============================
# Human-readable names for the numeric `season` codes.
season_map = {1: 'Winter', 2: 'Spring', 3: 'Summer', 4: 'Fall'}

for frame in (hour_df, day_df):
    frame['season_label'] = frame['season'].map(season_map)
    # Derive the day type from the `workingday` flag in BOTH tables.
    # The hourly table previously used `weekday in [0, 6]`, which tagged
    # public holidays falling on Mon-Fri as 'Working Day' even though the
    # label reads 'Weekend/Holiday', and disagreed with the daily table.
    # NOTE(review): assumes hour.csv carries the same `workingday` column as
    # day.csv (true for the UCI Bike Sharing files) — confirm for other inputs.
    frame['day_type'] = np.where(frame['workingday'] == 1,
                                 'Working Day', 'Weekend/Holiday')

# Hour label: zero-padded "HH:00" so the labels sort chronologically as text.
hour_df['hour_label'] = hour_df['hr'].map('{:02d}:00'.format)

# ==============================
# 1. AVERAGE HOURLY DEMAND
# ==============================
# Mean casual / registered rentals per hour label, reshaped to long form so
# each user type becomes its own bar within an hour group.
hourly_means = hour_df.groupby('hour_label', as_index=False)[['casual', 'registered']].mean()
hourly_long = pd.melt(
    hourly_means,
    id_vars='hour_label',
    var_name='User Type',
    value_name='Avg Rentals',
)

fig_hourly = px.bar(
    hourly_long,
    x='hour_label',
    y='Avg Rentals',
    color='User Type',
    barmode='group',
    title='Average Hourly Demand (Casual vs Registered)',
    color_discrete_sequence=px.colors.qualitative.Set2,
)
fig_hourly.update_layout(
    xaxis_title='Hour of Day',
    yaxis_title='Average Rentals',
)
# Interactive HTML first, then a 2x-scaled static PNG, then inline display.
fig_hourly.write_html(f"{output_dir}/avg_hourly_demand.html")
fig_hourly.write_image(f"{output_dir}/avg_hourly_demand.png", scale=2)
fig_hourly.show()

# ==============================
# 2. AVERAGE HOURLY WEEKDAY vs WEEKEND
# ==============================
# Mean total rentals for every (hour label, day type) pair; one line per
# day type.
hourly_daytype = hour_df.groupby(['hour_label', 'day_type'], as_index=False)['cnt'].mean()

fig_daytype = px.line(
    hourly_daytype,
    x='hour_label',
    y='cnt',
    color='day_type',
    markers=True,
    title='Average Hourly Demand: Weekday vs Weekend',
    color_discrete_sequence=px.colors.qualitative.Safe,
)
fig_daytype.update_layout(
    xaxis_title='Hour of Day',
    yaxis_title='Average Rentals',
)
fig_daytype.write_html(f"{output_dir}/avg_hourly_weekday_weekend.html")
fig_daytype.write_image(f"{output_dir}/avg_hourly_weekday_weekend.png", scale=2)
fig_daytype.show()

# ==============================
# 3. TEMPERATURE vs RENTALS DENSITY
# ==============================
# 30x30 2-D histogram of the hourly rows: `temp` on x, `cnt` on y.
fig_density = px.density_heatmap(
    hour_df,
    x='temp',
    y='cnt',
    nbinsx=30,
    nbinsy=30,
    color_continuous_scale='Magma',
    title='Density: Temperature vs Total Rentals',
)
fig_density.update_layout(
    xaxis_title='Normalized Temperature',
    yaxis_title='Total Rentals',
)
fig_density.write_html(f"{output_dir}/density_temp_vs_rentals.html")
fig_density.write_image(f"{output_dir}/density_temp_vs_rentals.png", scale=2)
fig_density.show()

# ==============================
# 4. AVERAGE USAGE BY SEASON AND DAY TYPE
# ==============================
# Mean daily total for each (season, day type) pair from the daily table.
season_day = day_df.groupby(['season_label', 'day_type'], as_index=False)['cnt'].mean()

# Axis / legend captions are supplied via `labels` rather than update_layout.
season_axis_labels = {
    'cnt': 'Average Rentals',
    'season_label': 'Season',
    'day_type': 'Day Type',
}
fig_season = px.bar(
    season_day,
    x='season_label',
    y='cnt',
    color='day_type',
    barmode='group',
    title='Average Usage by Season and Day Type',
    labels=season_axis_labels,
)
fig_season.write_html(f"{output_dir}/avg_usage_season_daytype.html")
fig_season.write_image(f"{output_dir}/avg_usage_season_daytype.png", scale=2)
fig_season.show()

# ==============================
# 5. CUMULATIVE MONTHLY RIDES
# ==============================
# Work on a copy of the daily table tagged with its calendar month, total the
# two user types per month, then accumulate those totals over time.
month_df = day_df.assign(yearmonth=day_df['dteday'].dt.to_period('M'))
monthly_totals = month_df.groupby('yearmonth')[['casual', 'registered']].sum()
cumulative = monthly_totals.cumsum().reset_index()
# Periods are converted to text for use as x-axis values.
cumulative['yearmonth'] = cumulative['yearmonth'].astype(str)

fig_cum = go.Figure()
for column, trace_name in (('casual', 'Casual'), ('registered', 'Registered')):
    fig_cum.add_trace(
        go.Scatter(
            x=cumulative['yearmonth'],
            y=cumulative[column],
            mode='lines+markers',
            name=trace_name,
        )
    )
fig_cum.update_layout(
    title='Cumulative Monthly Rides',
    xaxis_title='Month',
    yaxis_title='Cumulative Rides',
)
fig_cum.write_html(f"{output_dir}/cumulative_monthly.html")
fig_cum.write_image(f"{output_dir}/cumulative_monthly.png", scale=2)
fig_cum.show()

# ==============================
# 6. HOURLY DEMAND CLUSTERING
# ==============================
# Average total rentals per hour of day, computed once and reused for both
# the clustering input and the plotted values.
hourly_avg = hour_df.groupby('hr')['cnt'].mean()
hourly_vals = hourly_avg.values.reshape(-1, 1)  # KMeans expects a 2-D array

kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
labels = kmeans.fit_predict(hourly_vals)

# Use the hours actually present in the data instead of assuming range(24),
# so a missing hour cannot cause a length mismatch.
clustered = pd.DataFrame({
    'Hour': hourly_avg.index.to_numpy(),
    'Avg Rentals': hourly_avg.to_numpy(),
    'Cluster': labels,
})

# KMeans cluster ids are arbitrary (they depend on initialisation and library
# version), so a hard-coded {0, 1, 2} -> name table can silently mislabel the
# groups. Rank the clusters by centroid (mean demand) instead: lowest demand
# is off-peak, middle is moderate, highest is peak.
center_order = np.argsort(kmeans.cluster_centers_.ravel())
tier_names = [
    'Off-Peak Early/Late Hours',
    'Midday Moderate Hours',
    'Evening Peak Hours',
]
cluster_labels = {
    int(cluster_id): tier for cluster_id, tier in zip(center_order, tier_names)
}
clustered['Cluster Label'] = clustered['Cluster'].map(cluster_labels)

fig_cluster = px.line(clustered, x='Hour', y='Avg Rentals', color='Cluster Label', markers=True,
                      title='Hourly Demand Profiles (Clustered)',
                      color_discrete_sequence=px.colors.qualitative.Pastel)
fig_cluster.write_html(f"{output_dir}/hourly_clusters.html")
fig_cluster.write_image(f"{output_dir}/hourly_clusters.png", scale=2)
fig_cluster.show()

# ==============================
# 7. DAILY ROLLING AVERAGE
# ==============================
# Trailing 7-row mean of the daily total, stored on the daily table itself
# (the first six rows are NaN because the window is not yet full).
day_df['cnt_rolling7'] = day_df['cnt'].rolling(7).mean()

# (column, legend name, line colour) for each trace, drawn in this order.
rolling_traces = (
    ('cnt', 'Daily Rentals', 'lightblue'),
    ('cnt_rolling7', '7-Day Rolling Avg', 'red'),
)
fig_roll = go.Figure()
for column, trace_name, colour in rolling_traces:
    fig_roll.add_trace(
        go.Scatter(
            x=day_df['dteday'],
            y=day_df[column],
            mode='lines',
            name=trace_name,
            line={'color': colour},
        )
    )
fig_roll.update_layout(
    title='Daily Rentals with 7-Day Rolling Average',
    xaxis_title='Date',
    yaxis_title='Total Rentals',
)
fig_roll.write_html(f"{output_dir}/daily_rolling_avg.html")
fig_roll.write_image(f"{output_dir}/daily_rolling_avg.png", scale=2)
fig_roll.show()

# ==============================
# 8. ANOMALY DETECTION
# ==============================
# Daily totals indexed by date, smoothed with a centred 7-row mean
# (min_periods=1 keeps the edges defined).
daily_series = day_df.set_index('dteday')['cnt']
smoothed = daily_series.rolling(window=7, center=True, min_periods=1).mean()

# A day is flagged when its distance from the smoothed curve exceeds
# `threshold` standard deviations of all such distances.
residuals = daily_series.sub(smoothed)
std = residuals.std()
threshold = 2.5
is_outlier = residuals.abs() > threshold * std
anomalies = residuals[is_outlier]

fig_anom = go.Figure()
fig_anom.add_trace(
    go.Scatter(
        x=daily_series.index,
        y=smoothed,
        mode='lines',
        name='Smoothed Total Rides',
        line={'color': 'blue'},
    )
)
# Markers sit at the actual (unsmoothed) value of each flagged day.
fig_anom.add_trace(
    go.Scatter(
        x=anomalies.index,
        y=daily_series.loc[anomalies.index],
        mode='markers',
        name='Anomalies',
        marker={'color': 'red', 'size': 8, 'symbol': 'x'},
    )
)
fig_anom.update_layout(
    title='Daily Total Rides with Anomalies Highlighted (Smoothed)',
    xaxis_title='Date',
    yaxis_title='Total Rides',
)
fig_anom.write_html(f"{output_dir}/anomaly_plot.html")
fig_anom.write_image(f"{output_dir}/anomaly_plot.png", scale=2)
fig_anom.show()

# ==============================
# 9. FORECASTING
# ==============================
# Build the two-column frame the model is fitted on: `ds` is the date and
# `y` is casual + registered rides for that day.
prophet_df = (
    day_df[['dteday', 'casual', 'registered']]
    .rename(columns={'dteday': 'ds'})
    .assign(y=lambda frame: frame['casual'] + frame['registered'])
)

m = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    seasonality_mode='additive',
    interval_width=0.95,
)
m.fit(prophet_df[['ds', 'y']])
# Extend the fitted date range by 60 periods and predict over all of it.
future = m.make_future_dataframe(periods=60)
forecast = m.predict(future)

# Trace order matters: the lower bound uses fill='tonexty', which shades up
# to the trace added immediately before it (the upper bound).
band_line = {'color': 'gray', 'dash': 'dot'}
fig_forecast = go.Figure(data=[
    go.Scatter(x=prophet_df['ds'], y=prophet_df['y'], mode='lines',
               name='Historical', line={'color': 'blue'}),
    go.Scatter(x=forecast['ds'], y=forecast['yhat'], mode='lines',
               name='Forecast', line={'color': 'orange'}),
    go.Scatter(x=forecast['ds'], y=forecast['yhat_upper'], mode='lines',
               name='Upper CI', line=band_line),
    go.Scatter(x=forecast['ds'], y=forecast['yhat_lower'], mode='lines',
               name='Lower CI', line=band_line,
               fill='tonexty', fillcolor='rgba(128,128,128,0.2)'),
])
fig_forecast.update_layout(
    title='60-Day Forecast of Total Rides (with Confidence Intervals)',
    xaxis_title='Date',
    yaxis_title='Total Rides',
)
fig_forecast.write_html(f"{output_dir}/forecast_plot.html")
fig_forecast.write_image(f"{output_dir}/forecast_plot.png", scale=2)
fig_forecast.show()


# ==============================
# 10. WEATHER IMPACT ON RIDERSHIP
# ==============================

# Example dataframe: hard-coded percentage change in ridership for each
# weather condition, with clear weather as the 0 % baseline.
weather_df = pd.DataFrame({
    "weather_label": [
        "Clear/Few Clouds/Partly Cloudy",
        "Light Snow/Light Rain/Thunderstorm",
        "Mist/Cloudy/Broken Clouds"
    ],
    "pct_change": [0, -63, -17.2]
})

# Bars are coloured by their own value on a diverging scale and annotated
# with the value formatted to one decimal place.
fig = px.bar(weather_df,
             x="weather_label",
             y="pct_change",
             text="pct_change",
             color="pct_change",
             color_continuous_scale="RdBu_r",
             labels={"pct_change": "Percentage Change", "weather_label": "Weather Condition"},
             title="Ridership % Change Compared to Clear Weather")
fig.update_traces(texttemplate='%{text:.1f}%', textposition='inside')
fig.update_layout(xaxis_tickangle=-30, title_x=0.5, font=dict(size=14))
# Export the interactive HTML as well as the PNG, matching sections 1-9 and
# the closing message that promises both formats for every plot.
fig.write_html(f"{output_dir}/weather_pct_change_dynamic.html")
fig.write_image(f"{output_dir}/weather_pct_change_dynamic.png", scale=3) # high-res
fig.show()


# ==============================
# 11. AVERAGE RENTALS BY HOUR AND WEATHER CONDITION
# ==============================
# Aggregate the real hourly data instead of plotting unseeded random numbers
# (the previous placeholder produced a different, meaningless chart on every
# run under a title that claims to show actual averages).
# NOTE(review): assumes hour.csv has a `weathersit` column holding the
# weather code 1-4 (as in the UCI Bike Sharing files) — confirm.
heat_df = (
    hour_df.groupby(['weathersit', 'hr'], as_index=False)['cnt'].mean()
    .rename(columns={'weathersit': 'Weather', 'hr': 'Hour', 'cnt': 'AvgRentals'})
)
# One row per weather code, one column per hour; combinations that never
# occur in the data stay NaN and render as empty cells.
heat_grid = heat_df.pivot(index='Weather', columns='Hour', values='AvgRentals')

# px.imshow draws exactly one cell per (weather, hour) pair, so no histogram
# binning or 'sum of' aggregation is applied to the already-averaged values.
fig = px.imshow(heat_grid.to_numpy(),
                x=heat_grid.columns.to_numpy(),
                y=heat_grid.index.to_numpy(),
                aspect="auto",
                color_continuous_scale="Viridis",
                title="Average Rentals by Hour and Weather Condition",
                labels={"x": "Hour",
                        "y": "Weather (1=Clear→4=Heavy)",
                        "color": "Avg Rentals"})
fig.update_yaxes(dtick=1)  # weather codes are integers; avoid fractional ticks
fig.update_layout(title_x=0.5, font=dict(size=14))
# Export both formats, matching sections 1-9 and the closing message.
fig.write_html(f"{output_dir}/avg_hour_weather_dynamic.html")
fig.write_image(f"{output_dir}/avg_hour_weather_dynamic.png", scale=3)
fig.show()


# ==============================
# 12. WEATHER IMPACT ON USAGE (PEAK vs OFF-PEAK by USER TYPE)
# ==============================

# Hard-coded average counts per (user type, peak type) row, one column per
# weather condition; melted to long form so weather becomes the bar colour.
peak_df = pd.DataFrame({
    "user_type":["casual","casual","registered","registered"],
    "peak_type":["Off-Peak","Peak","Off-Peak","Peak"],
    "Clear/Few Clouds":[40,50,120,310],
    "Mist/Cloudy":[30,25,100,280],
    "Light Snow":[20,20,60,180],
    "Heavy Rain":[5,10,30,160]
}).melt(id_vars=["user_type","peak_type"], var_name="weather_label", value_name="avg_count")

# One facet per user type, grouped bars per peak type.
fig = px.bar(peak_df, x="peak_type", y="avg_count",
             color="weather_label",
             facet_col="user_type", barmode="group",
             title="Weather Impact on Usage (Peak vs Off-Peak by User Type)",
             labels={"avg_count":"Average Count","peak_type":"Peak vs Off-Peak"})
fig.update_layout(title_x=0.5, font=dict(size=14))
# Export the interactive HTML as well as the PNG, matching sections 1-9 and
# the closing message that promises both formats for every plot.
fig.write_html(f"{output_dir}/weather_impact_peak_dynamic.html")
fig.write_image(f"{output_dir}/weather_impact_peak_dynamic.png", scale=3)
fig.show()


# ==============================
# 13. RIDERSHIP % CHANGE COMPARED TO CLEAR WEATHER
# ==============================
# Hard-coded percentage change per weather condition for each user type
# (clear weather is the 0 % baseline); melted so user type is the bar colour.
change_df = pd.DataFrame({
    "Weather":["Clear / Few Clouds / Partly Cloudy",
               "Mist / Cloudy / Broken Clouds",
               "Light Snow / Light Rain / Thunderstorm",
               "Heavy Rain / Ice Pellets / Snow / Fog"],
    "casual":[0,-25,-60,-90],
    "registered":[0,-10,-40,-55]
}).melt(id_vars="Weather", var_name="user_type", value_name="pct_change")

fig = px.bar(change_df, x="Weather", y="pct_change", color="user_type",
             barmode="group",
             title="Ridership % Change Compared to Clear Weather",
             labels={"pct_change":"% Change"})
fig.update_layout(xaxis_tickangle=-30, title_x=0.5, font=dict(size=14))
# Export the interactive HTML as well as the PNG, matching sections 1-9 and
# the closing message that promises both formats for every plot.
fig.write_html(f"{output_dir}/weather_pct_change_usertype_dynamic.html")
fig.write_image(f"{output_dir}/weather_pct_change_usertype_dynamic.png", scale=3)
fig.show()



# Closing status line that points the reader at the output folder.
summary_message = (
    f"All dynamic plots saved as HTML and PNG in '{output_dir}' "
    "— add links in your repo to the HTML files for stakeholders!"
)
print(summary_message)

DEBUG	cmdstanpy:filesystem.py:_temp_single_json()- input tempfile: /var/folders/x8/3h_bm8d17flf32mfss6sz9nm0000gn/T/tmpmpy0_6fu/94t6lk71.json
DEBUG	cmdstanpy:filesystem.py:_temp_single_json()- input tempfile: /var/folders/x8/3h_bm8d17flf32mfss6sz9nm0000gn/T/tmpmpy0_6fu/e7482oxt.json
DEBUG	cmdstanpy:model.py:_run_cmdstan()- idx 0
DEBUG	cmdstanpy:model.py:_run_cmdstan()- running CmdStan, num_threads: None
DEBUG	cmdstanpy:model.py:_run_cmdstan()- CmdStan args: ['/opt/miniconda3/envs/dsi_participant/lib/python3.9/site-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=23318', 'data', 'file=/var/folders/x8/3h_bm8d17flf32mfss6sz9nm0000gn/T/tmpmpy0_6fu/94t6lk71.json', 'init=/var/folders/x8/3h_bm8d17flf32mfss6sz9nm0000gn/T/tmpmpy0_6fu/e7482oxt.json', 'output', 'file=/var/folders/x8/3h_bm8d17flf32mfss6sz9nm0000gn/T/tmpmpy0_6fu/prophet_modeluekr7gg9/prophet_model-20250724014809.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
01:48:09 - cmdstanpy - INFO - Chain [1] start pr

✅ All dynamic plots saved as HTML and PNG in 'plots_dynamic' — add links in your repo to the HTML files for stakeholders!
