In [3]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# =============================================
# 1. Load your daily dataset
# =============================================
# Read the daily table from disk, then replace the textual 'dteday' column
# with parsed timestamps so later steps can sort and index by date.
day_df = pd.read_csv("day.csv")
day_df = day_df.assign(dteday=pd.to_datetime(day_df['dteday']))

# =============================================
# 2. Prepare data for forecast and anomalies
# =============================================
# Daily ride totals in chronological order, indexed by date.
daily_totals = day_df[['dteday', 'cnt']].sort_values('dteday').set_index('dteday')

# --- Simple anomaly detection using IQR
# A day is flagged when its total falls more than 1.5 * IQR below the first
# quartile or above the third quartile.
q1 = daily_totals['cnt'].quantile(0.25)
q3 = daily_totals['cnt'].quantile(0.75)
iqr = q3 - q1
lower = q1 - 1.5 * iqr
upper = q3 + 1.5 * iqr
anomalies = daily_totals[(daily_totals['cnt'] < lower) | (daily_totals['cnt'] > upper)]

# --- Simple rolling mean forecast
rolling_window = 30
# shift(1) moves every observation one row later, so the value reported for
# a given date is the mean of up to `rolling_window` *previous* days only.
# Without the shift the average would include the very day it is supposed to
# predict (look-ahead leakage), making the "forecast" just a smoothed copy of
# the actuals. The first date has no history and is therefore NaN.
forecast_values = (
    daily_totals['cnt']
    .shift(1)
    .rolling(window=rolling_window, min_periods=1)
    .mean()
)
forecast_dates = daily_totals.index

# =============================================
# 3. Plot: Forecast
# =============================================
# Solid blue line: the observed daily totals.
historical_trace = go.Scatter(
    x=daily_totals.index,
    y=daily_totals['cnt'],
    mode='lines',
    name='Historical',
    line={'color': 'blue'},
)
# Dashed red line: the rolling-average series prepared in section 2.
rolling_trace = go.Scatter(
    x=forecast_dates,
    y=forecast_values,
    mode='lines',
    name='Forecast (30-day rolling avg)',
    line={'color': 'red', 'dash': 'dash'},
)

forecast_layout = {
    'title': '30-Day Rolling Forecast of Total Rides',
    'xaxis_title': 'Date',
    'yaxis_title': 'Rides',
    'template': 'plotly_white',
}

fig_forecast = go.Figure(data=[historical_trace, rolling_trace])
fig_forecast.update_layout(**forecast_layout)
fig_forecast.show()

# =============================================
# 4. Plot: Anomalies
# =============================================
# Line of every daily total, with the flagged days overlaid as red crosses.
rides_trace = go.Scatter(
    x=daily_totals.index,
    y=daily_totals['cnt'],
    mode='lines',
    name='Total Rides',
    line={'color': 'blue'},
)
anomaly_trace = go.Scatter(
    x=anomalies.index,
    y=anomalies['cnt'],
    mode='markers',
    name='Anomalies',
    marker={'color': 'red', 'size': 10, 'symbol': 'x'},
)

anomaly_layout = {
    'title': 'Daily Total Rides with Anomalies Highlighted',
    'xaxis_title': 'Date',
    'yaxis_title': 'Total Rides',
    'template': 'plotly_white',
}

fig_anomaly = go.Figure(data=[rides_trace, anomaly_trace])
fig_anomaly.update_layout(**anomaly_layout)
fig_anomaly.show()

# =============================================
# 5. Plot: Ridership % Change by Weather
# =============================================
# Human-readable labels for the numeric 'weathersit' codes.
weather_map = {
    1: 'Clear/Few Clouds/Partly Cloudy',
    2: 'Mist/Cloudy/Broken Clouds',
    3: 'Light Snow/Light Rain/Thunderstorm',
    4: 'Heavy Rain/Ice Pellets/Snow/Fog',
}
day_df['weather_label'] = day_df['weathersit'].map(weather_map)

# Baseline: mean casual / registered riders on days with weathersit == 1.
clear_days = day_df.loc[day_df['weathersit'] == 1, ['casual', 'registered']]
base_casual = clear_days['casual'].mean()
base_reg = clear_days['registered'].mean()

# Mean riders per weather label, then each mean expressed as a percentage
# difference from the clear-weather baseline.
weather_stats = day_df.groupby('weather_label', as_index=False)[['casual', 'registered']].mean()
weather_stats['Casual % Change'] = (
    weather_stats['casual'].sub(base_casual).div(base_casual).mul(100)
)
weather_stats['Registered % Change'] = (
    weather_stats['registered'].sub(base_reg).div(base_reg).mul(100)
)

# Long format: one row per (weather label, user type) for a grouped bar chart.
weather_long = pd.melt(
    weather_stats,
    id_vars='weather_label',
    value_vars=['Casual % Change', 'Registered % Change'],
    var_name='User Type',
    value_name='% Change',
)

user_type_colors = {
    'Casual % Change': '#1f77b4',
    'Registered % Change': '#ff7f0e',
}
fig_weather = px.bar(
    weather_long,
    x='weather_label',
    y='% Change',
    color='User Type',
    barmode='group',
    title='Ridership % Change Compared to Clear Weather',
    color_discrete_map=user_type_colors,
)
fig_weather.update_layout(xaxis_tickangle=-45, template='plotly_white')
fig_weather.show()

# =============================================
# 6. Extra: Features for Linear Regression & RF
# =============================================
# Example features: season, weathersit, temp, atemp, hum, windspeed
feature_cols = ['season', 'weathersit', 'temp', 'atemp', 'hum', 'windspeed']
X = day_df[feature_cols]
y = day_df['cnt']

# --- Train/Test split
# Positional split with no shuffling: the first 80% of rows train the
# models and the remaining rows are held out for testing.
train_size = int(0.8 * len(day_df))
X_train = X.iloc[:train_size]
X_test = X.iloc[train_size:]
y_train = y.iloc[:train_size]
y_test = y.iloc[train_size:]

# --- Linear Regression
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)

# --- Random Forest
# random_state is fixed so repeated runs build the same forest.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# =============================================
# 7. Plot: Feature Importance (Random Forest)
# =============================================
# Pair each feature name with the fitted forest's importance score and
# order the table from most to least important.
importances = rf.feature_importances_
feat_imp = pd.DataFrame({'Feature': X.columns, 'Importance': importances})
feat_imp = feat_imp.sort_values(by='Importance', ascending=False)

fig_feat_imp = px.bar(
    feat_imp,
    x='Feature',
    y='Importance',
    color='Importance',
    title='Random Forest Feature Importances',
    color_continuous_scale='Viridis',
)
fig_feat_imp.update_layout(template='plotly_white')
fig_feat_imp.show()

# =============================================
# 8. Plot: Actual vs Predicted (Linear Regression)
# =============================================
# Scatter of held-out actuals against the linear model's predictions.
prediction_points = go.Scatter(
    x=y_test,
    y=y_pred_lr,
    mode='markers',
    name='Predictions',
    marker={'color': '#1f77b4'},
)

# Reference diagonal (y = x) spanning the observed range of the test target;
# points on this line are predictions that match the actual value exactly.
diagonal_ends = [y_test.min(), y_test.max()]
perfect_fit_line = go.Scatter(
    x=diagonal_ends,
    y=diagonal_ends,
    mode='lines',
    name='Perfect Fit',
    line={'color': 'red', 'dash': 'dash'},
)

fig_lr = go.Figure(data=[prediction_points, perfect_fit_line])
fig_lr.update_layout(
    title='Linear Regression: Actual vs Predicted',
    xaxis_title='Actual Rides',
    yaxis_title='Predicted Rides',
    template='plotly_white',
)
fig_lr.show()

# =============================================
# 9. Plot: Residuals (Linear Regression)
# =============================================
# Residual = actual minus predicted for each held-out row.
residuals = y_test - y_pred_lr

residual_axis_labels = {'x': 'Predicted Rides', 'y': 'Residuals'}
fig_resid = px.scatter(
    x=y_pred_lr,
    y=residuals,
    title='Residual Plot (Linear Regression)',
    labels=residual_axis_labels,
)
# Dashed zero line: the reference for "no error".
fig_resid.add_hline(y=0, line_dash='dash', line_color='red')
fig_resid.update_layout(template='plotly_white')
fig_resid.show()

In [12]:
import pandas as pd
from prophet import Prophet
import plotly.graph_objects as go

# -------------------------------
# 1. Load & prepare data
# -------------------------------
# Keep only the date and the two rider counts, parse the date, add the
# target 'y' as casual + registered, and rename the date column to 'ds'
# (the column names passed to Prophet's fit below are 'ds' and 'y').
df = (
    pd.read_csv('day.csv')[['dteday', 'casual', 'registered']]
    .assign(
        dteday=lambda frame: pd.to_datetime(frame['dteday']),
        y=lambda frame: frame['casual'] + frame['registered'],
    )
    .rename(columns={'dteday': 'ds'})
)

# -------------------------------
# 2. Fit Prophet model
# -------------------------------
# Yearly and weekly seasonal components are enabled, daily is disabled,
# seasonality is additive, and the uncertainty interval width is 0.95.
prophet_settings = {
    'yearly_seasonality': True,
    'weekly_seasonality': True,
    'daily_seasonality': False,
    'seasonality_mode': 'additive',
    'interval_width': 0.95,
}
m = Prophet(**prophet_settings)
m.fit(df[['ds', 'y']])

# -------------------------------
# 3. Forecast for 60 days
# -------------------------------
# Ask for 60 extra periods beyond the training data, then predict on the
# resulting frame.
horizon_days = 60
future = m.make_future_dataframe(periods=horizon_days)
forecast = m.predict(future)

# -------------------------------
# 4. Plot with Plotly (customized for management)
# -------------------------------
# Dotted gray styling shared by both interval bounds.
band_line = {'color': 'gray', 'dash': 'dot'}

# Trace order matters: the lower bound uses fill='tonexty', which shades
# toward the trace added immediately before it, so the upper bound must
# directly precede the lower bound.
forecast_traces = [
    # Historical
    go.Scatter(
        x=df['ds'],
        y=df['y'],
        mode='lines',
        name='Historical',
        line={'color': 'blue', 'width': 2},
    ),
    # Forecast
    go.Scatter(
        x=forecast['ds'],
        y=forecast['yhat'],
        mode='lines',
        name='Forecast',
        line={'color': 'orange', 'width': 2},
    ),
    # Upper CI
    go.Scatter(
        x=forecast['ds'],
        y=forecast['yhat_upper'],
        mode='lines',
        name='Upper CI',
        line=band_line,
        showlegend=True,
    ),
    # Lower CI
    go.Scatter(
        x=forecast['ds'],
        y=forecast['yhat_lower'],
        mode='lines',
        name='Lower CI',
        line=band_line,
        fill='tonexty',  # fill area between upper and lower
        fillcolor='rgba(128,128,128,0.2)',
        showlegend=True,
    ),
]

fig = go.Figure(data=forecast_traces)

# Legend is pinned to the top-left corner of the plotting area.
legend_position = {'yanchor': 'top', 'y': 0.99, 'xanchor': 'left', 'x': 0.01}
fig.update_layout(
    title="60-Day Forecast of Total Rides (with Confidence Intervals)",
    xaxis_title="Date",
    yaxis_title="Total Rides",
    template="plotly_white",
    legend=legend_position,
)

fig.show()

DEBUG	cmdstanpy:filesystem.py:_temp_single_json()- input tempfile: /var/folders/x8/3h_bm8d17flf32mfss6sz9nm0000gn/T/tmpxhwe8p_7/yxk_3rae.json
DEBUG	cmdstanpy:filesystem.py:_temp_single_json()- input tempfile: /var/folders/x8/3h_bm8d17flf32mfss6sz9nm0000gn/T/tmpxhwe8p_7/ol94hbqx.json
DEBUG	cmdstanpy:model.py:_run_cmdstan()- idx 0
DEBUG	cmdstanpy:model.py:_run_cmdstan()- running CmdStan, num_threads: None
DEBUG	cmdstanpy:model.py:_run_cmdstan()- CmdStan args: ['/opt/miniconda3/envs/dsi_participant/lib/python3.9/site-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=67907', 'data', 'file=/var/folders/x8/3h_bm8d17flf32mfss6sz9nm0000gn/T/tmpxhwe8p_7/yxk_3rae.json', 'init=/var/folders/x8/3h_bm8d17flf32mfss6sz9nm0000gn/T/tmpxhwe8p_7/ol94hbqx.json', 'output', 'file=/var/folders/x8/3h_bm8d17flf32mfss6sz9nm0000gn/T/tmpxhwe8p_7/prophet_model1072y6gi/prophet_model-20250723221313.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
22:13:13 - cmdstanpy - INFO - Chain [1] start pr