In [None]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from sklearn.metrics import mean_absolute_error, mean_squared_error
import plotly.graph_objects as go

In [None]:
# Read in data
# Loads the CSV from the notebook's working directory into `df`.
# Later cells read its 'date' and 'steps' columns.
df = pd.read_csv("daily_steps.csv")

In [None]:
# Render the loaded frame with the notebook's rich display
display(df)

In [None]:
# Parse the 'date' column into datetimes (updates `df` in place)
df['date'] = pd.to_datetime(df['date'])

# Index by date so the frame can be resampled on the calendar
df1 = df.set_index(keys='date')

# Month-end ('ME') buckets, each holding the mean of that month's rows
monthly_resampler = df1.resample(rule='ME')
monthly_steps = monthly_resampler.mean()

In [None]:
# Line chart of the monthly mean step counts (x axis comes from the date index)
fig_monthly, ax_monthly = plt.subplots(figsize=(12, 6))
ax_monthly.plot(monthly_steps['steps'], color='blue', linewidth=1.5)
ax_monthly.set_title("Monthly steps")
ax_monthly.set_xlabel("Date")
ax_monthly.set_ylabel("Steps")
plt.show()

In [None]:
# Check for autocorrelation
# Pass the 'steps' column explicitly rather than the whole frame: the ACF/PACF
# helpers expect a single series, and `monthly_steps` gains a 'Month' column
# further down the notebook, so passing the frame only works while it happens
# to have exactly one column.
plot_acf(monthly_steps['steps'])
plot_pacf(monthly_steps['steps'])
plt.show()

In [None]:
# Check for seasonality
from statsmodels.tsa.seasonal import seasonal_decompose

# Decompose the monthly series into trend, seasonal, and residual components
# using an additive model with a 12-observation (one year of months) period.
# The 'steps' column is selected explicitly so the decomposition does not depend
# on `monthly_steps` having a single column (a 'Month' column is added later).
decomposition = seasonal_decompose(monthly_steps['steps'], model='additive', period=12)
decomposition.plot()
plt.show()

In [None]:
# From these plots, it seems the data is seasonal and trending downward.
# Based on the residuals, a monthly cadence may not capture all the trends in the data

In [None]:
# Dickey-Fuller test
# Runs the augmented Dickey-Fuller test on the monthly series, letting
# statsmodels choose the lag length by AIC. `monthly_steps` is the only
# aggregated series this notebook defines, so the test must run on it.
# The ADF null hypothesis is a unit root (non-stationary), so a p-value
# below 0.05 is reported as 'Stationary'.
result = adfuller(monthly_steps['steps'], autolag='AIC')
p_value = result[1]  # result[0] is the test statistic, result[1] the p-value
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {p_value}')
print('Stationary' if p_value < 0.05 else 'Non-Stationary')

In [None]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Fit Holt-Winters model: additive trend, additive seasonality, 12-month season
model = ExponentialSmoothing(monthly_steps['steps'], trend='add', seasonal='add', seasonal_periods=12)
results = model.fit()

# Forecast for 12 months ahead
forecast_steps = 12
holtwinters_forecast = results.forecast(steps=forecast_steps)

# Approximate the forecast error with the standard deviation of the in-sample
# residuals. This is a single scalar, so the resulting band has the same width
# at every horizon (it does not widen further into the future).
forecast_se = np.std(results.resid)

# Confidence interval at 95% (normal approximation), centred on the
# Holt-Winters forecast produced above
z_score = 1.96
upper_bound = holtwinters_forecast + z_score * forecast_se
lower_bound = holtwinters_forecast - z_score * forecast_se

# Put in df
holtwinters_df = pd.DataFrame({
    'Forecast': holtwinters_forecast,
    'Upper Bound': upper_bound,
    'Lower Bound': lower_bound
})

In [None]:
# Render the Holt-Winters forecast together with its upper and lower bounds
display(holtwinters_df)

In [None]:
# Tag every monthly row with its calendar month number
# (this adds a 'Month' column to `monthly_steps` in place)
monthly_steps["Month"] = monthly_steps.index.month

# Mean step count per calendar month across the whole history
monthly_avg = monthly_steps.groupby("Month")["steps"].mean()

# Twelve month-end dates following the last observed month
# (the forecast year; the original notes call this 2025)
last_date = monthly_steps.index[-1]
first_future_date = last_date + pd.DateOffset(months=1)
future_dates = pd.date_range(start=first_future_date, periods=12, freq="ME")

# Calendar month number of each future date
future_months = future_dates.month

# Forecast each future month as the historical average for that calendar month
average_values = monthly_avg.loc[future_months].to_numpy()
average_forecast = pd.DataFrame({"steps": average_values}, index=future_dates)

print(average_forecast)

In [None]:
# Naive forecast: reuse the last 12 rows of monthly_steps (the most recent year,
# 2024 per the original notes) and relabel them with the future month-end dates
last_year_rows = monthly_steps.iloc[-12:]
shifted_forecast = last_year_rows.set_axis(future_dates, axis=0)

In [None]:
# Bare expression: the notebook renders the shifted forecast frame as the cell output
shifted_forecast

In [None]:
# Line the three forecasts up side by side on their shared date index,
# then give each column the name of the method that produced it
forecast_parts = [holtwinters_forecast, average_forecast, shifted_forecast['steps']]
forecast_df = pd.concat(forecast_parts, axis=1).set_axis(
    ['holtwinters', 'average', 'shifted'], axis=1
)
print(forecast_df)

# Persist the combined table (dates kept as the first column) for the streamlit dashboard
forecast_df.to_csv("combined_forecast.csv", index=True)

In [None]:
fig = go.Figure()

# One entry per line on the chart, in draw order:
# (x values, y values, legend label, line styling).
# The history is drawn solid; the three forecasts are dashed.
trace_specs = [
    (monthly_steps.index, monthly_steps['steps'], 'Historical Data',
     dict(color='blue')),
    (forecast_df.index, forecast_df['holtwinters'], "Holt Winter's Forecast",
     dict(color='red', dash='dash')),
    (forecast_df.index, forecast_df['average'], 'Average by Month',
     dict(color='green', dash='dash')),
    (forecast_df.index, forecast_df['shifted'], '2024 Shifted',
     dict(color='orange', dash='dash')),
]

for x_values, y_values, label, line_style in trace_specs:
    fig.add_trace(go.Scatter(
        x=x_values,
        y=y_values,
        mode='lines',
        name=label,
        line=line_style
    ))

# Titles, axis labels and theme
layout_options = dict(
    title="2025 Forecasting",
    xaxis_title="Month",
    yaxis_title="Steps",
    template="plotly_white"
)
fig.update_layout(**layout_options)

fig.show()