# Time Series Forecasting Models

Building and comparing multiple time series models for Boston 311 service request forecasting.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
warnings.filterwarnings('ignore')

# Load the processed feature table, parse the 'date' column as datetimes,
# and order the rows by date so later positional slices are chronological.
df = (
    pd.read_parquet('../data/processed/daily_features.parquet')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .sort_values('date')
)

# Quick sanity summary of what was loaded.
first_date = df['date'].min()
last_date = df['date'].max()
print("Data shape:", df.shape)
print("Date range:", first_date, "to", last_date)
print("\nColumns:", df.columns.tolist())
df.head()

## Data Overview

In [None]:
# Plot the full daily request series, then print summary statistics for it.
overview_fig, overview_ax = plt.subplots(figsize=(15, 6))
overview_ax.plot(df['date'], df['count'], linewidth=0.8, alpha=0.8)
overview_ax.set_title('Daily 311 Request Volume', fontsize=14, fontweight='bold')
overview_ax.set_xlabel('Date')
overview_ax.set_ylabel('Number of Requests')
overview_ax.grid(True, alpha=0.3)
overview_fig.tight_layout()
plt.show()

count_summary = df['count'].describe()
print("\nBasic statistics:")
print(count_summary)

## Train/Test Split

Split the data chronologically into training and test sets, holding out the last 90 days (the final 90 rows of the date-sorted data) for testing.

In [None]:
# Hold out the final `test_days` rows (df is sorted by date) as the test set;
# everything before them is the training set. Copies keep the two frames
# independent of `df`.
test_days = 90
train = df.iloc[:-test_days].copy()
test = df.iloc[-test_days:].copy()

train_start, train_end = train['date'].min(), train['date'].max()
test_start, test_end = test['date'].min(), test['date'].max()
print("Training set:", len(train), "days", train_start, "to", train_end)
print("Test set:", len(test), "days", test_start, "to", test_end)

# Visualise both segments and mark where the test period begins.
split_fig, split_ax = plt.subplots(figsize=(15, 6))
split_ax.plot(train['date'], train['count'], label='Train', linewidth=0.8)
split_ax.plot(test['date'], test['count'], label='Test', linewidth=0.8, color='orange')
split_ax.axvline(x=test['date'].min(), color='red', linestyle='--', alpha=0.5, label='Split point')
split_ax.set_title('Train/Test Split', fontsize=14, fontweight='bold')
split_ax.set_xlabel('Date')
split_ax.set_ylabel('Number of Requests')
split_ax.legend()
split_ax.grid(True, alpha=0.3)
split_fig.tight_layout()
plt.show()

In [None]:
from prophet import Prophet

# Fit a Prophet model on the training period and score its forecast on the
# held-out test period (MAE, RMSE, MAPE, R²).

# Prophet expects the timestamp column to be named 'ds' and the target 'y'.
train_prophet = train[['date', 'count']].rename(columns={'date': 'ds', 'count': 'y'})
test_prophet = test[['date', 'count']].rename(columns={'date': 'ds', 'count': 'y'})


prophet_model = Prophet(
    daily_seasonality=False,
    weekly_seasonality=True,
    yearly_seasonality=True,
    changepoint_prior_scale=0.05
)
prophet_model.fit(train_prophet)

# The forecast frame covers the training history plus len(test) future periods.
future = prophet_model.make_future_dataframe(periods=len(test))
prophet_forecast = prophet_model.predict(future)

# NOTE(review): taking the last len(test) rows and relabelling them with the
# test dates assumes the test set is a gap-free run of consecutive days right
# after the training data -- confirm the source data has no missing days.
prophet_pred = prophet_forecast[['ds', 'yhat']].tail(len(test))
prophet_pred = prophet_pred.set_index('ds')['yhat']
prophet_pred.index = test['date']

# Score on plain NumPy arrays. test['count'] keeps df's row labels while
# prophet_pred is indexed by date, so Series arithmetic between them would
# align on mismatched labels instead of comparing values position by position.
y_true = test['count'].to_numpy(dtype=float)
y_pred = prophet_pred.to_numpy(dtype=float)

prophet_mae = mean_absolute_error(y_true, y_pred)
prophet_rmse = np.sqrt(mean_squared_error(y_true, y_pred))

# MAPE is undefined where the actual value is zero, so those days are skipped;
# if every actual value is zero the metric is reported as NaN.
nonzero_actual = y_true != 0
if nonzero_actual.any():
    prophet_mape = np.mean(
        np.abs((y_true[nonzero_actual] - y_pred[nonzero_actual]) / y_true[nonzero_actual])
    ) * 100
else:
    prophet_mape = np.nan

prophet_r2 = r2_score(y_true, y_pred)

print("\nProphet Performance:")
print("MAE:", prophet_mae)
print("RMSE:", prophet_rmse)
print("MAPE:", prophet_mape)
print("R²:", prophet_r2)

In [None]:
# Compare the Prophet forecast with the actual test values, with the last
# 90 training rows shown for context, then plot the fitted model components.
recent_train = train.tail(90)

forecast_fig, forecast_ax = plt.subplots(figsize=(15, 6))
forecast_ax.plot(recent_train['date'], recent_train['count'], label='Train (last 90 days)', linewidth=1.5)
forecast_ax.plot(test['date'], test['count'], label='Actual', linewidth=1.5, color='green')
forecast_ax.plot(test['date'], prophet_pred, label='Prophet Forecast', linewidth=1.5, linestyle='--', color='blue')
# Vertical marker at the first test date (start of the forecast horizon).
forecast_ax.axvline(x=test['date'].min(), color='gray', linestyle='--', alpha=0.5)
forecast_ax.set_title('Prophet Model Predictions', fontsize=14, fontweight='bold')
forecast_ax.set_xlabel('Date')
forecast_ax.set_ylabel('Number of Requests')
forecast_ax.legend()
forecast_ax.grid(True, alpha=0.3)
forecast_fig.tight_layout()
plt.show()

# Component plots produced by Prophet for the full forecast frame.
fig = prophet_model.plot_components(prophet_forecast)
plt.tight_layout()
plt.show()