In [None]:
!pip uninstall -y prophet fbprophet pystan cmdstanpy

In [None]:
pip install "numpy<2.0" prophet==1.1.5 cmdstanpy==1.1.0

In [None]:
# Sanity check of the forecasting stack: report the versions in use and confirm
# that a Prophet model can be constructed.
import prophet
import cmdstanpy

print("Prophet version:", prophet.__version__)
print("CmdStanPy version:", cmdstanpy.__version__)
print("CmdStan path:", cmdstanpy.cmdstan_path())

from prophet import Prophet

# Build a throw-away model and check that it exposes a `stan_backend` attribute.
m = Prophet()
backend_attached = hasattr(m, "stan_backend")
print("Backend attached:", backend_attached)

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [None]:
# Standard-library helpers used by the forecasting cells below.
import warnings
import signal
import time
import logging

# Silence every warning for the rest of the session. Note this is a blanket filter:
# it also hides any warnings emitted by the modelling libraries.
warnings.filterwarnings("ignore")

In [None]:
# Load the park-level dataset, parse the month column as datetimes and order the
# rows by park and then chronologically.
# NOTE(review): the path points into a mounted Google Drive; the mount step is not
# visible here - confirm it runs before this cell.
df = (
    pd.read_csv("/content/drive/MyDrive/Capstone project/final_dataset.csv")
    .assign(YearMonth=lambda frame: pd.to_datetime(frame['YearMonth']))
    .sort_values(['Park', 'YearMonth'])
)

In [None]:
# Quick look at the first five rows of the sorted dataset.
df.head(n=5)

In [None]:
def calculate_metrics(actual, predicted):
    """Score a forecast against observed values.

    Parameters
    ----------
    actual : array-like
        Observed values.
    predicted : array-like
        Forecast values, same length as ``actual``.

    Returns
    -------
    dict
        Keys ``'RMSE'``, ``'MAE'``, ``'MAPE'``, ``'SMAPE'`` and ``'R2'``.
        MAPE and SMAPE are percentages. Any metric that is NaN or infinite is
        reported as 0 (a sentinel, not a genuine score).

    Notes
    -----
    Zero observations are handled per metric instead of being patched to a tiny
    epsilon up front. The epsilon patch made a *perfect* forecast of a zero month
    score 100% MAPE / 200% SMAPE for that point and inflated MAPE by ~1e10 for
    an imperfect one:

    * RMSE, MAE and R2 are computed on the untouched observations.
    * MAPE averages only over points whose observed value is non-zero
      (undefined, hence 0, when every observation is zero).
    * SMAPE treats the 0/0 case (observed and predicted both zero) as zero error.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)

    rmse = np.sqrt(mean_squared_error(actual, predicted))
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)

    abs_err = np.abs(actual - predicted)

    # MAPE is undefined where the observation is zero, so skip those points.
    nonzero = actual != 0
    if nonzero.any():
        mape = np.mean(abs_err[nonzero] / np.abs(actual[nonzero])) * 100
    else:
        mape = np.nan

    # SMAPE: where both values are zero the denominator is zero and the error is
    # zero too, so that point contributes 0 instead of a division error.
    denom = (np.abs(predicted) + np.abs(actual)) / 2
    ratio = np.divide(abs_err, denom, out=np.zeros_like(abs_err), where=denom != 0)
    smape = 100 * np.mean(ratio)

    raw = {'RMSE': rmse, 'MAE': mae, 'MAPE': mape, 'SMAPE': smape, 'R2': r2}
    # Replace invalid (NaN / infinite) values with 0 so downstream code always
    # receives plain finite numbers.
    return {name: (value if np.isfinite(value) else 0) for name, value in raw.items()}

In [None]:
# Route WARNING-and-above records to a log file. `force=True` (Python 3.8+) replaces
# any handlers already attached to the root logger; without it basicConfig silently
# does nothing when the root logger is already configured, and forecast_errors.log
# would never be written.
logging.basicConfig(filename="forecast_errors.log", level=logging.WARNING,
                    format='%(asctime)s - %(levelname)s - %(message)s',
                    force=True)


class TimeoutException(Exception):
    """Raised by the SIGALRM handler when a `signal.alarm` deadline expires."""


def handler(signum, frame):
    """SIGALRM handler: abort whatever is running by raising TimeoutException.

    The exception carries a message so that log lines built from `str(e)` say what
    happened instead of ending in an empty string.
    """
    raise TimeoutException("operation timed out (SIGALRM)")


# Install the handler for SIGALRM; callers arm it with signal.alarm(<seconds>) and
# disarm it with signal.alarm(0).
signal.signal(signal.SIGALRM, handler)

In [None]:
# Settings for the per-park model comparison (previously inline magic numbers).
MIN_HISTORY_ROWS = 36   # parks with fewer rows than this are skipped
HOLDOUT_ROWS = 12       # trailing rows held out to score each candidate model
FORECAST_MONTHS = 12    # number of future months produced by the winning model


def _forecast_ets(history, horizon):
    """Fit additive-trend / additive-seasonal exponential smoothing (period 12).

    Returns ``(None, values)``; ``None`` means the caller derives the dates itself.
    """
    fitted = ExponentialSmoothing(history['Recreation Visits'],
                                  trend='add', seasonal='add', seasonal_periods=12).fit()
    return None, np.asarray(fitted.forecast(horizon))


def _forecast_sarima(history, horizon):
    """Fit SARIMAX(1,1,1)x(1,1,1,12) and forecast `horizon` steps.

    Returns ``(None, values)``; ``None`` means the caller derives the dates itself.
    """
    fitted = SARIMAX(history['Recreation Visits'],
                     order=(1, 1, 1), seasonal_order=(1, 1, 1, 12)).fit(disp=False)
    return None, np.asarray(fitted.forecast(horizon))


def _forecast_prophet(history, horizon):
    """Fit Prophet with yearly seasonality and forecast `horizon` month-start steps.

    Returns ``(dates, values)`` taken from the last `horizon` rows of Prophet's own
    prediction frame.
    """
    frame = history.rename(columns={'YearMonth': 'ds', 'Recreation Visits': 'y'})
    model = Prophet(yearly_seasonality=True)
    model.fit(frame)
    future = model.make_future_dataframe(periods=horizon, freq='MS')
    tail = model.predict(future)[['ds', 'yhat']].tail(horizon)
    return tail['ds'].to_numpy(), tail['yhat'].to_numpy()


# Candidate models in evaluation order: label -> (time limit in seconds, forecaster).
_CANDIDATES = {
    'ETS': (15, _forecast_ets),
    'SARIMA': (20, _forecast_sarima),
    'Prophet': (20, _forecast_prophet),
}


def _score_model(name, time_limit, forecaster, park, train, test):
    """Fit one candidate on `train` under a SIGALRM time limit and score it on `test`.

    Returns the metrics dict (with a 'Model' entry) or None when the model raised or
    timed out; failures are written to the log rather than propagated.
    """
    try:
        started = time.time()
        signal.alarm(time_limit)
        try:
            _, predicted = forecaster(train, len(test))
        finally:
            # Always disarm the alarm - also on KeyboardInterrupt, which the
            # `except Exception` below does not catch. Otherwise a pending SIGALRM
            # would fire later, inside unrelated code.
            signal.alarm(0)
        metrics = calculate_metrics(test['Recreation Visits'], predicted)
        metrics['Model'] = name
        print(f"[{name} OK] {park} ({round(time.time() - started, 2)}s)")
        return metrics
    except Exception as e:
        logging.warning(f"{park}: {name} failed - {e}")
        return None


def _rank_models(metrics_all):
    """Build the comparison table for one park and add a 'Composite_Score' column.

    Lower score is better. The score is the equally weighted sum of min-max
    normalised RMSE, MAE and MAPE plus (1 - R2).
    NOTE(review): SMAPE_norm is computed but not part of the score, and R2 enters
    un-normalised - confirm this weighting is intended.
    """
    metrics_df = pd.DataFrame(metrics_all)
    metrics_df = metrics_df.replace([np.inf, -np.inf], np.nan).fillna(0)

    for col in ['RMSE', 'MAE', 'MAPE', 'SMAPE', 'R2']:
        if col not in metrics_df.columns:
            metrics_df[col] = 0

    # Cap the percentage errors so a single extreme value cannot dominate scaling.
    metrics_df['MAPE'] = metrics_df['MAPE'].clip(0, 1000)
    metrics_df['SMAPE'] = metrics_df['SMAPE'].clip(0, 1000)

    scaler = MinMaxScaler()
    for col in ['RMSE', 'MAE', 'MAPE', 'SMAPE']:
        if metrics_df[col].nunique() <= 1:
            # A single model (or identical values) cannot be min-max scaled.
            metrics_df[col + '_norm'] = 0
        else:
            metrics_df[col + '_norm'] = scaler.fit_transform(metrics_df[[col]])

    metrics_df['Composite_Score'] = (
        0.25 * metrics_df['RMSE_norm'] +
        0.25 * metrics_df['MAE_norm'] +
        0.25 * metrics_df['MAPE_norm'] +
        0.25 * (1 - metrics_df['R2'])
    )
    return metrics_df


results = []

for park, group in df.groupby('Park'):
    group = group.sort_values('YearMonth').reset_index(drop=True)
    if len(group) < MIN_HISTORY_ROWS:
        continue

    # Hold out the trailing rows for scoring; fit candidates on everything before.
    train = group.iloc[:-HOLDOUT_ROWS]
    test = group.iloc[-HOLDOUT_ROWS:]

    metrics_all = []
    for model_name, (time_limit, forecaster) in _CANDIDATES.items():
        scored = _score_model(model_name, time_limit, forecaster, park, train, test)
        if scored is not None:
            metrics_all.append(scored)

    # to skip if it fails
    if not metrics_all:
        logging.warning(f"{park}: All models failed or timed out")
        continue

    metrics_df = _rank_models(metrics_all)
    best_row = metrics_df.sort_values('Composite_Score').iloc[0]
    best_model_name = best_row['Model']

    # Refit the winner on the full history and forecast the next months.
    # NOTE(review): unlike the evaluation fits, this refit has no time limit.
    try:
        last_date = group['YearMonth'].max()
        _, forecaster = _CANDIDATES[best_model_name]
        forecast_dates, predicted = forecaster(group, FORECAST_MONTHS)
        if forecast_dates is None:
            # ETS / SARIMA return values only: label them with the month starts
            # following the last observed month.
            forecast_dates = pd.date_range(start=last_date + pd.offsets.MonthBegin(1),
                                           periods=FORECAST_MONTHS, freq='MS')

        monthly_forecast = pd.DataFrame({
            'Park': park,
            'Best_Model': best_model_name,
            'Forecast_Month': forecast_dates,
            'Predicted_Visits': predicted
        })
        monthly_forecast['State'] = group['State'].iloc[0]
        monthly_forecast['Region'] = group['Region'].iloc[0]
        results.append(monthly_forecast)

    except Exception as e:
        logging.warning(f"{park}: Final forecast failed - {e}")
        continue



In [None]:
# --------------------------------------------
# STEP 6: COMBINE AND EXPORT MONTHLY FORECASTS
# --------------------------------------------
# pd.concat fails with an opaque "No objects to concatenate" on an empty list, so
# stop early with a message that points at the log written by the forecasting loop.
if not results:
    raise ValueError("No forecasts were produced for any park; check forecast_errors.log")

final_forecasts = pd.concat(results, ignore_index=True)
final_forecasts['Forecast_Month'] = pd.to_datetime(final_forecasts['Forecast_Month'])

# Show floats as whole numbers with thousands separators (session-wide display option).
pd.set_option('display.float_format', '{:,.0f}'.format)

# Fix: Replace negative predictions with 0 and round to nearest integer
# NOTE(review): astype(int) raises if any prediction is NaN or infinite.
final_forecasts['Predicted_Visits'] = (final_forecasts['Predicted_Visits'].clip(lower=0)
              .round().astype(int))

final_forecasts.to_csv("/content/drive/MyDrive/Capstone project/monthly_forecasts.csv", index=False)
print("Monthly forecasts completed and saved to 'monthly_forecasts.csv'")
display(final_forecasts.tail(15))