In [None]:
import pandas as pd
import numpy as np

import itertools
import random
from math import sqrt
import matplotlib.pyplot as plt

from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf

from sklearn.metrics import mean_squared_error, mean_absolute_error

In [None]:
# Load the Jacksonville 2023 KPI export; 'Date of Service' is parsed as dates and used as the index.
df = pd.read_csv(
    'Jacksonville KPI 2023.csv',
    index_col='Date of Service',
    parse_dates=['Date of Service'],
)

# First-difference 'Actual Trips' (per the original analysis, the raw series had an ADF p-value above 0.05).
df['Actual Trips Diff'] = df['Actual Trips'].diff()

# diff() leaves a NaN in the first row. Note that dropna() without a subset removes
# every row that has a NaN in ANY column, not only that first row.
df = df.dropna()

df

In [None]:
# Augmented Dickey-Fuller test: check 'Actual Trips Diff' for stationarity.
result_diff = adfuller(df['Actual Trips Diff'])

# The first two entries of the result are reported as the test statistic and the p-value.
adf_statistic, adf_p_value = result_diff[0], result_diff[1]
print('ADF Statistic: %f' % adf_statistic)
print('p-value %f' % adf_p_value)

In [None]:
# Line plot of the first-differenced trip counts.
df['Actual Trips Diff'].plot.line(figsize=(10, 6))
plt.show()

In [None]:
# SARIMA model for the exogenous variable 'Operating Hours'.
# Parameter sets (p, d, q, P, D, Q, s) recorded as "best" during testing:
#   (1, 1, 1, 0, 1, 2, 30)
#   (1, 0, 0, 1, 1, 2, 30)   <- the set fitted below
operating_hours_series = df['Operating Hours']

sarima_model = SARIMAX(
    endog=operating_hours_series,
    order=(1, 0, 0),
    seasonal_order=(1, 1, 2, 30),
)
sarima_model_fit = sarima_model.fit(disp=False)

# Forecast the 14 steps (days) that follow the end of the sample.
operating_hours_forecast = sarima_model_fit.forecast(14)

In [None]:
# SARIMA model for the exogenous variable 'Turn Down Transports'.
# Parameter sets (p, d, q, P, D, Q, s) recorded as "best" during testing:
#   (1, 0, 0, 2, 2, 1, 14)
#   (0, 2, 2, 2, 0, 1, 30)   <- the set fitted below
turn_down_transports_series = df['Turn Down Transports']

# No exog argument is passed, so this is a plain SARIMA fit (the "X" of SARIMAX is unused).
# NOTE: this rebinds `sarima_model` / `sarima_model_fit`.
sarima_model = SARIMAX(
    endog=turn_down_transports_series,
    order=(0, 2, 2),  # d=2: the series is differenced twice
    seasonal_order=(2, 0, 1, 30),
)
sarima_model_fit = sarima_model.fit(disp=False)

# Forecast the 14 steps (days) that follow the end of the sample.
turn_down_transports_forecast = sarima_model_fit.forecast(14)

In [None]:
# SARIMAX model for 'Actual Trips', with 'Operating Hours' and 'Turn Down Transports' as exogenous regressors.
# Parameter sets (p, d, q, P, D, Q, s) recorded during testing:
#   from 1.: (2, 0, 2, 0, 1, 2, 30)
#   from 4.: (2, 2, 1, 1, 1, 2, 30)   <- fitted below; noted as usually giving better results
actual_trips = df['Actual Trips']
exog = df[['Operating Hours', 'Turn Down Transports']]

sarimax_model = SARIMAX(
    endog=actual_trips,
    exog=exog,  # remove this argument to test without exogenous values
    order=(2, 2, 1),
    seasonal_order=(1, 1, 2, 30),
)
sarimax_model_fit = sarimax_model.fit(disp=False)

In [None]:
# Combine the 14-day forecasts of the exogenous variables into one DataFrame,
# indexed by the 14 daily dates that follow the last observation in df.
future_dates = pd.date_range(start=df.index[-1] + pd.Timedelta(days=1), periods=14, freq='D')

# Column names and order mirror the exog frame the SARIMAX model was fitted on
# ('Operating Hours', 'Turn Down Transports').
# The forecasts are passed as plain arrays so they are placed by position: a Series
# would be re-aligned on its own index labels, and if that index does not equal
# future_dates (e.g. an integer index when no date frequency was available to the
# model) every value would silently become NaN.
# ('Units OSS' / units_oos_forecast was previously listed here and is currently disabled.)
future_exog = pd.DataFrame(
    {
        'Operating Hours': np.asarray(operating_hours_forecast),
        'Turn Down Transports': np.asarray(turn_down_transports_forecast),
    },
    index=future_dates,
)

In [None]:
# Display the assembled exogenous-forecast table as the cell output for a visual check.
future_exog

In [None]:
# Forecast 'Actual Trips' 14 steps (two weeks of daily data) ahead,
# supplying the forecasted exogenous variables for the out-of-sample period.
predicted_actual_trips = sarimax_model_fit.forecast(14, exog=future_exog)

# Show the forecast
print(predicted_actual_trips)

In [None]:
# Plot the historical 'Actual Trips' series together with the 14-day forecast.

# Historical values
plt.figure(figsize=(15, 6))
plt.plot(df.index, df['Actual Trips'], label='Historical Actual Trips', color='blue')

# Forecasted values: the x-axis is the 14 daily dates following the last observation.
# This range was previously assigned to a misspelled name, which left the computation
# unused and made the plot depend on a variable from another cell.
future_dates = pd.date_range(start=df.index[-1] + pd.Timedelta(days=1), periods=14, freq='D')
plt.plot(future_dates, predicted_actual_trips, label='Forecasted Actual Trips', color='red')

# Title, axis labels and legend
plt.legend()
plt.xlabel('Date')
plt.ylabel('Actual Trips')
plt.title('Historical and Forecasted Actual Trips')

# Save the figure before show(), which may clear the current figure
plt.savefig('Actual Trips Forecasted Trajectory.pdf', bbox_inches='tight')

plt.show()

In [None]:
# Build the output table: forecasted trips in the first column, followed by the exogenous forecasts.
forecast_df = future_exog.copy()

# Insert the forecast by position. A Series passed to insert() is aligned on index
# labels, so if the forecast's index differs from future_exog's dates the new column
# would be filled with NaN instead of the predictions.
forecast_df.insert(loc=0, column='Forecasted Actual Trips', value=np.asarray(predicted_actual_trips))

# Round every column to whole numbers for reporting.
forecast_df = forecast_df.round(0)

forecast_df

In [None]:
# Write the forecast table to CSV; the date index is written out under the header 'Date of Service'.
forecast_df.to_csv('Jacksonville KPI Forecast.csv', index_label='Date of Service')