In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error
from math import sqrt
from statsmodels.tsa.ar_model import AutoReg

In [None]:
# Colab-only step: make Google Drive available under /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

In [None]:
# Load the cleaned dataset; the 'Date' column is parsed to datetimes on read.
df = pd.read_csv('pirvision_office_dataset_clean.csv', parse_dates=['Date'])

# Build the hourly occupancy series right after loading. The AR / MA / ARMA
# sections below use `occupancy_series` before the ARIMA section defines it,
# so on a fresh kernel (Restart & Run All) they would raise NameError.
# `df` itself is left unmodified so the later preparation cells still run as written.
_pir_total = df.filter(like='PIR_', axis=1).sum(axis=1)   # row-wise sum of all PIR_* columns
_timestamps = pd.to_datetime(df['Date'].dt.strftime('%Y-%m-%d') + ' ' + df['Time'])
occupancy_series = (
    pd.Series(_pir_total.to_numpy(),
              index=pd.DatetimeIndex(_timestamps, name='Datetime'),
              name='Total_Occupancy')
    .resample('h')   # lowercase alias; uppercase 'H' is deprecated in recent pandas
    .mean()
    .fillna(0)       # hours without any reading are treated as zero occupancy
)

## AR Model

Fit Model

In [None]:
# Fit an autoregressive model with `p` lags to the occupancy series and
# print its estimation report.
p = 1
ar_spec = AutoReg(occupancy_series, lags=p)
ar_model = ar_spec.fit()
print(ar_model.summary())



NameError: name 'occupancy_series' is not defined

In-sample predictions

In [None]:
# Keep the fitted AR model's in-sample values for the actual-vs-fitted plot.
fitted_values = ar_model.fittedvalues


Plot Actual vs Fitted

In [None]:
import plotly.graph_objects as go

# Overlay the observed series and the AR model's in-sample fit.
actual_trace = go.Scatter(
    x=occupancy_series.index,
    y=occupancy_series.values,
    mode='lines',
    name='Actual',
)
fitted_trace = go.Scatter(
    x=fitted_values.index,
    y=fitted_values.values,
    mode='lines',
    name='Fitted (AR)',
    line={'color': 'red'},
)

fig = go.Figure(data=[actual_trace, fitted_trace])
fig.update_layout(
    title='AR Model: Actual vs Fitted Occupancy',
    xaxis_title='Date',
    yaxis_title='Occupancy',
    template='plotly_dark',
)
fig.show()


Forecast future values

In [None]:
# Forecast the next `n_periods` steps past the end of the sample.
n_periods = 30
n_obs = len(occupancy_series)
forecast = ar_model.predict(start=n_obs, end=n_obs + n_periods - 1, dynamic=False)

# One forecast step equals one sampling interval of the series (it is built
# with an hourly resample). The x-axis therefore advances by the index's own
# frequency; the previous daily index (`freq='D'`) stretched 30 hourly steps
# across 30 days.
step = occupancy_series.index.freq
forecast_index = pd.date_range(start=occupancy_series.index[-1] + step,
                               periods=n_periods, freq=step)

fig = go.Figure()

# Observed history followed by the out-of-sample forecast.
fig.add_trace(go.Scatter(x=occupancy_series.index, y=occupancy_series.values,
                         mode='lines', name='Actual'))
fig.add_trace(go.Scatter(x=forecast_index, y=forecast,
                         mode='lines', name='Forecast (AR)', line=dict(color='green')))

fig.update_layout(title='AR Model Forecast',
                  xaxis_title='Date', yaxis_title='Occupancy',
                  template='plotly_dark')
fig.show()


## MA Model

In [None]:
# Pure moving-average model: ARIMA with no AR terms and no differencing,
# i.e. order (p, d, q) = (0, 0, 2).
ma_order = (0, 0, 2)
ma_model = ARIMA(occupancy_series, order=ma_order)
ma_result = ma_model.fit()


NameError: name 'occupancy_series' is not defined

##### in sample predictions

In [None]:
# In-sample predictions of the MA model, indexed like the observed series.
ma_in_sample = ma_result.predict()
fitted_ma = pd.Series(ma_in_sample, index=occupancy_series.index)


##### Plot Actual vs Fitted

In [None]:
# Observed series (blue) against the MA model's in-sample fit (orange).
fig = go.Figure()
fig.add_scatter(
    x=occupancy_series.index,
    y=occupancy_series.values,
    mode='lines',
    name='Actual',
    line=dict(color='blue'),
)
fig.add_scatter(
    x=fitted_ma.index,
    y=fitted_ma.values,
    mode='lines',
    name='Fitted (MA)',
    line=dict(color='orange'),
)

ma_layout = {
    'title': 'MA Model: Actual vs Fitted Occupancy',
    'xaxis_title': 'Date',
    'yaxis_title': 'Occupancy',
    'template': 'plotly_dark',
}
fig.update_layout(**ma_layout)
fig.show()


##### Accuracy metrics

In [None]:
# Error of the MA model's in-sample predictions against the observed series.
mae = mean_absolute_error(y_true=occupancy_series, y_pred=fitted_ma)
mse = mean_squared_error(y_true=occupancy_series, y_pred=fitted_ma)
rmse = np.sqrt(mse)  # RMSE is the square root of the mean squared error

print(f"📊 MA Model Accuracy:\nMAE: {mae:.4f}\nRMSE: {rmse:.4f}")


📊 MA Model Accuracy:
MAE: 11740.8276
RMSE: 43881.9776


##### Forecast future values

In [None]:
# Forecast `n_periods` steps ahead with the MA model, including the interval bounds.
n_periods = 30
forecast_ma = ma_result.get_forecast(steps=n_periods)
forecast_values = forecast_ma.predicted_mean
conf_int_ma = forecast_ma.conf_int()

# One forecast step equals one sampling interval of the series (it is built
# with an hourly resample), so the x-axis advances by the index's own frequency.
# The previous daily index (`freq='D'`) stretched 30 hourly steps across 30 days.
step = occupancy_series.index.freq
forecast_index = pd.date_range(start=occupancy_series.index[-1] + step,
                               periods=n_periods, freq=step)

# Plot forecast
fig = go.Figure()
fig.add_trace(go.Scatter(x=occupancy_series.index, y=occupancy_series.values,
                         mode='lines', name='Actual'))
fig.add_trace(go.Scatter(x=forecast_index, y=forecast_values,
                         mode='lines', name='Forecast (MA)', line=dict(color='green')))
# Shaded band: first interval column traced forward, then the second column
# traced backward, so the two halves close into one filled polygon.
fig.add_trace(go.Scatter(x=forecast_index.tolist() + forecast_index[::-1].tolist(),
                         y=conf_int_ma.iloc[:, 0].tolist() + conf_int_ma.iloc[::-1, 1].tolist(),
                         fill='toself', fillcolor='rgba(0,255,0,0.2)',
                         line=dict(color='rgba(255,255,255,0)'), name='Confidence Interval'))

fig.update_layout(title='MA Forecast of Occupancy',
                  xaxis_title='Date', yaxis_title='Occupancy', template='plotly_dark')
fig.show()


### ARMA Model

Fit Model

In [None]:
# ARMA(2, 2) expressed as an ARIMA whose differencing order d is 0.
arma_order = (2, 0, 2)
arma_model = ARIMA(occupancy_series, order=arma_order)
arma_result = arma_model.fit()



Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.



In sample prediction

In [None]:
# In-sample predictions of the ARMA model, indexed like the observed series.
arma_in_sample = arma_result.predict()
fitted_arma = pd.Series(arma_in_sample, index=occupancy_series.index)


Plot actual vs fitted

In [None]:
# Observed series (blue) against the ARMA model's in-sample fit (orange).
observed_line = go.Scatter(
    x=occupancy_series.index,
    y=occupancy_series.values,
    mode='lines',
    name='Actual',
    line={'color': 'blue'},
)
arma_line = go.Scatter(
    x=fitted_arma.index,
    y=fitted_arma.values,
    mode='lines',
    name='Fitted (ARMA)',
    line={'color': 'orange'},
)

fig = go.Figure()
fig.add_traces([observed_line, arma_line])
fig.update_layout(
    title='ARMA Model: Actual vs Fitted Occupancy',
    xaxis_title='Date',
    yaxis_title='Occupancy',
    template='plotly_dark',
)
fig.show()


Accuracy metrics

In [None]:
# Error of the ARMA model's in-sample predictions against the observed series.
mae = mean_absolute_error(y_true=occupancy_series, y_pred=fitted_arma)
mse = mean_squared_error(y_true=occupancy_series, y_pred=fitted_arma)
rmse = np.sqrt(mse)  # RMSE is the square root of the mean squared error

print(f"📊 ARMA Model Accuracy:\nMAE: {mae:.4f}\nRMSE: {rmse:.4f}")


📊 ARMA Model Accuracy:
MAE: 3541.6789
RMSE: 32804.7611


Forecast Future values

In [None]:
# Forecast `n_periods` steps ahead with the ARMA model, including the interval bounds.
n_periods = 30
forecast_arma = arma_result.get_forecast(steps=n_periods)
forecast_values = forecast_arma.predicted_mean
conf_int_arma = forecast_arma.conf_int()

# One forecast step equals one sampling interval of the series (it is built
# with an hourly resample), so the x-axis advances by the index's own frequency.
# The previous daily index (`freq='D'`) stretched 30 hourly steps across 30 days.
step = occupancy_series.index.freq
forecast_index = pd.date_range(start=occupancy_series.index[-1] + step,
                               periods=n_periods, freq=step)

# Forecast plot
fig = go.Figure()
fig.add_trace(go.Scatter(x=occupancy_series.index, y=occupancy_series.values,
                         mode='lines', name='Actual'))
fig.add_trace(go.Scatter(x=forecast_index, y=forecast_values,
                         mode='lines', name='Forecast (ARMA)', line=dict(color='green')))
# Shaded band: first interval column traced forward, then the second column
# traced backward, so the two halves close into one filled polygon.
fig.add_trace(go.Scatter(x=forecast_index.tolist() + forecast_index[::-1].tolist(),
                         y=conf_int_arma.iloc[:, 0].tolist() + conf_int_arma.iloc[::-1, 1].tolist(),
                         fill='toself', fillcolor='rgba(0,255,0,0.2)',
                         line=dict(color='rgba(255,255,255,0)'), name='Confidence Interval'))

fig.update_layout(title='ARMA Forecast of Occupancy',
                  xaxis_title='Date', yaxis_title='Occupancy', template='plotly_dark')
fig.show()


### ARIMA MODEL

In [None]:
# Row-wise sum over every column whose name contains 'PIR_'.
df['Total_Occupancy'] = df.filter(like='PIR_', axis=1).sum(axis=1)


In [None]:
# If 'Time' is like 'HH:MM' or 'HH:MM:SS' as a string:
df['Datetime'] = pd.to_datetime(df['Date'].dt.strftime('%Y-%m-%d') + ' ' + df['Time'])
df.set_index('Datetime', inplace=True)

In [None]:
# Average the summed PIR readings per hour. Lowercase 'h' is the current pandas
# alias for hourly frequency; uppercase 'H' is deprecated and emits a FutureWarning.
# Hours with no readings have a NaN mean and are set to 0.
occupancy_series = df['Total_Occupancy'].resample('h').mean().fillna(0)


  occupancy_series = df['Total_Occupancy'].resample('H').mean()


In [None]:
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on the occupancy series; the first two entries
# of the result are reported as the test statistic and its p-value.
result = adfuller(occupancy_series)
adf_statistic, adf_p_value = result[0], result[1]
print("ADF Statistic:", adf_statistic)
print("p-value:", adf_p_value)


ADF Statistic: -4.348913522406622
p-value: 0.0003651143000228005


In [None]:
from pmdarima import auto_arima

# Non-seasonal automatic order search; `trace=True` logs every candidate tried.
model = auto_arima(y=occupancy_series, seasonal=False, trace=True)
model.summary()



'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.



Performing stepwise search to minimize aic
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=34056.407, Time=1.34 sec
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=37102.599, Time=0.03 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=33995.118, Time=0.08 sec



'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.



 ARIMA(0,0,1)(0,0,0)[0]             : AIC=36041.752, Time=0.14 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=33996.200, Time=0.18 sec



'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.



 ARIMA(1,0,1)(0,0,0)[0]             : AIC=33996.281, Time=0.21 sec



'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.



 ARIMA(2,0,1)(0,0,0)[0]             : AIC=33990.692, Time=0.88 sec



'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.



 ARIMA(3,0,1)(0,0,0)[0]             : AIC=33992.562, Time=0.98 sec



'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.



 ARIMA(1,0,2)(0,0,0)[0]             : AIC=34062.723, Time=0.33 sec



'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.



 ARIMA(3,0,0)(0,0,0)[0]             : AIC=33997.186, Time=0.36 sec



'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.



 ARIMA(3,0,2)(0,0,0)[0]             : AIC=inf, Time=2.80 sec



'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.



 ARIMA(2,0,1)(0,0,0)[0] intercept   : AIC=33996.175, Time=1.35 sec

Best model:  ARIMA(2,0,1)(0,0,0)[0]          
Total fit time: 8.701 seconds


0,1,2,3
Dep. Variable:,y,No. Observations:,1450.0
Model:,"SARIMAX(2, 0, 1)",Log Likelihood,-16991.346
Date:,"Wed, 28 May 2025",AIC,33990.692
Time:,11:24:20,BIC,34011.809
Sample:,08-08-2024,HQIC,33998.572
,- 10-08-2024,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ar.L1,1.8998,0.097,19.659,0.000,1.710,2.089
ar.L2,-0.9054,0.091,-9.956,0.000,-1.084,-0.727
ma.L1,-0.9059,0.122,-7.416,0.000,-1.145,-0.667
sigma2,8.634e+08,3.68e-10,2.35e+18,0.000,8.63e+08,8.63e+08

0,1,2,3
Ljung-Box (L1) (Q):,0.86,Jarque-Bera (JB):,6804087.81
Prob(Q):,0.35,Prob(JB):,0.0
Heteroskedasticity (H):,1.0,Skew:,1.43
Prob(H) (two-sided):,0.97,Kurtosis:,338.58


### Plot prediction vs Actual

In [None]:
import plotly.graph_objects as go

# In-sample predictions of the auto-selected model, indexed like the observed series.
arima_in_sample = model.predict_in_sample()
fitted_values = pd.Series(arima_in_sample, index=occupancy_series.index)

# Observed series (blue) and fitted series (red) on one figure.
fig = go.Figure()
fig.add_scatter(x=occupancy_series.index, y=occupancy_series.values,
                mode='lines', name='Actual', line={'color': 'blue'})
fig.add_scatter(x=fitted_values.index, y=fitted_values.values,
                mode='lines', name='Fitted', line={'color': 'red'})

# Titles, axis labels and theme.
arima_layout = dict(
    title='Actual vs Fitted Occupancy (In-Sample)',
    xaxis_title='Date',
    yaxis_title='Occupancy',
    template='plotly_dark',
)
fig.update_layout(**arima_layout)

fig.show()


### Make future forecast

In [None]:
# Step 1: Out-of-sample forecast with its confidence bounds
n_periods = 30  # Adjust the number of future steps you want
forecast, conf_int = model.predict(n_periods=n_periods, return_conf_int=True)

# Step 2: Create time index for forecast.
# One forecast step equals one sampling interval of the series (it is built
# with an hourly resample), so the index advances by the series' own frequency.
# The previous daily index (`freq='D'`) stretched 30 hourly steps across 30 days.
last_date = occupancy_series.index[-1]
step = occupancy_series.index.freq
forecast_index = pd.date_range(start=last_date + step, periods=n_periods, freq=step)

# Step 3: Create Plotly figure
fig = go.Figure()

# Actual values
fig.add_trace(go.Scatter(x=occupancy_series.index, y=occupancy_series.values,
                         mode='lines', name='Actual Occupancy'))

# Forecasted values
fig.add_trace(go.Scatter(x=forecast_index, y=forecast,
                         mode='lines', name='Forecast', line=dict(color='green')))

# Confidence Interval (shaded area): column 0 traced forward, then column 1
# traced backward, so the two halves close into one filled polygon.
fig.add_trace(go.Scatter(
    x=forecast_index.tolist() + forecast_index[::-1].tolist(),
    y=conf_int[:, 0].tolist() + conf_int[::-1, 1].tolist(),
    fill='toself',
    fillcolor='rgba(0,255,0,0.2)',
    line=dict(color='rgba(255,255,255,0)'),
    name='Confidence Interval',
    showlegend=True
))

# Layout
fig.update_layout(title='ARIMA Forecast of Occupancy',
                  xaxis_title='Date',
                  yaxis_title='Occupancy',
                  template='plotly_dark')

fig.show()


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.



### Accuracy metrics

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# --- In-sample accuracy of the auto-selected model ---
# Compare the observed series with the fitted values currently held in `fitted_values`.
actual_in_sample, predicted_in_sample = occupancy_series, fitted_values

mae_in = mean_absolute_error(y_true=actual_in_sample, y_pred=predicted_in_sample)
mse_in = mean_squared_error(y_true=actual_in_sample, y_pred=predicted_in_sample)
rmse_in = np.sqrt(mse_in)  # RMSE is the square root of the mean squared error

print("📊 In-Sample Accuracy:")
print(f"MAE: {mae_in:.4f}")
print(f"RMSE: {rmse_in:.4f}")

📊 In-Sample Accuracy:
MAE: 2733.2066
RMSE: 32979.4964


### SARIMA Model

In [None]:
from statsmodels.tsa.statespace.sarimax import SARIMAX


Fit Model

In [None]:
# Seasonal ARIMA on the occupancy series. The seasonal period is counted in
# observations, and the series is resampled hourly, so a daily cycle is 24
# steps. The previous value of 7 described a 7-hour cycle (it would only mean
# "weekly" on daily data).
# NOTE(review): confirm that a 24-hour cycle is the seasonality intended here.
SEASONAL_PERIOD = 24
sarima_model = SARIMAX(occupancy_series,
                       order=(1, 1, 1),
                       seasonal_order=(1, 1, 1, SEASONAL_PERIOD),
                       enforce_stationarity=False,
                       enforce_invertibility=False)
sarima_result = sarima_model.fit()


Inspect summary

In [None]:
# Print the text report of the fitted SARIMA model.
print(sarima_result.summary())


                                     SARIMAX Results                                     
Dep. Variable:                   Total_Occupancy   No. Observations:                 1450
Model:             SARIMAX(1, 1, 1)x(1, 1, 1, 7)   Log Likelihood              -16816.696
Date:                           Wed, 28 May 2025   AIC                          33643.392
Time:                                   11:46:52   BIC                          33669.730
Sample:                               08-08-2024   HQIC                         33653.226
                                    - 10-08-2024                                         
Covariance Type:                             opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.0103     31.012     -0.000      1.000     -60.793      60.773
ma.L1          0.0113     29.924      0.000

In-sample prediction

In [None]:
# In-sample predictions from the first to the last timestamp of the observed series.
first_ts, last_ts = occupancy_series.index[0], occupancy_series.index[-1]
fitted_sarima = sarima_result.get_prediction(start=first_ts, end=last_ts)
fitted_values = fitted_sarima.predicted_mean


Plot

In [None]:
# Observed series (blue) against the SARIMA model's in-sample fit (orange).
sarima_traces = [
    go.Scatter(
        x=occupancy_series.index,
        y=occupancy_series.values,
        mode='lines',
        name='Actual',
        line={'color': 'blue'},
    ),
    go.Scatter(
        x=fitted_values.index,
        y=fitted_values.values,
        mode='lines',
        name='Fitted (SARIMA)',
        line={'color': 'orange'},
    ),
]

fig = go.Figure(data=sarima_traces)
fig.update_layout(
    title='SARIMA Model: Actual vs Fitted Occupancy',
    xaxis_title='Date',
    yaxis_title='Occupancy',
    template='plotly_dark',
)
fig.show()


Forecast

In [None]:
# Forecast `n_periods` steps ahead with the SARIMA model, including the interval bounds.
n_periods = 30
forecast_sarima = sarima_result.get_forecast(steps=n_periods)

# One forecast step equals one sampling interval of the series (it is built
# with an hourly resample), so the x-axis advances by the index's own frequency.
# The previous daily index (`freq='D'`) stretched 30 hourly steps across 30 days.
step = occupancy_series.index.freq
forecast_index = pd.date_range(start=occupancy_series.index[-1] + step,
                               periods=n_periods, freq=step)

forecast_values = forecast_sarima.predicted_mean
conf_int = forecast_sarima.conf_int()

fig = go.Figure()

fig.add_trace(go.Scatter(x=occupancy_series.index, y=occupancy_series.values,
                         mode='lines', name='Actual'))
fig.add_trace(go.Scatter(x=forecast_index, y=forecast_values,
                         mode='lines', name='Forecast (SARIMA)', line=dict(color='green')))
# Shaded band: first interval column traced forward, then the second column
# traced backward, so the two halves close into one filled polygon.
fig.add_trace(go.Scatter(x=forecast_index.tolist() + forecast_index[::-1].tolist(),
                         y=conf_int.iloc[:, 0].tolist() + conf_int.iloc[::-1, 1].tolist(),
                         fill='toself', fillcolor='rgba(0,255,0,0.2)',
                         line=dict(color='rgba(255,255,255,0)'), name='Confidence Interval'))

fig.update_layout(title='SARIMA Forecast of Occupancy',
                  xaxis_title='Date', yaxis_title='Occupancy', template='plotly_dark')
fig.show()
