# Import libraries


In [20]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
from scipy import stats

# Data loading and vizualisation

In [22]:
# Load and preprocess data
X = pd.read_csv("../data/X.csv")
Y = pd.read_csv("../data/Y.csv")["DispFrames"]

# Preprocessing: Remove TimeStamp and missing values
timestamp = X["TimeStamp"]
X.drop(X.columns[0:2], axis=1, inplace=True)
X.dropna(how='any', inplace=True)

# Data preparation: Standardization and Outlier Removal

In [23]:
# Standardize the target variable to have zero mean and unit variance
Y_std = StandardScaler().fit_transform(Y.values.reshape(-1, 1)).flatten()

# Function to remove outliers by keeping 99% of the data based on deviation from the mean
def remove_outliers_to_keep_99_percent(data):
    threshold = np.percentile(np.abs(data - np.mean(data)), 99)
    return data[np.abs(data - np.mean(data)) < threshold]

# Apply outlier removal to the standardized target variable
Y_filtered = remove_outliers_to_keep_99_percent(Y_std)

In [24]:
# Import AutoRegression and ARIMA models for time series forecasting
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.arima.model import ARIMA

# Initialize an array to store predictions from AutoRegression (AR) models
predictions_ar = np.zeros(10)

# Train AR models with different lag values (p = 1 to 10) and make 1-step forecasts
for p in range(1,11):
  model_ar = AutoReg(Y_filtered, lags=p)
  model_result = model_ar.fit()
  # Forecasting with h = 1
  h = 1
  predictions_ar[p-1] = model_result.predict(start=len(Y_filtered), end=len(Y_filtered) + h - 1)

In [25]:
# Initialize an array to store predictions from Moving Average (MA) models
predictions_ma = np.zeros(10)

# Train MA models with different lag values (q = 1 to 10) and make 1-step forecasts
for q in range(1,11):
  model_ma = ARIMA(Y_filtered, order=(0, 1, q))
  model_result = model_ma.fit()
  # Forecasting with h = 1
  h = 1
  predictions_ma[p-1] = model_result.forecast(steps = h)

  warn('Non-invertible starting MA parameters found.'


In [26]:
# Initialize a 2D array to store predictions from ARIMA models
predictions_arima= np.zeros((10, 5))

# Train ARIMA models with different p and q values and make 1-step forecasts
for p in range(1,11):
  for q in range(1,6):
    model_arima = ARIMA(Y_filtered, order=(p, 1, q))
    model_result = model_arima.fit()
    # Forecasting with h = 1
    h = 1
    predictions_arima[p-1, q-1] = model_result.forecast(steps=h)


  warn('Non-stationary starting autoregressive parameters'


In [28]:
# Import Exponential Smoothing model for forecasting
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Initialize a dictionary to store predictions from Exponential Smoothing models
alpha_values = [0.2, 0.5, 0.8] # Different smoothing levels
predictions_exp_smoothing = {}

# Train Exponential Smoothing models for different alpha values and make 1-step forecasts
for alpha in alpha_values:
    model_exp = ExponentialSmoothing(Y_filtered, trend=None, seasonal=None, seasonal_periods=None)
    model_result = model_exp.fit(smoothing_level=alpha, optimized=False)
    # Forecasting with h = 1
    h = 1
    predictions_exp_smoothing[alpha] = model_result.forecast(steps=h)


In [30]:
# Evaluate the AR models using Mean Absolute Error (MAE)
mae_ar = [mean_absolute_error(Y_filtered[i:i+1], predictions_ar[i:i+1]) for i in range(10)]

# Evaluate the MA models using MAE
mae_ma = [mean_absolute_error(Y_filtered[i:i+1], predictions_ma[i:i+1]) for i in range(10)]

# Initialize a 2D array to store MAE values for ARIMA models
mae_arima = np.zeros((10, 5))

# Evaluate the ARIMA models using MAE
for p in range(10):
    for q in range(5):
        mae_arima[p, q] = mean_absolute_error(Y_filtered[p:p+1], predictions_arima[p, q:q+1])

# Evaluate the Exponential Smoothing models using MAE
mae_exp_smoothing = {alpha: mean_absolute_error(Y_filtered[:1], predictions_exp_smoothing[alpha][:1]) for alpha in alpha_values}

# Print the MAE results
print("MAE for AR models:", mae_ar)
print("MAE for MA models:", mae_ma)
print("MAE for ARIMA models:", mae_arima)
print("MAE for Exponential Smoothing models:", mae_exp_smoothing)

MAE for AR models: [0.17742240182076338, 0.08432339436687364, 0.08521492781973244, 0.08498932365173956, 0.08372567525345803, 0.16676584633109373, 0.08166658760240386, 0.07966405751799166, 0.07483508859319377, 0.07122926879518665]
MAE for MA models: [0.16158952802218815, 0.4113237026103098, 0.4113237026103098, 0.4113237026103098, 0.4113237026103098, 0.16158952802218815, 0.4113237026103098, 0.4113237026103098, 0.4113237026103098, 0.003603391066798911]
MAE for ARIMA models: [[0.2481054  0.24788832 0.24778862 0.24773226 0.24771131]
 [0.0020189  0.00478394 0.00471381 0.00479689 0.00187571]
 [0.0021915  0.00453618 0.00484561 0.00569196 0.00085055]
 [0.00236542 0.00495462 0.00561808 0.00369246 0.00565604]
 [0.00253777 0.0020429  0.00473139 0.00373827 0.00640393]
 [0.24697193 0.24440169 0.24465383 0.24841831 0.24361624]
 [0.00301091 0.00485523 0.00456549 0.00135353 0.00539508]
 [0.00327997 0.00529032 0.00682712 0.00322764 0.00528731]
 [0.00343875 0.00335285 0.00519759 0.0042998  0.00610393]
 [