In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
from itertools import product

# Reproducibility: fix the global NumPy seed used by this notebook.
SEED = 12
np.random.seed(SEED)

# Load the feature table, parse the timestamp column, and use it as the index.
data = pd.read_csv('./CFS_based_feature.csv')
data['timestamp'] = pd.to_datetime(data['timestamp'])
data = data.set_index('timestamp')

# 'cooling_power' is the series to forecast; every other column is treated
# as an exogenous regressor.
target = data['cooling_power']
exog = data.drop(columns=['cooling_power'])

# Fractions of the rows assigned to training and validation; the remainder
# (whatever is left after both) becomes the test set.
train_size = 0.7
val_size = 0.15

n_rows = len(data)
train_end = int(n_rows * train_size)
val_end = train_end + int(n_rows * val_size)

# Contiguous positional slices in file order.
# NOTE(review): assumes the CSV rows are already sorted by timestamp so the
# split is chronological -- TODO confirm.
y_train, exog_train = target.iloc[:train_end], exog.iloc[:train_end]
y_val, exog_val = target.iloc[train_end:val_end], exog.iloc[train_end:val_end]
y_test, exog_test = target.iloc[val_end:], exog.iloc[val_end:]

# Hyperparameter tuning for ARIMAX using AIC: fit every (p, d, q) candidate
# on the training split and keep the order with the lowest AIC.
best_arimax_order = None
best_arimax_aic = float('inf')

# Candidate non-seasonal orders (also reused by the SARIMAX search).
p_values = range(1, 3)
d_values = range(1, 2)
q_values = range(1, 3)

for order in product(p_values, d_values, q_values):
    try:
        model = ARIMA(endog=y_train, exog=exog_train, order=order)
        model_fit = model.fit()
    except Exception as e:
        # Best-effort search: a failing candidate is skipped, but reported so
        # that an empty result can be diagnosed instead of silently ignored.
        print(f"ARIMAX order={order} skipped: {type(e).__name__}: {e}")
        continue

    if model_fit.aic < best_arimax_aic:
        best_arimax_aic = model_fit.aic
        best_arimax_order = order

if best_arimax_order is None:
    # Every candidate failed (or produced a non-comparable AIC); downstream
    # code that builds the final ARIMAX model cannot work with order=None.
    print("WARNING: no ARIMAX candidate could be fitted; best_arimax_order is None")

# Hyperparameter tuning for SARIMAX using AIC: for each non-seasonal order
# and each seasonal (P, D, Q, m) candidate, fit on the training split and keep
# the combination with the lowest AIC.
best_sarimax_order = None
best_sarimax_seasonal_order = None
best_sarimax_aic = float('inf')

seasonal_p_values = range(0, 2)
seasonal_d_values = range(0, 2)
seasonal_q_values = range(0, 2)
# Seasonal period in observations.
# NOTE(review): 24 presumably means a daily cycle on hourly data -- confirm
# against the sampling interval of the CSV.
seasonal_m = [24]

for order in product(p_values, d_values, q_values):
    for seasonal_order in product(seasonal_p_values, seasonal_d_values, seasonal_q_values, seasonal_m):
        try:
            model = SARIMAX(endog=y_train, exog=exog_train, order=order, seasonal_order=seasonal_order)
            model_fit = model.fit(disp=False)
        except Exception as e:
            # Best-effort search: skip the failing candidate but report it so
            # that an empty result can be diagnosed instead of silently ignored.
            print(
                f"SARIMAX order={order} seasonal_order={seasonal_order} skipped: "
                f"{type(e).__name__}: {e}"
            )
            continue

        if model_fit.aic < best_sarimax_aic:
            best_sarimax_aic = model_fit.aic
            best_sarimax_order = order
            best_sarimax_seasonal_order = seasonal_order

if best_sarimax_order is None:
    # Every candidate failed (or produced a non-comparable AIC); downstream
    # code that builds the final SARIMAX model cannot work with order=None.
    print("WARNING: no SARIMAX candidate could be fitted; best_sarimax_order is None")




  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
 

In [None]:
# Model selector: 0 -> ARIMAX, any other value -> SARIMAX.
ariam_true=0

# Both model types are refit on train + validation so that the fitted sample
# ends immediately before the test period. Forecasting len(y_test) steps with
# exog_test then lines up with y_test. (Fitting on y_train alone would make
# the forecast cover the validation period while being scored against test.)
y_train_val = pd.concat([y_train, y_val], axis=0)
exog_train_val = pd.concat([exog_train, exog_val], axis=0)

if ariam_true==0:
    if best_arimax_order is None:
        raise RuntimeError("ARIMAX grid search produced no order; cannot fit the final model")
    final_model = ARIMA(endog=y_train_val, exog=exog_train_val, order=best_arimax_order)
    model_type = 'ARIMAX'
else:
    if best_sarimax_order is None or best_sarimax_seasonal_order is None:
        raise RuntimeError("SARIMAX grid search produced no order; cannot fit the final model")
    final_model = SARIMAX(endog=y_train_val, exog=exog_train_val, order=best_sarimax_order, seasonal_order=best_sarimax_seasonal_order)
    model_type = 'SARIMAX'

final_model_fit = final_model.fit()

# Out-of-sample forecast over the whole test horizon, driven by the test exogenous features.
predictions_test = final_model_fit.forecast(steps=len(y_test), exog=exog_test)

# Positional (0..n-1) copies, used for plotting on a common integer axis.
predictions_test_1=predictions_test.reset_index(drop=True)
y_test_1=y_test.reset_index(drop=True)

# Plot only the first part of the test horizon to keep the figure readable.
n_plot = 200
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(y_test_1[:n_plot].values, marker='o', label='Ground Truth')
ax.plot(predictions_test_1[:n_plot].values, marker='x', label='Predicted')
ax.set_title(f'CFS - Ground Truth vs Predicted Cooling Power ({model_type})')
ax.set_xlabel('Time Index')
ax.set_ylabel('Cooling Power')
ax.legend()
plt.show()

# Evaluate final model on test set.
# Metrics are computed on plain arrays (position-wise). The forecast index may
# not match y_test's timestamp index (e.g. statsmodels can fall back to an
# integer index when the date index carries no frequency), and pandas Series
# arithmetic aligns on labels, which would turn the MAPE into NaN.
y_true = np.asarray(y_test, dtype=float)
y_pred = np.asarray(predictions_test, dtype=float)

test_mae = mean_absolute_error(y_true, y_pred)
test_rmse = np.sqrt(mean_squared_error(y_true, y_pred))

# MAPE is undefined where the true value is 0; those points are excluded so a
# single zero target does not make the metric infinite.
nonzero = y_true != 0
if nonzero.any():
    test_mape = np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100
else:
    test_mape = float('nan')

print(f"Test MAE: {test_mae:.2f}")
print(f"Test RMSE: {test_rmse:.2f}")
print(f"Test MAPE: {test_mape:.2f}%")

print(best_arimax_order,best_sarimax_order,best_sarimax_seasonal_order)