### **Package**

In [None]:
import subprocess
import sys

# Make sure autogluon.tabular is importable in this kernel; when the import
# fails, install it with pip for the interpreter that runs this notebook.
try:
    from autogluon.tabular import TabularPredictor
except ImportError:
    print("AutoGluon is not installed. Installing...")
    pip_install_command = [sys.executable, "-m", "pip", "install", "autogluon.tabular"]
    # check_call raises CalledProcessError if pip exits with a non-zero status.
    subprocess.check_call(pip_install_command)
    print("AutoGluon installed.")
else:
    print("AutoGluon is already installed.")


In [None]:
from autogluon.tabular import TabularPredictor
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, r2_score

In [None]:
# Record which AutoGluon release produced the results in this notebook.
import autogluon.tabular

autogluon_version = autogluon.tabular.__version__
print(autogluon_version)

# GPU or CPU: pick the first CUDA device when PyTorch can see one, else the CPU.
# NOTE(review): `device` is only printed in the cells visible here; nothing
# visible passes it to AutoGluon.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

In [None]:
# Mount Google Drive so the dataset and model paths under /content/drive
# used by the cells below resolve (Colab-only).
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

### **1. Dataset**

In [None]:
# Data source: CSV on the mounted Google Drive, indexed by its parsed 'timestamp' column.
data = pd.read_csv('/content/drive/MyDrive/Test/Dataset/data_office_1.csv')
data['date'] = pd.to_datetime(data['timestamp'])
data = data.set_index('date')


# Find discontinuous timestamps: every row is expected to follow its predecessor
# by exactly 5 minutes.
# The first row has no predecessor, so its diff is NaT and `NaT != 5 min` is True;
# it is skipped here so the first timestamp is not reported as a spurious gap.
time_diffs = data.index.to_series().diff()
gaps = time_diffs.iloc[1:]
irregular_timestamps = gaps.index[(gaps != pd.Timedelta(minutes=5)).to_numpy()]
print("Discontinuous timestamps:")
print(irregular_timestamps)


# Choose the time interval of input data.
# Twelve calendar days are kept: Sep 7-8, Sep 10, Sep 13-17 and Sep 20-23 (2021).
# NOTE(review): the skipped days look like weekends plus Sep 9 -- the reason for
# dropping them is not stated in this notebook; confirm with the data owner.
data_temp1 = data.loc['2021-09-07':'2021-09-08'].copy()
data_temp2 = data.loc['2021-09-10':'2021-09-10'].copy()
data_temp3 = data.loc['2021-09-13':'2021-09-17'].copy()
data_temp4 = data.loc['2021-09-20':'2021-09-23'].copy()

# Prepare for combining the time-series data: re-date the later segments so the
# selected days become consecutive calendar days starting at 2021-09-07.
#   2021-09-10       -> 2021-09-09       (-1 day)
#   2021-09-13..17   -> 2021-09-10..14   (-3 days)
#   2021-09-20..23   -> 2021-09-15..18   (-5 days)
# From here on the index no longer carries the real measurement dates.
data_temp2.index = data_temp2.index + pd.DateOffset(days=-1)
date_offset = pd.DateOffset(days=-3)
data_temp3.index = data_temp3.index + date_offset
date_offset_5_days = pd.DateOffset(days=-5)
data_temp4.index = data_temp4.index + date_offset_5_days

data_combined = pd.concat([data_temp1, data_temp2, data_temp3, data_temp4])
data_selected = data_combined
print("data_combined.shape:", data_combined.shape)

unique_days = data_combined.index.date
print("Total days:", pd.Series(unique_days).nunique())

In [None]:
# Create features and labels DataFrame
feature_columns = [
    'occupant_count [number]',
    'air_temperature [Celsius]',
    'indoor_relative_humidity [%]',
    'dry_bulb_temp [Celsius]',
    'outdoor_relative_humidity [%]',
    'wind_speed [m/s]',
    'global_horizontal_solar_radiation [W/m2]',
]
energy_columns = [
    'ceiling_fan_energy [kWh]',
    'chilled_water_energy [kWh]',
    'ahu_fan_energy [kWh]',
]

features = data_selected[feature_columns].copy()
X = features.copy()
labels = data_selected[energy_columns].copy()

# Regression target: row-wise sum of the three energy columns.
labels['total_energy'] = labels.sum(axis=1)
Y_sum = labels['total_energy'].copy()

# Exception handling: overwrite the 17:25 sample with the mean of its two
# neighbours (17:20 and 17:30).
# NOTE(review): these timestamps are looked up in the index of `data_selected`;
# if that index was re-dated upstream, '2021-09-10' is not the original
# measurement date -- confirm the intended row is the one being patched.
neighbour_mean = (Y_sum.loc['2021-09-10 17:20:00'] + Y_sum.loc['2021-09-10 17:30:00'])/2
Y_sum.loc['2021-09-10 17:25:00'] = neighbour_mean


# Split data into train and test sets: the first 288*10 rows (by position) are
# used for training, the remaining rows for testing. No shuffling.
train_size = 288*10
X_train = X.iloc[:train_size]
X_test = X.iloc[train_size:]
Y_train = Y_sum.iloc[:train_size]
Y_test = Y_sum.iloc[train_size:]
print('X_train.shape:', X_train.shape)
print('X_test.shape:', X_test.shape)
print('Y_train.shape:', Y_train.shape)
print('Y_test.shape:', Y_test.shape)

# Show both target segments on one time axis.
fig, ax = plt.subplots(figsize=(10, 6))
Y_train.plot(ax=ax, label="Y_train")
Y_test.plot(ax=ax, label="Y_test")
ax.legend()
ax.set_title("Dataset visualization")
plt.show()

### **2. Training**

In [None]:
# Assemble the training table (feature columns plus the 'total_energy' target)
# and fit a regression predictor with default fit settings. Model artefacts are
# written to the Google Drive folder given by `path`.
train_data = pd.concat((X_train, Y_train), axis='columns')
predictor = TabularPredictor(
    label='total_energy',
    problem_type='regression',
    path='/content/drive/MyDrive/Test/Model_autogluon',
)
predictor.fit(train_data)


In [None]:
# Summarise the trained models. leaderboard() is called without a dataset, so
# test_data plays no part in this table.
leaderboard = predictor.leaderboard()
# Bare last expression so the notebook renders the table as rich output.
leaderboard

### **3. Prediction and Evaluation**

In [None]:
# Model whose test-set behaviour is reported in this cell. The same name is
# passed to both predict() and evaluate() so that `performance`, MAPE, R2, RMSE
# and the plot all describe ONE model. (Previously evaluate() scored the
# predictor's default best model while every other number came from
# NeuralNetTorch.) Set this to `predictor.model_best` to report the best model.
eval_model = 'NeuralNetTorch'

test_data = pd.concat([X_test, Y_test], axis=1)
predictions = predictor.predict(test_data, model=eval_model)


performance = predictor.evaluate(test_data, model=eval_model)
print("Evaluated model:", eval_model)
print(performance, '\n')


# Printed for reference only: this is the predictor's default model, which is
# not necessarily the model evaluated above.
best_model = predictor.model_best
print("Best model used by the predictor:", best_model, '\n')


# Evaluate by MAPE, R2 and RMSE
# MAPE divides by the actual value, so rows whose actual value is exactly zero
# are excluded from the MAPE computation only.
non_zero_mask = Y_test != 0
filtered_Y = Y_test[non_zero_mask]
filtered_predictions = predictions[non_zero_mask]
mape_value = mean_absolute_percentage_error(filtered_Y, filtered_predictions) * 100
print(f"MAPE: {mape_value:.3f}%")

r2_value = r2_score(Y_test, predictions)
print(f"R²: {r2_value:.3f}")

rmse_value = np.sqrt(mean_squared_error(Y_test, predictions))
print(f"RMSE: {rmse_value:.3f}")

# Plot
plt.figure(figsize=(10, 6))
plt.plot(Y_test.index, Y_test, label='Actual Values', color='blue')
plt.plot(predictions.index, predictions, label='Predicted Values', color='red')
plt.title('Y_test and Predictions')
plt.legend()
# Render the figure explicitly (as the dataset plot does) instead of leaving
# the Legend object as the cell's last expression.
plt.show()