In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install optuna



In [3]:
import xgboost as xgb
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from xgboost import plot_importance
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
import time
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression

In [4]:
# GPU or CPU
if torch.cuda.is_available():
  device = torch.device('cuda:0')
else:
  device = torch.device('cpu')
print(device)

cpu


### Data

In [54]:
data = pd.read_csv('/content/drive/MyDrive/CAI 2024_Flexible Building Space Usage Opt/Dataset/dataset_merged_r1+r2.csv')

# occupancy
data_occ_presence = data['occupant_presence [binary]'].to_numpy()
data_occ_count = data['occupant_count [number]'].to_numpy()

# energy use
data_energy_1 = data['ceiling_fan_energy [kWh]'].to_numpy()
data_energy_2 = data['lighting_energy [kWh]'].to_numpy()
data_energy_3 = data['plug_load_energy [kWh]'].to_numpy()
data_energy_4 = data['chilled_water_energy [kWh]'].to_numpy()
data_energy_5 = data['fcu_fan_energy [kWh]'].to_numpy()

# indoor data
data_t = data['air_temperature [Celsius]'].to_numpy()
data_h = data['indoor_relative_humidity [%]'].to_numpy()

# outdoor
data_outdoor_t = data['dry_bulb_temp [Celsius]'].to_numpy()
data_outdoor_h = data['outdoor_relative_humidity [%]'].to_numpy()

# time
data_month = data['Month'].to_numpy()
data_time_1 = data['Week'].to_numpy()
data_time_2 = data['Hour'].to_numpy()

In [55]:
# indoor: x1 - x3
x1 = np.reshape(data_occ_count,(-1,1))
x2 = np.reshape(data_t,(-1,1))
x3 = np.reshape(data_h,(-1,1))

# outdoor: x11 - x12
x11 = np.reshape(data_outdoor_t,(-1,1))
x12 = np.reshape(data_outdoor_h,(-1,1))


# **********************
X = np.hstack((x1,x2,x3,x11,x12))
print('X_shape\n',X.shape)
# **********************

# y1 - y3
y1 = np.reshape(data_energy_1,(-1,1))
y2 = np.reshape(data_energy_2,(-1,1))
y3 = np.reshape(data_energy_3,(-1,1))
y4 = np.reshape(data_energy_4,(-1,1))
y5 = np.reshape(data_energy_5,(-1,1))

Y = np.hstack((y1,y2,y3,y4,y5))
#print('Y_shape\n',Y.shape)

Y_sum = y1 + y2 + y3 + y4 + y5
#print('Y_sum_shape\n',Y_sum.shape)

X_shape
 (16650, 5)


In [56]:
# Training set: 0 - 60%
train_size = int(len(X) * 0.6)
train_range = train_size

# Validation set: 60% - 80%
valid_size = int(len(X) * 0.2)
valid_range = train_size + valid_size

# Testing set: 80% - 100%
test_size = int(len(X) * 0.2)
test_range = train_size + valid_size + test_size

#print(train_range)
#print(valid_range)
#print(test_range)

X_training, Y_training, Y_sum_training = X[:train_range ,:], Y[:train_range ,:], Y_sum[:train_range ,:]
print('X_training\n', X_training.shape)
#print('Y_training\n', Y_training.shape)
#print('Y_sum_training\n', Y_sum_training.shape)

X_valid, Y_valid, Y_sum_valid = X[train_range:valid_range ,:], Y[train_range:valid_range ,:], Y_sum[train_range:valid_range ,:]
print('X_valid\n', X_valid.shape)
#print('Y_valid\n', Y_valid.shape)
#print('Y_sum_valid\n', Y_sum_valid.shape)

X_test, Y_test, Y_sum_test = X[valid_range: ,:], Y[valid_range: ,:], Y_sum[valid_range: ,:]
print('X_test\n', X_test.shape)
#print('Y_test\n', Y_test.shape)
#print('Y_sum_test\n', Y_sum_test.shape)

X_training
 (9990, 5)
X_valid
 (3330, 5)
X_test
 (3330, 5)


### XGBRegressor model

In [19]:
def objective(trial):
    max_depth = trial.suggest_int('max_depth', 3, 20)
    n_estimators = trial.suggest_int('n_estimators', 100, 2000)
    learning_rate = trial.suggest_float('learning_rate', 0.01, 0.3)

    model = xgb.XGBRegressor(
    max_depth=max_depth,
    n_estimators=n_estimators,
    learning_rate=learning_rate,
    early_stopping_rounds=5,
    tree_method='hist',
    device='cpu'
    )

    try:
        model.fit(
            X_training, Y_sum_training,
            eval_set=[(X_valid, Y_sum_valid)],
            verbose=False
        )
        mse = mean_squared_error(Y_sum_valid, model.predict(X_valid))
        return mse
    except Exception as e:
        print(f"Error {e}")
        return float('inf')



In [20]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=500)

best_params_xgboost = study.best_params
print(f"Best max_depth: {best_params_xgboost.get('max_depth')}")
print(f"Best n_estimators: {best_params_xgboost.get('n_estimators')}")
print(f"Best learning_rate: {best_params_xgboost.get('learning_rate')}")

[I 2024-04-15 12:56:12,198] A new study created in memory with name: no-name-3b7a4a06-ba50-4f50-8966-2ecb0c000532
[I 2024-04-15 12:56:14,185] Trial 0 finished with value: 0.13474731127235104 and parameters: {'max_depth': 15, 'n_estimators': 1709, 'learning_rate': 0.05875220755829756}. Best is trial 0 with value: 0.13474731127235104.
[I 2024-04-15 12:56:14,733] Trial 1 finished with value: 0.12688414892920635 and parameters: {'max_depth': 12, 'n_estimators': 1714, 'learning_rate': 0.21244402780096508}. Best is trial 1 with value: 0.12688414892920635.
[I 2024-04-15 12:56:15,254] Trial 2 finished with value: 0.1282896941018968 and parameters: {'max_depth': 13, 'n_estimators': 1523, 'learning_rate': 0.29945527318387344}. Best is trial 1 with value: 0.12688414892920635.
[I 2024-04-15 12:56:15,479] Trial 3 finished with value: 0.15234269751465304 and parameters: {'max_depth': 3, 'n_estimators': 125, 'learning_rate': 0.2478979470749449}. Best is trial 1 with value: 0.12688414892920635.
[I 202

KeyboardInterrupt: 

### Neural Network

In [21]:
class SimpleNet(nn.Module):
    def __init__(self, num_layers, num_units):
        super(SimpleNet, self).__init__()
        layers = [nn.Linear(5, num_units)]
        layers += [nn.Linear(num_units, num_units) for _ in range(num_layers - 1)]
        layers.append(nn.Linear(num_units, 1))
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        for layer in self.layers:
            x = torch.relu(layer(x))
        return x


In [44]:
def objective(trial):
    num_layers = trial.suggest_int('num_layers', 1, 10)
    num_units = trial.suggest_int('num_units', 1, 30)
    lr = trial.suggest_float('lr', 1e-3, 1e-1, log=True)

    model = SimpleNet(num_layers=num_layers, num_units=num_units)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    x_train = torch.tensor(X_training, dtype=torch.float32)
    y_train = torch.tensor(Y_sum_training, dtype=torch.float32)
    x_val = torch.tensor(X_valid, dtype=torch.float32)
    y_val = torch.tensor(Y_valid, dtype=torch.float32)

    x_train, x_val = x_train.to(device), x_val.to(device)
    y_train, y_val = y_train.to(device), y_val.to(device)

    best_val_loss = 10000000000000000
    for epoch in range(1000):
        optimizer.zero_grad()
        output = model(x_train)
        loss = criterion(output, y_train)
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            val_output = model(x_val)
            val_loss = criterion(val_output, y_val)
            #print(loss.item(), val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = model
        else:
            break

    output = model(x_train)
    loss = criterion(output, y_train)
    return loss.item()

In [45]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=500)

best_params_nn = study.best_params
print(f"Best num_layers: {best_params_nn.get('num_layers')}")
print(f"Best num_units: {best_params_nn.get('num_units')}")
print(f"Best lr: {best_params_nn.get('lr')}")

[I 2024-04-15 13:07:07,354] A new study created in memory with name: no-name-9eb134a3-ea81-46db-90b9-fad01a22bfb5
[I 2024-04-15 13:07:15,223] Trial 0 finished with value: 0.1959504932165146 and parameters: {'num_layers': 4, 'num_units': 14, 'lr': 0.005268504838186188}. Best is trial 0 with value: 0.1959504932165146.
[I 2024-04-15 13:07:19,249] Trial 1 finished with value: 0.1959504932165146 and parameters: {'num_layers': 3, 'num_units': 9, 'lr': 0.0031006420707502167}. Best is trial 0 with value: 0.1959504932165146.
[I 2024-04-15 13:07:27,972] Trial 2 finished with value: 0.1959504932165146 and parameters: {'num_layers': 10, 'num_units': 3, 'lr': 0.030924005931541435}. Best is trial 0 with value: 0.1959504932165146.
[I 2024-04-15 13:07:36,815] Trial 3 finished with value: 0.1959504932165146 and parameters: {'num_layers': 6, 'num_units': 13, 'lr': 0.09357237074576524}. Best is trial 0 with value: 0.1959504932165146.
[I 2024-04-15 13:07:47,275] Trial 4 finished with value: 0.073168382048

KeyboardInterrupt: 

### Linear Regression

In [13]:
model_LR = LinearRegression()

### Main program

In [9]:
model = xgb.XGBRegressor(
    max_depth=best_params_xgboost['max_depth'],
    n_estimators=best_params_xgboost['n_estimators'],
    learning_rate=best_params_xgboost['learning_rate'],
    tree_method='hist',
    device='cpu'
)

model.fit(X_training, Y_sum_training)
prection_test = model.predict(X_test)

# MSE Loss
mse_loss = mean_squared_error(prection_test,Y_sum_test)
print('XGBoost Test loss:','%.3f'%mse_loss)


NameError: name 'best_params_xgboost' is not defined

In [60]:
model_nn = SimpleNet(num_layers=3, num_units=4)
optimizer = optim.Adam(model_nn.parameters(), lr=0.010)
criterion = nn.MSELoss()

x_train = torch.tensor(X_training, dtype=torch.float32)
y_train = torch.tensor(Y_sum_training, dtype=torch.float32)
x_val = torch.tensor(X_valid, dtype=torch.float32)
y_val = torch.tensor(Y_valid, dtype=torch.float32)

x_train, x_val = x_train.to(device), x_val.to(device)
y_train, y_val = y_train.to(device), y_val.to(device)

for epoch in range(1000):
  optimizer.zero_grad()
  output = model_nn(x_train)
  loss = criterion(output, y_train)
  loss.backward()
  optimizer.step()

  if epoch % 50 == 0:
    loss_val = criterion(model_nn(x_val), y_val)
    print('Training loss', '%.3f'%loss.item(), 'Val loss','%.3f'%loss_val.item())

prection_test = model_nn(torch.tensor(X_test, dtype=torch.float32))

# MSE Loss
mse_loss = mean_squared_error(prection_test.detach().numpy(),Y_sum_test)
print('NN Test loss:','%.3f'%mse_loss)

  return F.mse_loss(input, target, reduction=self.reduction)


Training loss 0.693 Val loss 0.297
Training loss 0.626 Val loss 0.307
Training loss 0.515 Val loss 0.368
Training loss 0.506 Val loss 0.360
Training loss 0.493 Val loss 0.421
Training loss 0.483 Val loss 0.433
Training loss 0.481 Val loss 0.462
Training loss 0.483 Val loss 0.508
Training loss 0.482 Val loss 0.417
Training loss 0.479 Val loss 0.468
Training loss 0.478 Val loss 0.474
Training loss 0.478 Val loss 0.486
Training loss 0.478 Val loss 0.445
Training loss 0.477 Val loss 0.484
Training loss 0.477 Val loss 0.492
Training loss 0.478 Val loss 0.461
Training loss 0.477 Val loss 0.487
Training loss 0.477 Val loss 0.497
Training loss 0.477 Val loss 0.484
Training loss 0.478 Val loss 0.498
NN Test loss: 0.338


In [61]:
model_LR = LinearRegression()
model_LR.fit(X_training, Y_sum_training)

y_pred = model_LR.predict(X_test)
mse_loss = mean_squared_error(Y_sum_test, y_pred)
print('LR Test loss:','%.3f'%mse_loss)

LR Test loss: 0.352
