In [18]:
import pandas as pd
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import KFold, train_test_split

In [19]:
import xgboost as xgb
from bayes_opt import BayesianOptimization
from sklearn.metrics import mean_squared_error

In [20]:
# Set options
pd.options.display.max_rows = 999
pd.options.display.max_columns = 999

data_raw = pd.read_csv("STAT_444_project_data_power_transformed.csv", low_memory = True)

data_raw.dropna()
data_raw.reset_index(drop=True, inplace=True)
train_raw, test_raw = train_test_split(data_raw, test_size=0.1, random_state=42)
#train_x_raw = pd.read_csv("../01-Data/X_train.csv", low_memory = True, index_col=0)
#train_y_raw = pd.read_csv("../01-Data/y_train.csv", low_memory = True, index_col=0)
#test_x_raw = pd.read_csv("../01-Data/X_test.csv", low_memory=True, index_col=0)


df_train = pd.DataFrame(train_raw).drop(["date_1","date_2", "temp2_c", "temp2_min_c",
            "wind_speed50_ave_m_s", "max_generation_mw"], axis=1)
df_test = pd.DataFrame(test_raw).drop(["date_1","date_2", "temp2_c", "temp2_min_c",
            "wind_speed50_ave_m_s", "max_generation_mw"], axis=1)

In [21]:
import numpy as np
import torch
import torch.nn as nn
import torchvision
import matplotlib.pyplot as plt
from tqdm import tqdm

In [22]:
df_z_scaled = df_train.copy()
df_tz = df_test.copy()
# apply normalization techniques 
for column in df_z_scaled.columns:
    df_z_scaled[column] = df_z_scaled[column].astype(float)
    df_z_scaled[column] = (df_z_scaled[column] -
                           df_z_scaled[column].mean()) / (df_z_scaled[column].std())

for column in df_tz.columns:
    df_tz[column] = df_tz[column].astype(float)
    df_tz[column] = (df_tz[column] -
                           df_tz[column].mean()) / (df_tz[column].std())
df_z_scaled = df_z_scaled.dropna()
df_tz = df_tz.dropna()

In [23]:
df_xtrain = df_z_scaled.drop(['total_demand_mw'], axis=1)
df_xtest = df_tz.drop(['total_demand_mw'], axis=1)
df_ytrain = df_z_scaled['total_demand_mw']
df_ytest = df_tz['total_demand_mw']



In [24]:
def xgb_cv(max_depth, learning_rate, subsample, colsample_bytree, lambd, alpha, min_child_weight, gamma, scale_pos_weight, n_estimators):
    params = {
        'device': 'cuda:0',
        'objective': 'reg:squarederror',
        'max_depth': int(max_depth),
        'learning_rate': learning_rate,
        'subsample': subsample,
        'colsample_bytree': colsample_bytree,
        'lambda': lambd,
        'alpha': alpha,
        'min_child_weight': min_child_weight,
        'gamma': gamma,
        'scale_pos_weight': scale_pos_weight
        #'n_estimators': int(n_estimators) #Comment this line out; you don't need it.
    }
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    mse_scores = []
    #dtrain = xgb.DMatrix(df_xtrain, label = df_ytrain)
    #cv_result = xgb.cv(params, dtrain, num_boost_round=int(n_estimators), early_stopping_rounds=10, nfold=5, metrics='rmse') #  <---- you need to set the number of estimators here and accept it as an arg to the function.
    #return -cv_result['test-rmse-mean'].iloc[-1]
    for train_index, test_index in kf.split(df_xtrain,df_ytrain):
        xgb_train = xgb.DMatrix(df_xtrain.iloc[train_index], label=df_ytrain.iloc[train_index])
        xgb_valid = xgb.DMatrix(df_xtrain.iloc[test_index], label=df_ytrain.iloc[test_index])
        
        watchlist = [(xgb_train, 'train'), (xgb_valid, 'eval')]

        # Add early_stopping_rounds
        model = xgb.train(params, xgb_train, num_boost_round=500, evals=watchlist, early_stopping_rounds=10, verbose_eval=False)

        # Predict using the best iteration
        preds = model.predict(xgb_valid)
        #preds = model.predict(xgb_valid, ntree_limit=(model.best_iteration + 1) * params['num_class']

        mse_score = mean_squared_error(df_ytrain.iloc[test_index], preds)
        mse_scores.append(mse_score)

    return -np.mean(mse_scores)

pbounds = {
    'max_depth': (3, 9),
    'learning_rate': (0.01, 0.5),
    'subsample': (0.1, 1),
    'colsample_bytree': (0.1, 1),
    'lambd': (0.5, 4),
    'alpha': (0, 0.5),
    'min_child_weight': (2, 10),
    'gamma': (0.1, 0.5),
    'scale_pos_weight': (1, 1),
    'n_estimators': (50, 500) # <---- You need to indicate the number of trees here
}

optimizer = BayesianOptimization(f=xgb_cv, pbounds=pbounds, random_state=100)
optimizer.maximize(init_points=10, n_iter=300) 

|   iter    |  target   |   alpha   | colsam... |   gamma   |   lambd   | learni... | max_depth | min_ch... | n_esti... | scale_... | subsample |
-------------------------------------------------------------------------------------------------------------------------------------------------
| [0m1        [0m | [0m-0.2018  [0m | [0m0.2717   [0m | [0m0.3505   [0m | [0m0.2698   [0m | [0m3.457    [0m | [0m0.01231  [0m | [0m3.729    [0m | [0m7.366    [0m | [0m421.6    [0m | [0m1.0      [0m | [0m0.6176   [0m |
| [0m2        [0m | [0m-0.2169  [0m | [0m0.4457   [0m | [0m0.2883   [0m | [0m0.1741   [0m | [0m0.8793   [0m | [0m0.1177   [0m | [0m8.872    [0m | [0m8.493    [0m | [0m127.4    [0m | [0m1.0      [0m | [0m0.3467   [0m |
| [95m3        [0m | [95m-0.1943  [0m | [95m0.2159   [0m | [95m0.946    [0m | [95m0.4271   [0m | [95m1.676    [0m | [95m0.09595  [0m | [95m5.237    [0m | [95m2.046    [0m | [95m163.6    [0m | [95m1.0    

In [25]:
optimizer.max['params']

{'alpha': 0.0,
 'colsample_bytree': 1.0,
 'gamma': 0.1,
 'lambd': 0.5,
 'learning_rate': 0.01,
 'max_depth': 7.2949433167147895,
 'min_child_weight': 10.0,
 'n_estimators': 77.28438012989156,
 'scale_pos_weight': 1.0,
 'subsample': 1.0}

In [26]:


# Best parameters from optimization
best_params = {
    'device': 'cuda:0',
    'max_depth': int(optimizer.max['params']['max_depth']),
    'gamma': optimizer.max['params']['gamma'],
    'colsample_bytree': optimizer.max['params']['colsample_bytree'],
    'subsample': optimizer.max['params']['subsample'],
    'learning_rate': optimizer.max['params']['learning_rate'],
    'lambda': optimizer.max['params']['lambd'],
    'alpha': optimizer.max['params']['alpha'],
    'min_child_weight': optimizer.max['params']['min_child_weight'],
    'objective': 'reg:squarederror',
    'eval_metric': 'rmse',
    'verbosity': 0,
    'seed': 42
}

dtrain = xgb.DMatrix(df_xtrain, label=df_ytrain)
dval = xgb.DMatrix(df_xtest, label=df_ytest)

In [27]:
evals_result = {}
bst = xgb.train(best_params, dtrain, num_boost_round=1000, evals=[(dtrain, 'train'), (dval, 'val')],
                early_stopping_rounds=10, evals_result=evals_result, verbose_eval=True)

# Evaluate and print the final training and validation loss
train_last_eval = evals_result['train']['rmse'][-1]
val_last_eval = evals_result['val']['rmse'][-1]

print(f"Training MSE Loss: {train_last_eval**2}")
print(f"Validation MSE Loss: {val_last_eval**2}")

[0]	train-rmse:0.99130	val-rmse:0.98974
[1]	train-rmse:0.98298	val-rmse:0.98223
[2]	train-rmse:0.97475	val-rmse:0.97477
[3]	train-rmse:0.96661	val-rmse:0.96742


[4]	train-rmse:0.95857	val-rmse:0.96018
[5]	train-rmse:0.95062	val-rmse:0.95307
[6]	train-rmse:0.94277	val-rmse:0.94600
[7]	train-rmse:0.93500	val-rmse:0.93911
[8]	train-rmse:0.92731	val-rmse:0.93232
[9]	train-rmse:0.91971	val-rmse:0.92558
[10]	train-rmse:0.91221	val-rmse:0.91896
[11]	train-rmse:0.90477	val-rmse:0.91251
[12]	train-rmse:0.89744	val-rmse:0.90602
[13]	train-rmse:0.89018	val-rmse:0.89968
[14]	train-rmse:0.88302	val-rmse:0.89341
[15]	train-rmse:0.87592	val-rmse:0.88729
[16]	train-rmse:0.86891	val-rmse:0.88114
[17]	train-rmse:0.86199	val-rmse:0.87516
[18]	train-rmse:0.85515	val-rmse:0.86920
[19]	train-rmse:0.84837	val-rmse:0.86343
[20]	train-rmse:0.84168	val-rmse:0.85772
[21]	train-rmse:0.83506	val-rmse:0.85215
[22]	train-rmse:0.82853	val-rmse:0.84661
[23]	train-rmse:0.82209	val-rmse:0.84115
[24]	train-rmse:0.81570	val-rmse:0.83578
[25]	train-rmse:0.80939	val-rmse:0.83052
[26]	train-rmse:0.80315	val-rmse:0.82529
[27]	train-rmse:0.79699	val-rmse:0.82013
[28]	train-rmse:0.7909