In [None]:
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
from xgboost import XGBRegressor
from src.utils.functions import mkdir_if_not_exists, write_test_json

In [None]:
def descale(descaler, values):
    """Undo scaling for a sequence of values and return the result flattened.

    Parameters
    ----------
    descaler : object with an ``inverse_transform`` method
        Called with the values arranged along a new second axis.
    values : array-like
        Scaled values; a 1-D input of length n is passed on as an (n, 1) array.

    Returns
    -------
    numpy.ndarray
        ``descaler.inverse_transform(...)`` collapsed to one dimension.
    """
    # Insert a second axis so each value sits in its own row.
    as_column = np.expand_dims(np.array(values), axis=1)
    restored = descaler.inverse_transform(as_column)
    return restored.flatten()


In [None]:
# Read the feature table (path is relative to the working directory) and
# preview its last rows.
FEATURES_CSV = 'data/Pecanstreet/participants_data/1min/features/661_test_30_all_features.csv'
df = pd.read_csv(FEATURES_CSV)
df.tail()

In [None]:
# Show the (rows, columns) size of the loaded frame.
df.shape

In [None]:
# Count missing values per column.
df.isnull().sum()

In [None]:
# Fit the scaler on the training rows only. Fitting it on the whole frame
# lets the min/max of the held-out rows leak into the values the model is
# trained on. The boundary repeats the 70 % positional split used for
# X_train / X_test further down, so the two must be kept in sync.
train_rows = int(0.7 * len(df))
scaler = MinMaxScaler(feature_range=(-1, 1)).fit(df.iloc[:train_rows])

# Apply the training-set scaling to every row; values from the held-out rows
# may therefore fall outside [-1, 1].
data = pd.DataFrame(scaler.transform(df), index=df.index, columns=df.columns)

In [None]:
# Inspect the fitted offset (`min_`) of the column at position 4.
# NOTE(review): position 4 is presumably the 'consumption' target — confirm.
scaler.min_[4]

In [None]:
# Feature matrix: every scaled column except the 'consumption' target.
# `drop` returns a new frame, so `data` itself is left untouched.
X = data.drop(columns='consumption')
X.head()

In [None]:
# Target vector. Selected by name so it always matches the column that was
# dropped from X, instead of relying on 'consumption' sitting at position 4.
y = data['consumption']
# Summary statistics of the scaled target (transposing a Series is a no-op,
# so the former `.transpose()` call is omitted).
y.describe()

In [None]:
# Split by row position without shuffling: the first 70 % of rows form the
# training partition, the remaining rows the test partition.
n = len(X)
split_at = int(0.7*n)
X_train, X_test = X.iloc[:split_at], X.iloc[split_at:]
y_train, y_test = y.iloc[:split_at], y.iloc[split_at:]

In [None]:
# Print the (features, target) shapes for the full data and each partition.
for features, target in ((X, y), (X_train, y_train), (X_test, y_test)):
    print(features.shape, target.shape)

In [None]:
# Candidate hyper-parameters for the exhaustive search below
# (3^5 = 243 combinations, each fitted once per CV fold).
# NOTE(review): `scale_pos_weight` is documented for imbalanced
# classification — confirm it is meant to be tuned for this regressor.
param_grid = {
    'max_depth': [3, 4, 5],
    'learning_rate': [0.1, .01, .05],
    'gamma': [0, .25, 1],
    'reg_lambda': [0, 1, 10],
    'scale_pos_weight': [1, 3, 5]
}
# Narrower follow-up grid. It is not referenced by the search in this cell;
# pass it as `param_grid` below to run a second, refined search.
second_param_grid = {
    'max_depth': [4],
    'learning_rate': [0.1, .5, 1],
    'gamma': [.25],
    'reg_lambda': [10, 20, 100],
    'scale_pos_weight': [3]
}

# 10-fold grid search, scored by negative mean squared error, run with
# 10 parallel workers.
optimal_params = GridSearchCV(estimator=XGBRegressor(seed=42,
                                                     subsample=0.9,
                                                     colsample_bytree=.5),
                              param_grid=param_grid,
                              scoring='neg_mean_squared_error',
                              verbose=0,
                              n_jobs=10,
                              cv=10)

# Tune on the training partition only. The held-out test set was previously
# passed as `eval_set` to every fit; with no early stopping configured it
# cannot influence the selected parameters, so it only added evaluation cost
# to each of the 2430 fits and touched the test data during tuning.
optimal_params.fit(X_train, y_train)
print(optimal_params.best_params_)


In [None]:
# Final model with hand-entered hyper-parameters.
# NOTE(review): presumably copied from the grid-search output — confirm.
# Unlike the estimator used in the search, this one uses seed=0 and does not
# set subsample / colsample_bytree.
xg_regressor = XGBRegressor(
    seed=0,
    max_depth=3,
    learning_rate=0.1,
    gamma=1,
    reg_lambda=1,
    scale_pos_weight=1,
)

In [None]:
# Train the final model on the training partition.
xg_regressor.fit(X_train, y_train)

In [None]:
# Predict the (scaled) target for the held-out rows.
y_preds = xg_regressor.predict(X_test)

In [None]:
# Error metrics on the held-out rows. Labels and predictions are both in the
# scaler's output space here, not in the original units of the data.
scorers = (
    ('MSE', mean_squared_error),
    ('MAE', mean_absolute_error),
    ('MAPE', mean_absolute_percentage_error),
)
summary = {'model': 'XGBoost'}
for metric_name, scorer in scorers:
    summary[metric_name] = scorer(y_test, y_preds)

# `result` stays a one-element list of records; it is written to JSON later.
result = [summary]
metrics = pd.DataFrame(result)
metrics

In [None]:
# Ensure the output folders exist and store the metric summary.
# NOTE(review): the output paths say '15min' while the input CSV is read from
# a '1min' folder — confirm which resolution these results belong to.
results_dir = 'etc/results/Pecanstreet/single-step/661/15min/XGBoost'
imgs_dir = 'etc/imgs/participants/Pecanstreet/single-step/661/15min/XGBoost'

mkdir_if_not_exists(results_dir)
write_test_json(path=results_dir, result=result, model='XGBoost', task='test')
mkdir_if_not_exists(imgs_dir)

In [None]:
# Pair every held-out label with its prediction, both converted to plain
# Python floats.
test_preds = [
    {'label': float(actual), 'model_output': float(predicted)}
    for predicted, actual in zip(y_preds, y_test.to_list())
]
test_preds

In [None]:
# Store the per-sample label/prediction records under the 'predict' task.
write_test_json(path='etc/results/Pecanstreet/single-step/661/15min/XGBoost', 
                result=test_preds, model='XGBoost', task='predict')


In [None]:
# Build a one-column scaler that undoes the scaling of the target only.
# The target's position is looked up by name rather than hard-coded, so it
# keeps pointing at 'consumption' if the column order of the CSV changes.
target_pos = df.columns.get_loc('consumption')
descaler = MinMaxScaler(feature_range=(-1, 1))
# Copy the fitted offset/scale of the target column onto the fresh scaler
# instead of fitting it again.
descaler.min_, descaler.scale_ = scaler.min_[target_pos], scaler.scale_[target_pos]

# Predictions and labels mapped back from the scaled space.
descale_preds = descale(descaler, y_preds)
descale_labels = descale(descaler, y_test)
descale_labels

In [None]:
# Compare the first 24 descaled predictions with their labels. The series
# are labelled so the two lines can be told apart.
fig, ax = plt.subplots()
ax.plot(descale_preds[:24], label='prediction')
ax.plot(descale_labels[:24], label='label')
ax.set(title='First 24 test samples (descaled)',
       xlabel='test sample',
       ylabel='consumption')
ax.legend();

In [None]:
# Sanity check of the descaling: the unscaled 'consumption' values of the
# test rows are drawn together with the descaled labels.
test_consumption = df.consumption[int(n*0.7):]
fig, ax = plt.subplots()
ax.plot(range(0, test_consumption.shape[0]), test_consumption,
        label='consumption (test rows, unscaled)')
ax.plot(descale_labels, label='descaled labels')
ax.set(title='Unscaled consumption vs. descaled labels',
       xlabel='test sample',
       ylabel='consumption')
ax.legend();

In [None]:
# Removed a stray bare expression that referenced an undefined name and
# raised NameError when the notebook was run top to bottom on a fresh kernel.