# XGBoost Example

## Installation

### Package

In [3]:
## Install Library
# %pip install forecasting-models   (package page: https://pypi.org/project/forecasting-models/)

In [9]:
## Load up Library
from forecasting_models import generateXGBoostForecast


## Data Exploration

In [3]:
## Sample Data

import pandas as pd
import numpy as np

# Read the hourly PJME_MW readings into a DataFrame. The path is relative, so
# the notebook must be run from a directory that contains ./data/.
# No parse_dates argument is given, so Datetime is presumably kept as plain
# strings rather than timestamps -- confirm if later steps need datetimes.
df = pd.read_csv('./data/PJME_hourly.csv')


In [4]:
## First 10 rows of the data
df.head(10)

Unnamed: 0,Datetime,PJME_MW
0,2002-12-31 01:00:00,26498.0
1,2002-12-31 02:00:00,25147.0
2,2002-12-31 03:00:00,24574.0
3,2002-12-31 04:00:00,24393.0
4,2002-12-31 05:00:00,24860.0
5,2002-12-31 06:00:00,26222.0
6,2002-12-31 07:00:00,28702.0
7,2002-12-31 08:00:00,30698.0
8,2002-12-31 09:00:00,31800.0
9,2002-12-31 10:00:00,32359.0


In [7]:
## Column-wise maximum: the latest Datetime and the largest PJME_MW value
# NOTE: df.max() reduces every column, not only the date column.
# Datetime was read without date parsing, so its maximum is presumably a
# string comparison; that matches chronological order only because the
# timestamps are zero-padded 'YYYY-MM-DD HH:MM:SS' -- confirm.
df.max()

Datetime    2018-08-03 00:00:00
PJME_MW                 62009.0
dtype: object

## Format of Code

In [2]:
#forecast_table, metrics = generateXGBoostForecast(
#    data,                      # The DataFrame containing your time series data.
#    date,                      # The name of the column in 'data' that contains the time component.
#    value,                     # The name of the column in 'data' that contains the value to forecast.
#    group=None,                # Optional: A tuple for filtering data by a certain group (column name, value).
#    forecast_horizon=365,      # Optional: Number of periods to forecast into the future.
#    frequency='D',             # Optional: Frequency of the time series data ('D' for daily, etc.).
#    train_size=0.8,            # Optional: Proportion of data to use for training (e.g., 0.8 for 80%).
#    make_stationary_flag=False,# Optional: Whether to apply differencing to make the series stationary.
#    remove_seasonality_flag=False, # Optional: Whether to remove seasonality from the time series.
#    hyperparameters=None,      # Optional: A dictionary of hyperparameters for the XGBoost model.
#    confidence_level=0.95      # Optional: Confidence level for the prediction intervals.
#)


# Custom hyperparameters can also be passed as a dictionary, for example:

#custom_hyperparameters = {
#    'max_depth': 5,             # Maximum depth of a tree. Increasing this value will make the model more complex and more likely to overfit.
#    'learning_rate': 0.1,       # Step size shrinkage used to prevent overfitting. Range is [0,1].
#    'n_estimators': 100,        # Number of trees you want to build.
#    'objective': 'reg:squarederror', # The loss function to be minimized.
#    'booster': 'gbtree',        # Select the type of model to run at each iteration. It can be gbtree, gblinear or dart.
#    'subsample': 0.8,           # Subsample ratio of the training instances.
#    'colsample_bytree': 0.8,    # Subsample ratio of columns when constructing each tree.
#    'colsample_bylevel': 0.8,   # Subsample ratio of columns for each level.
#    'colsample_bynode': 0.8,    # Subsample ratio of columns for each split.
#    'min_child_weight': 1,      # Minimum sum of instance weight (hessian) needed in a child.
#    'gamma': 0,                 # Minimum loss reduction required to make a further partition on a leaf node of the tree.
#    'alpha': 0,                 # L1 regularization term on weights.
#    'lambda': 1,                # L2 regularization term on weights.
#    # Add other XGBoost parameters as needed
#}

#forecast_table, metrics = generateXGBoostForecast(
#    data,
#    date,
#    value,
#    hyperparameters=custom_hyperparameters,
#    # other parameters...
#)


In [8]:
### Example usage

In [10]:
# Example: forecast one week ahead (24*7 hourly periods) for the PJME_MW column
# of df, with Datetime as the time column. 80% of the data is used for training,
# prediction intervals are requested at the 95% confidence level, and the
# stationarity / seasonality-removal options are left switched off.
# The call returns the forecast table and the model metrics, in that order.
forecast_table, metrics = generateXGBoostForecast(
    df,
    'Datetime',
    'PJME_MW',
    frequency='H',
    forecast_horizon=24*7,
    train_size=0.8,
    make_stationary_flag=False,
    remove_seasonality_flag=False,
    confidence_level=0.95,
)

INFO: The time series is stationary. Model results might be reliable.
INFO: Significant seasonality detected in the time series data.


In [11]:
# Display the forecast: one row per forecast timestamp with the point
# forecast and its lower/upper bounds.
forecast_table

Unnamed: 0,Date,Forecast,Lower_Bound,Upper_Bound
0,2018-08-03 01:00:00,35566.832031,31888.216797,41138.851562
1,2018-08-03 02:00:00,34848.035156,31169.419922,40420.054688
2,2018-08-03 03:00:00,34865.734375,31187.119141,40437.753906
3,2018-08-03 04:00:00,34782.781250,31104.166016,40354.800781
4,2018-08-03 05:00:00,35085.515625,31406.900391,40657.535156
...,...,...,...,...
163,2018-08-09 20:00:00,34671.121094,30992.505859,40243.140625
164,2018-08-09 21:00:00,33599.527344,29920.912109,39171.546875
165,2018-08-09 22:00:00,31575.371094,27896.755859,37147.390625
166,2018-08-09 23:00:00,29729.650391,26051.035156,35301.671875


In [12]:
# Display the model's performance metrics (RMSE, MAE and MAPE).
metrics

Unnamed: 0,Model Metrics,Performance
0,RMSE,2283.305275
1,MAE,1643.021246
2,MAPE,5.139411
