# How to Use XGBoost for Time-Series Forecasting?

Sources:
Nitika Sharma, How to Use XGBoost for Time-Series Forecasting?
https://www.analyticsvidhya.com/blog/2024/01/xgboost-for-time-series-forecasting/

Raghav Agrawal, Time series Forecasting: Complete Tutorial | Part-1
https://www.analyticsvidhya.com/blog/2021/07/time-series-forecasting-complete-tutorial-part-1/


## Traditional Methods for Forecasting

In [None]:
# Step 1: Load the data and plot the first observations
#import numpy as np # linear algebra
import pandas as pd
import matplotlib.pyplot as plt
#from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt

# The first CSV column becomes the index; the series that is plotted lives in
# the "Value" column.
df = pd.read_csv("Electric_Production.csv", header=0, index_col=0)

# Create the figure *before* plotting so that figsize applies to this chart.
# Calling plt.figure() after plt.plot() opens a second, empty figure instead
# and leaves the chart at the default size.
plt.figure(figsize=(20, 10))
plt.plot(df[1:50]["Value"])  # row slice 1:50 of the "Value" column
plt.xticks(rotation=30)  # tilt the tick labels so they do not overlap
plt.show()

In [None]:
# Step 2: Moving Average method
# Build a 5-observation rolling window over the same row slice plotted above.
# Any statistic (mean, median, std, ...) can be taken from the window object.
rollingseries = df[1:50].rolling(5)
rollingmean = rollingseries.mean()

# Plot the smoothed series in red.
rollingmean.plot(color="red")
plt.show()

In [None]:
# Step 3: Simple Exponential Smoothing
# Imported here because the earlier statsmodels import line is commented out;
# without it SimpleExpSmoothing raises NameError.
from statsmodels.tsa.api import SimpleExpSmoothing

# Row slice 1:50 of the loaded frame; `data` is reused by later cells.
data = df[1:50]

# Two fits with fixed smoothing levels (optimized=False keeps the given value
# instead of estimating it), so their fitted curves can be compared.
fit1 = SimpleExpSmoothing(data).fit(smoothing_level=0.2, optimized=False)
fit2 = SimpleExpSmoothing(data).fit(smoothing_level=0.8, optimized=False)

plt.figure(figsize=(18, 8))
plt.plot(data, marker='o', color="black")            # observed values
plt.plot(fit1.fittedvalues, marker="o", color="b")   # smoothing_level=0.2
plt.plot(fit2.fittedvalues, marker="o", color="r")   # smoothing_level=0.8
plt.xticks(rotation="vertical")
plt.show()

In [None]:
# Step 4: Holt method for exponential smoothing
# Imported here because the earlier statsmodels import line is commented out;
# without it Holt raises NameError.
from statsmodels.tsa.api import Holt

# `data` is the df[1:50] slice bound in the Simple Exponential Smoothing cell.
fit1 = Holt(data).fit()  # linear trend
fit2 = Holt(data, exponential=True).fit()  # exponential trend

plt.plot(data, marker='o', color='black')            # observed values
plt.plot(fit1.fittedvalues, marker='o', color='b')   # linear-trend fit
plt.plot(fit2.fittedvalues, marker='o', color='r')   # exponential-trend fit
plt.xticks(rotation="vertical")
plt.show()

In [None]:
# Decomposition and stationarity check practicals

# Step 1: Load dataset
import matplotlib.pyplot as plt
import pandas as pd
from dateutil.parser import parse
from statsmodels.tsa.seasonal import seasonal_decompose

# Parse the 'Date' column while reading and use it as the index, then move it
# straight back into a regular column so the frame ends up with the default
# integer index.
DrugSalesData = pd.read_csv(
    'TimeSeries.csv', parse_dates=['Date'], index_col='Date'
).reset_index()

# Default size for every figure created from here on.
plt.rcParams.update({'figure.figsize': (10, 6)})
plt.plot(DrugSalesData['Value'])

In [None]:
# Step 2: Decomposition of time-series data
# NOTE(review): period=1 presumably leaves no seasonal cycle to estimate, so
# the seasonal component would be trivial - confirm the intended period for
# this dataset (e.g. 12 if the observations are monthly).
sales_values = DrugSalesData['Value']

# Additive Decomposition
add_result = seasonal_decompose(sales_values, model='additive', period=1)

# Multiplicative Decomposition
mul_result = seasonal_decompose(sales_values, model='multiplicative', period=1)

In [None]:
# Plot the components of the additive decomposition under a common title.
# The title used to start with a stray literal 'n' - presumably a '\n' whose
# backslash was lost - so the escape sequence is restored here.
add_result.plot().suptitle('\nAdditive Decompose', fontsize=12)
plt.show()

In [None]:
# Put the additive components side by side in one frame for inspection.
new_df_add = pd.concat(
    [add_result.seasonal, add_result.trend, add_result.resid, add_result.observed],
    axis=1,
)
# Labels follow the concat order above ('seasonality' was previously misspelled).
new_df_add.columns = ['seasonality', 'trend', 'residual', 'actual_values']
new_df_add.head()

In [None]:
# Step 3: Augmented Dickey-Fuller (ADF) test for stationarity
from statsmodels.tsa.stattools import adfuller

# autolag='AIC' lets statsmodels choose the lag length by information criterion.
adfuller_result = adfuller(DrugSalesData.Value.values, autolag='AIC')

# The result is a tuple: [0] test statistic, [1] p-value,
# [4] dict of critical values keyed by significance level.
print(f'ADF Statistic: {adfuller_result[0]}')
print(f'p-value: {adfuller_result[1]}')

# Print the heading once, before the loop, rather than once per entry.
print('Critical Values:')
for key, value in adfuller_result[4].items():
    print(f'   {key}, {value}')

## XGBoost Regressor for Forecasting 

In [None]:
# Load data:
# The first CSV column becomes the index; the series lives in the "Value" column.
df = pd.read_csv("Electric_Production.csv", header=0, index_col=0)

# Create the figure *before* plotting so that figsize applies to this chart.
# Calling plt.figure() after plt.plot() opens a second, empty figure instead
# and leaves the chart at the default size.
plt.figure(figsize=(20, 10))
plt.plot(df[1:50]["Value"])  # row slice 1:50 of the "Value" column
plt.xticks(rotation=30)
plt.show()

In [None]:
# Work on an independent (deep) copy so that the feature columns added in the
# following cells do not end up on `df` itself.
original_data = df.copy(deep=True)

In [None]:
# Creating lag features for time-series data

def create_lag_features(data, lag_steps=1, target_col='target'):
    """Add lagged copies of the target column to ``data``.

    For every ``i`` in ``1..lag_steps`` a column ``lag_<i>`` is added that
    holds the target shifted down by ``i`` rows, so the first ``i`` rows of
    that column are NaN.

    Args:
        data: DataFrame that contains ``target_col``. It is modified in place.
        lag_steps: Number of lag columns to create.
        target_col: Name of the column to lag. Defaults to ``'target'`` so
            existing callers keep working.

    Returns:
        The same ``data`` object, with the lag columns added.

    Raises:
        KeyError: If ``target_col`` is not a column of ``data``.
    """
    target = data[target_col]
    for i in range(1, lag_steps + 1):
        data[f'lag_{i}'] = target.shift(i)
    return data


# Applying lag feature creation to the dataset.
# The electricity data keeps its series in the "Value" column (the one plotted
# when the file is loaded); there is no "target" column, so name it explicitly.
lagged_data = create_lag_features(original_data, lag_steps=3, target_col='Value')

In [None]:
# Creating rolling mean for time-series data

def create_rolling_mean(data, window_size=3, target_col='target'):
    """Add a ``rolling_mean`` column computed from the target column.

    Args:
        data: DataFrame that contains ``target_col``. It is modified in place.
        window_size: Number of consecutive rows averaged per window. The first
            ``window_size - 1`` rows of the new column are NaN.
        target_col: Name of the column to average. Defaults to ``'target'`` so
            existing callers keep working.

    Returns:
        The same ``data`` object, with the ``rolling_mean`` column added.

    Raises:
        KeyError: If ``target_col`` is not a column of ``data``.
    """
    # NOTE(review): each window ends at (and includes) the current row, so the
    # feature contains the very value a forecaster is meant to predict.
    # Consider shifting the target by one row first - confirm with the author.
    data['rolling_mean'] = data[target_col].rolling(window=window_size).mean()
    return data


# Applying rolling mean to the dataset.
# The electricity data keeps its series in the "Value" column (the one plotted
# when the file is loaded); there is no "target" column, so name it explicitly.
rolled_data = create_rolling_mean(original_data, window_size=5, target_col='Value')

In [None]:
# Applying Fourier transformation for capturing seasonality

# numpy is imported here because the notebook's earlier numpy import is
# commented out; without it np.abs raises NameError.
import numpy as np
from scipy.fft import fft


def apply_fourier_transform(data, target_col='target'):
    """Add a ``fourier_transform`` column with the FFT magnitude of the target.

    Args:
        data: DataFrame that contains ``target_col``. It is modified in place.
        target_col: Name of the column to transform. Defaults to ``'target'``
            so existing callers keep working.

    Returns:
        The same ``data`` object, with the ``fourier_transform`` column added.

    Raises:
        KeyError: If ``target_col`` is not a column of ``data``.
    """
    values = data[target_col].values
    # NOTE(review): the FFT output is indexed by frequency bin, not by time
    # step, so row k of this column describes frequency k of the whole series
    # rather than observation k - confirm this is the intended feature.
    data['fourier_transform'] = np.abs(fft(values))
    return data


# Applying Fourier transformation to the dataset.
# The electricity data keeps its series in the "Value" column (the one plotted
# when the file is loaded); there is no "target" column, so name it explicitly.
fourier_data = apply_fourier_transform(original_data, target_col='Value')

In [None]:
# Splitting time-series data into training and testing sets

# Column holding the series to forecast (the one plotted when the file is loaded).
TARGET_COLUMN = "Value"

# Split the feature-engineered frame: the feature cells add their columns to
# `original_data` in place. `data` must not be used here - it is only the
# df[1:50] slice from the exponential-smoothing examples and has no features.
# Lag and rolling-mean columns are NaN in their first rows, so drop those rows.
# NOTE(review): assumes every remaining non-target column is a numeric
# feature - confirm against the CSV.
model_data = original_data.dropna()

train_size = int(len(model_data) * 0.8)

# Chronological split: the first 80% of rows train the model, the rest test it.
train_data, test_data = model_data[:train_size], model_data[train_size:]

# Features and labels must come from the SAME rows; previously y_train was
# bound to the test rows, which is neither a label vector nor the right length.
X_train, y_train = train_data.drop(columns=[TARGET_COLUMN]), train_data[TARGET_COLUMN]
X_test, y_test = test_data.drop(columns=[TARGET_COLUMN]), test_data[TARGET_COLUMN]

In [None]:
# Hyperparameter tuning using grid search

from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit

# Candidate values; every combination is evaluated.
param_grid = {
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'subsample': [0.8, 0.9, 1.0],
}

# An integer cv (cv=3) means plain K-fold, where some folds are validated on
# observations that are older than part of their training data. For a time
# series that leaks the future into training, so use forward-chaining splits:
# each split trains on an initial segment and validates on the rows after it.
time_series_cv = TimeSeriesSplit(n_splits=3)

grid_search = GridSearchCV(XGBRegressor(), param_grid, cv=time_series_cv)
grid_search.fit(X_train, y_train)

# Build a fresh regressor configured with the best-scoring combination.
best_params = grid_search.best_params_
xgb_model = XGBRegressor(**best_params)

In [None]:
# Training the XGBoost model
# Fit the regressor built from the grid-search parameters on the training
# features and their labels.
xgb_model.fit(X=X_train, y=y_train)