In [51]:
import pandas as pd
from pandas import DataFrame
import numpy as np
import math
from random import random
import matplotlib.pyplot as plt
import seaborn as sns
import operator
from functools import reduce
from tqdm import tqdm
import matplotlib.animation as ani
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error
import xgboost as xgb
from sklearn import metrics

In [52]:
np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})
plt.rcParams["font.family"] = "Times New Roman"
plt.style.use('seaborn-whitegrid')
plt.style.use('seaborn-poster')
plt.style.use('seaborn-dark-palette')
plt.rcParams["mathtext.fontset"] = "cm"

In [55]:
def xgboost_one_step(data, coef, radar):
    predictions = []
    t_i = int(coef * len(data))
    # step over each time-step in the test set
    X = data.iloc[:,1:]
    y = data.iloc[:,0] 
    
    for i in tqdm(range(int((1-coef) * len(data)))):
        current_t = t_i + i
        # split test row into input and output columns
        
        testX = X.iloc[current_t, :]
        trainX, trainY = X[:current_t], y[:current_t]
        # fit model on history and make a prediction
        yhat = xgboost_forecast(trainX.values, trainY.values, testX.values.reshape(1,-1))
        # store forecast in list of predictions
        predictions.append(yhat)
    # estimate prediction error
    predictions = pd.DataFrame(predictions, columns = [radar])
    trainY = y[(len(y) - int(coef * len(data))):]
    error = mean_absolute_error(trainY, predictions)
    r2 = metrics.r2_score(trainY, predictions)
    return error, predictions, r2

# fit an xgboost model and make a one step prediction
def xgboost_forecast(trainX, trainY, testX):
    # fit model
    model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=1000, seed = 1850)
    model.fit(trainX, trainY)
    # make a one-step prediction
    yhat = model.predict(testX)
    return yhat

def xgboost_multistep(data, coef, radar):
    X = data.iloc[:,1:]
    y = data.iloc[:,0]
    trainX, testX = X[:int(coef * len(X))], X[int(coef * len(X)):]
    trainY, testY = y[:int(coef * len(y))], y[int(coef * len(y)):]
    #print(testX.values)
    predictions = xgboost_forecast(trainX.values, trainY.values, testX.values)
    predictions = pd.DataFrame(predictions, columns = [radar])
    trainY = y[(len(y) - int(coef * len(data))):]
    error = mean_absolute_error(testY, predictions)
    r2 = metrics.r2_score(testY, predictions)
    return error, predictions, r2

### intensity lags

In [56]:
coef = 0.5
radar = 'KABR'
df_forecasting = pd.read_csv('../dataset/radar/features/intensity_lags.csv')
mae_step1, pred_step1, r2_step1 = xgboost_one_step(df_forecasting, coef, radar)
mae_multi, pred_multi, r2_multi = xgboost_multistep(df_forecasting, coef, radar)

100%|██████████| 453/453 [16:02<00:00,  2.12s/it]


(1311.4090106982587,             KABR
0     631.985962
1     176.050400
2      50.815929
3    3910.911865
4      96.018303
..           ...
448    74.550598
449  -485.779053
450  2538.199707
451   561.585144
452  -305.522736

[453 rows x 1 columns])
(1253.096546795863,             KABR
0     631.985962
1     135.722412
2     135.344498
3    1620.561157
4     140.604431
..           ...
449  -121.376106
450  2150.363037
451  1938.116943
452   123.362717
453  1777.856445

[454 rows x 1 columns], -0.24875734239346703)
