In [1]:
import pandas as pd
from pandas import DataFrame
import numpy as np
import math
from random import random
import matplotlib.pyplot as plt
import seaborn as sns
import operator
from functools import reduce
from tqdm import tqdm
import matplotlib.animation as ani
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error
import xgboost as xgb
from sklearn import metrics

In [2]:
# Print numpy floats with three decimals.
np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})

# matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8-*'
# and later releases dropped the old names, so pick whichever this
# installation actually ships.
_seaborn_prefix = 'seaborn-v0_8' if 'seaborn-v0_8-whitegrid' in plt.style.available else 'seaborn'
plt.style.use([
    f'{_seaborn_prefix}-whitegrid',
    f'{_seaborn_prefix}-poster',
    f'{_seaborn_prefix}-dark-palette',
])

# Set fonts AFTER the style sheets: a style sheet may define font.family
# itself, which would silently overwrite a value set beforehand.
plt.rcParams["font.family"] = "Times New Roman"
plt.rcParams["mathtext.fontset"] = "cm"

In [9]:
def xgboost_one_step(data, coef, radar):
    """Walk-forward, one-step-ahead evaluation of an XGBoost regressor.

    The first ``int(coef * len(data))`` rows form the initial training
    window. Every remaining row is then predicted in turn by a model that is
    refitted (via ``xgboost_forecast``) on all rows preceding it, i.e. an
    expanding training window.

    Parameters
    ----------
    data : pandas.DataFrame
        Column 0 is used as the target, all other columns as features.
    coef : float
        Fraction of the rows used as the initial training window.
    radar : str
        Column name given to the returned predictions frame.

    Returns
    -------
    predictions : pandas.DataFrame
        One column named ``radar`` with one row per test row.
    r2 : float
        R^2 score of the predictions against the test targets.
    error : float
        Mean absolute error.
    var : float
        Explained variance score.

    Raises
    ------
    ValueError
        If the split leaves no training rows or no test rows.
    """
    X = data.iloc[:, 1:]
    y = data.iloc[:, 0]

    n_rows = len(data)
    t_i = int(coef * n_rows)
    if not 0 < t_i < n_rows:
        raise ValueError(
            "coef={!r} leaves no rows for training or testing "
            "(split index {} of {} rows)".format(coef, t_i, n_rows)
        )

    # Materialise the arrays once instead of on every iteration.
    X_values = X.values
    y_values = y.values

    predictions = []
    # The test set is every row from t_i to the end. Deriving the count as
    # n_rows - t_i (rather than int((1 - coef) * n_rows)) avoids float
    # truncation dropping rows and matches the split used by xgboost_multi.
    for current_t in tqdm(range(t_i, n_rows)):
        # fit model on history and make a prediction for the current row;
        # the one-row slice keeps the 2-D shape the model expects
        yhat = xgboost_forecast(
            X_values[:current_t],
            y_values[:current_t],
            X_values[current_t:current_t + 1],
        )
        predictions.append(yhat)

    predictions = pd.DataFrame(predictions, columns=[radar])

    # Score against exactly the rows that were predicted. Taking the last
    # len(predictions) rows of y instead would be shifted by one row whenever
    # the prediction loop stops short of the final row.
    testY = y.iloc[t_i:]
    error = mean_absolute_error(testY, predictions)
    r2 = metrics.r2_score(testY, predictions)
    var = metrics.explained_variance_score(testY, predictions)
    return predictions, r2, error, var

def xgboost_forecast(trainX, trainY, testX, n_estimators=1000, seed=1850):
    """Fit an XGBoost regressor on the training data and predict ``testX``.

    Parameters
    ----------
    trainX : array-like
        Training features, passed to ``XGBRegressor.fit``.
    trainY : array-like
        Training targets, passed to ``XGBRegressor.fit``.
    testX : array-like
        Feature rows to predict, passed to ``XGBRegressor.predict``.
    n_estimators : int, optional
        Number of boosting rounds. Defaults to 1000, the value that was
        previously hard-coded; lower it for quicker exploratory runs.
    seed : int, optional
        Random seed handed to the regressor. Defaults to 1850, the value
        that was previously hard-coded.

    Returns
    -------
    The output of ``model.predict(testX)``.
    """
    # fit model
    model = xgb.XGBRegressor(
        objective='reg:squarederror',
        n_estimators=n_estimators,
        seed=seed,
    )
    model.fit(trainX, trainY)
    # make the prediction(s) for the supplied rows
    return model.predict(testX)

def xgboost_multi(data, coef, radar):
    """Fit once on the leading rows and predict all remaining rows together.

    Column 0 of ``data`` is the target and the other columns are features.
    The first ``int(coef * len(data))`` rows train the model (through
    ``xgboost_forecast``); every later row is predicted in a single call.

    Returns ``(predictions, r2, error, var)``: a one-column DataFrame named
    ``radar``, followed by the R^2 score, the mean absolute error and the
    explained variance score of those predictions against the held-out
    targets.
    """
    features = data.iloc[:, 1:]
    target = data.iloc[:, 0]

    # Features and target have the same length, so one cut point serves both.
    cut = int(coef * len(data))
    actual = target.iloc[cut:]

    forecast = xgboost_forecast(
        features.iloc[:cut].values,
        target.iloc[:cut].values,
        features.iloc[cut:].values,
    )
    predictions = pd.DataFrame(forecast, columns=[radar])

    error = mean_absolute_error(actual, predictions)
    r2 = metrics.r2_score(actual, predictions)
    var = metrics.explained_variance_score(actual, predictions)
    return predictions, r2, error, var

### intensity lags

In [10]:
# Experiment on the feature table read from intensity_lags.csv.
# coef is the training fraction; radar names the prediction column.
coef, radar = 0.5, 'KABR'
df_forecasting = pd.read_csv('../dataset/radar/features/intensity_lags.csv')

# Walk-forward run: the model is refitted before every test row.
pred_step1, r2_step1, mae_step1, var_step1 = xgboost_one_step(
    data=df_forecasting, coef=coef, radar=radar
)
# Single-fit run: one model trained on the leading rows predicts the rest.
pred_multi, r2_multi, mae_multi, var_multi = xgboost_multi(
    data=df_forecasting, coef=coef, radar=radar
)

100%|██████████| 453/453 [06:39<00:00,  1.14it/s]


In [11]:
# Report R^2, mean absolute error and explained variance (in that order)
# for the walk-forward run ...
print(r2_step1, mae_step1, var_step1)
# ... and for the single-fit run.
print(r2_multi, mae_multi, var_multi)

-0.28072383057848715 1311.4090106982587 -0.28071744757910055
-0.24875734239346703 1253.096546795863 -0.24835855464725265


### lags mean

In [12]:
# Experiment on the feature table read from lags_mean.csv.
# coef is the training fraction; radar names the prediction column.
coef, radar = 0.5, 'KABR'
df_forecasting = pd.read_csv('../dataset/radar/features/lags_mean.csv')

# Walk-forward run: the model is refitted before every test row.
pred_step1, r2_step1, mae_step1, var_step1 = xgboost_one_step(
    data=df_forecasting, coef=coef, radar=radar
)
# Single-fit run: one model trained on the leading rows predicts the rest.
pred_multi, r2_multi, mae_multi, var_multi = xgboost_multi(
    data=df_forecasting, coef=coef, radar=radar
)

# R^2, mean absolute error, explained variance - one line per strategy.
print(r2_step1, mae_step1, var_step1)
print(r2_multi, mae_multi, var_multi)

100%|██████████| 414/414 [05:12<00:00,  1.32it/s]


-0.1337664803843155 1189.466164317569 -0.13335793449233346
-0.13263833001387426 1250.6226884297091 -0.13263020810109394


### weather lags (t-2) selected

In [19]:
# Experiment on the feature table read from weather_lags2_selected.csv.
# coef is the training fraction; radar names the prediction column.
coef, radar = 0.5, 'KABR'
df_forecasting = pd.read_csv('../dataset/radar/features/weather_lags2_selected.csv')

# Walk-forward run: the model is refitted before every test row.
pred_step1, r2_step1, mae_step1, var_step1 = xgboost_one_step(
    data=df_forecasting, coef=coef, radar=radar
)
# Single-fit run: one model trained on the leading rows predicts the rest.
pred_multi, r2_multi, mae_multi, var_multi = xgboost_multi(
    data=df_forecasting, coef=coef, radar=radar
)

# R^2, mean absolute error, explained variance - one line per strategy.
print(r2_step1, mae_step1, var_step1)
print(r2_multi, mae_multi, var_multi)

100%|██████████| 459/459 [35:16<00:00,  4.61s/it]


-0.49038889429053945 1181.457509366874 -0.4882807630362207
-0.4169311598972145 1144.1870198327467 -0.4167698246449918


### weather lags (t-2)

In [20]:
# Experiment on the feature table read from weather_lags2.csv.
# coef is the training fraction; radar names the prediction column.
coef, radar = 0.5, 'KABR'
df_forecasting = pd.read_csv('../dataset/radar/features/weather_lags2.csv')

# Walk-forward run: the model is refitted before every test row.
pred_step1, r2_step1, mae_step1, var_step1 = xgboost_one_step(
    data=df_forecasting, coef=coef, radar=radar
)
# Single-fit run: one model trained on the leading rows predicts the rest.
pred_multi, r2_multi, mae_multi, var_multi = xgboost_multi(
    data=df_forecasting, coef=coef, radar=radar
)

# R^2, mean absolute error, explained variance - one line per strategy.
print(r2_step1, mae_step1, var_step1)
print(r2_multi, mae_multi, var_multi)

100%|██████████| 459/459 [34:31<00:00,  4.51s/it]


0.02079011908859074 956.8834620107923 0.028254088184270443
-0.6689782141176692 1112.6447770450654 -0.6509164446126239


### weather lags (t-5) selected

In [None]:
# Experiment on the feature table read from weather_lags5_selected.csv.
# coef is the training fraction; radar names the prediction column.
coef, radar = 0.5, 'KABR'
df_forecasting = pd.read_csv('../dataset/radar/features/weather_lags5_selected.csv')

# Walk-forward run: the model is refitted before every test row.
pred_step1, r2_step1, mae_step1, var_step1 = xgboost_one_step(
    data=df_forecasting, coef=coef, radar=radar
)
# Single-fit run: one model trained on the leading rows predicts the rest.
pred_multi, r2_multi, mae_multi, var_multi = xgboost_multi(
    data=df_forecasting, coef=coef, radar=radar
)

# R^2, mean absolute error, explained variance - one line per strategy.
print(r2_step1, mae_step1, var_step1)
print(r2_multi, mae_multi, var_multi)

 55%|█████▌    | 253/457 [14:01<13:06,  3.86s/it]

### weather lags (t-5)

In [None]:
# Experiment on the feature table read from weather_lags5.csv.
# coef is the training fraction; radar names the prediction column.
coef, radar = 0.5, 'KABR'
df_forecasting = pd.read_csv('../dataset/radar/features/weather_lags5.csv')

# Walk-forward run: the model is refitted before every test row.
pred_step1, r2_step1, mae_step1, var_step1 = xgboost_one_step(
    data=df_forecasting, coef=coef, radar=radar
)
# Single-fit run: one model trained on the leading rows predicts the rest.
pred_multi, r2_multi, mae_multi, var_multi = xgboost_multi(
    data=df_forecasting, coef=coef, radar=radar
)

# R^2, mean absolute error, explained variance - one line per strategy.
print(r2_step1, mae_step1, var_step1)
print(r2_multi, mae_multi, var_multi)

### weather lags mean (t-2)

In [None]:
# Experiment on the feature table read from weather_lags2_mean.csv.
# coef is the training fraction; radar names the prediction column.
coef, radar = 0.5, 'KABR'
df_forecasting = pd.read_csv('../dataset/radar/features/weather_lags2_mean.csv')

# Walk-forward run: the model is refitted before every test row.
pred_step1, r2_step1, mae_step1, var_step1 = xgboost_one_step(
    data=df_forecasting, coef=coef, radar=radar
)
# Single-fit run: one model trained on the leading rows predicts the rest.
pred_multi, r2_multi, mae_multi, var_multi = xgboost_multi(
    data=df_forecasting, coef=coef, radar=radar
)

# R^2, mean absolute error, explained variance - one line per strategy.
print(r2_step1, mae_step1, var_step1)
print(r2_multi, mae_multi, var_multi)

### weather

In [16]:
# Experiment on the feature table read from weather.csv.
# coef is the training fraction; radar names the prediction column.
coef, radar = 0.5, 'KABR'
df_forecasting = pd.read_csv('../dataset/radar/features/weather.csv')

# Walk-forward run: the model is refitted before every test row.
pred_step1, r2_step1, mae_step1, var_step1 = xgboost_one_step(
    data=df_forecasting, coef=coef, radar=radar
)
# Single-fit run: one model trained on the leading rows predicts the rest.
pred_multi, r2_multi, mae_multi, var_multi = xgboost_multi(
    data=df_forecasting, coef=coef, radar=radar
)

# R^2, mean absolute error, explained variance - one line per strategy.
print(r2_step1, mae_step1, var_step1)
print(r2_multi, mae_multi, var_multi)

100%|██████████| 460/460 [12:04<00:00,  1.57s/it]


-0.17458480364667306 982.0138259239143 -0.1654198033680363
-1.8034394605827542 1405.8023561708965 -1.7293858548579357
