In [1]:
import cupy as cp
import numpy as np
import pandas as pd
import time
import cudf as cf
import cuml as ml

from sklearn.model_selection import KFold, train_test_split, StratifiedKFold
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, ExtraTreesRegressor
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from evaluation_metrics import compute_metrics, compute_metrics_csv, mean_absolute_percentage_error, symetric_mean_absolute_percentage_error

from statsmodels.tsa.stattools import acf, pacf, ccf, ccovf
from statsmodels.tsa.seasonal import STL
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf 
from statsmodels.tsa.stattools import adfuller, kpss
from scipy.stats import boxcox, yeojohnson
from scipy.special import inv_boxcox
import rstl

from time import perf_counter



In [2]:
def cuda_test_batch(shape=(1000, 1000, 1000)):
    """Print library versions and a rough CPU-vs-GPU timing comparison.

    Prints, in order: the CuPy, cuDF and cuML version strings, followed by
    four elapsed times in seconds:

    1. allocating an array of ones with NumPy,
    2. allocating the same array with CuPy,
    3. three in-place arithmetic operations on the NumPy array,
    4. the same three operations on the CuPy array.

    Parameters
    ----------
    shape : tuple of int, optional
        Shape of the test arrays. With the default float64 dtype the default
        shape holds 1e9 elements (about 8 GB) on the host and again on the
        device; pass a smaller shape on machines with less memory.
    """
    print(cp.__version__)
    print(cf.__version__)
    print(ml.__version__)

    # perf_counter() is monotonic and high-resolution, so it is the right
    # clock for measuring intervals (time.time() can jump with clock changes).
    start = time.perf_counter()
    x_cpu = np.ones(shape)
    print(time.perf_counter() - start)

    start = time.perf_counter()
    x_gpu = cp.ones(shape)
    # GPU work is asynchronous: wait for the stream before reading the clock.
    cp.cuda.Stream.null.synchronize()
    print(time.perf_counter() - start)

    ### Numpy and CPU
    start = time.perf_counter()
    x_cpu *= 5
    x_cpu *= x_cpu
    x_cpu += x_cpu
    print(time.perf_counter() - start)

    ### CuPy and GPU
    start = time.perf_counter()
    x_gpu *= 5
    x_gpu *= x_gpu
    x_gpu += x_gpu
    cp.cuda.Stream.null.synchronize()
    print(time.perf_counter() - start)

    # Drop the test arrays and hand the cached device blocks back to the
    # driver; otherwise CuPy's memory pool keeps the buffer reserved after
    # this function returns.
    del x_cpu, x_gpu
    cp.get_default_memory_pool().free_all_blocks()


In [3]:
# Run the benchmark: prints the three library versions, then four elapsed times in seconds.
cuda_test_batch()

7.6.0
0.15.0a+1827.g585d94c21
0.15.0a+772.gade1b044c
2.1061341762542725
2.0452466011047363
2.7399961948394775
0.2405083179473877


In [4]:
def fill_seasonal(month, day, hour, seasonal_dict):
    """Look up the seasonal value for one (month, day-of-week, hour) slot.

    Parameters
    ----------
    month : hashable
        Key selecting the per-month table in ``seasonal_dict``.
    day, hour :
        Values compared against the table's ``Day_of_week`` and ``Hour`` columns.
    seasonal_dict : mapping
        Maps a month key to a table exposing ``Day_of_week``, ``Hour`` and
        ``Seasonal`` columns.

    Returns
    -------
    The ``Seasonal`` value of the first row matching both ``day`` and ``hour``.
    If no row matches, the final ``[0]`` lookup fails.
    """
    month_table = seasonal_dict[month]
    slot_mask = (month_table.Day_of_week == day) & (month_table.Hour == hour)
    matching_rows = month_table[slot_mask]
    return matching_rows.Seasonal.values[0]

In [34]:
# Seasonal differencing order (in rows) applied to the target below.
n_diff = 24
df = cf.read_csv('data/ppnet_metar_v7.csv',  sep=';', index_col=0)
# TODO: uncomment to repeat the dataset 10x
# df = cf.concat([df]*10, ignore_index=True)
#     start = perf_counter()

# Keep data before 2019 and flag the 2018 rows as the test set.
df = df[df.Year < 2019]
df['TestSet'] = 0
df.loc[df.Year == 2018, 'TestSet'] = 1
X, y = df.drop('Consumption', axis=1), df.Consumption

# Differenced target; dropna() removes the rows that have no predecessor
# n_diff steps back, so the assignment into X relies on index alignment.
y_diff = y.diff(n_diff).dropna()
y_diff_index = y_diff.index
X['Consumption_diff_1'] = y_diff

lags = cp.array([24, 25, 35, 36, 37, 47, 48, 49, 71,72,73, 95, 96, 97, 119, 120, 121, 143,144,145, 168, 167, 169])
# NOTE: cp.arange(n_diff, 24) is empty while n_diff == 24, so lags_con
# currently holds exactly the same values as lags.
lags_arr = cp.arange(n_diff, 24)
lags_con = cp.concatenate((lags, lags_arr))

# Iterate over plain Python ints (one device-to-host copy) instead of 0-d
# CuPy arrays: shift() gets an ordinary integer and the column names are
# formatted without a per-element GPU round trip. Column names are unchanged.
for lag in lags.tolist():
    X[f'Consumption_lag_{lag}'] = y.shift(lag)
    X[f'Temperature_lag_{lag}'] = X['Temperature'].shift(lag)
    X[f'Consumption_diff_1_lag_{lag}'] = X['Consumption_diff_1'].shift(lag)
    X[f'Humidity_lag_{lag}'] = X['Humidity'].shift(lag)
    X[f'Cena_lag_{lag}'] = X['Cena_bfill'].shift(lag)
    X[f'Windspeed_lag_{lag}'] = X['Wind_speed'].shift(lag)
    X[f'Pressure_lag_{lag}'] = X['Pressure'].shift(lag)

# Cyclical (sin/cos) encodings of the calendar features.
X['Day_of_week_sin'] = cp.sin(2 * cp.pi * X['Day_of_week']/7.0)
X['Day_of_week_cos'] = cp.cos(2 * cp.pi * X['Day_of_week']/7.0)
X['Month_sin'] = cp.sin(2 * cp.pi * X['Month']/12.0)
X['Month_cos'] = cp.cos(2 * cp.pi * X['Month']/12.0)
# A day has 24 hourly slots, so the period is 24. With a period of 23,
# hours 0 and 23 land on the same point of the circle and become
# indistinguishable. (Assumes Hour takes values 0..23 -- verify against the CSV.)
X['Hour_sin'] = cp.sin(2 * cp.pi * X['Hour']/24.0)
X['Hour_cos'] = cp.cos(2 * cp.pi * X['Hour']/24.0)
X['Day_sin'] = cp.sin(2 * cp.pi * X['Day']/31.0)
X['Day_cos'] = cp.cos(2 * cp.pi * X['Day']/31.0)



# seasonal_dict = {k:X[(X.TestSet == 0) & (X.Month == k)].groupby(['Day_of_week', 'Hour']).Seasonal.mean().reset_index() for k in  X.Month.value_counts().index.values}
# print(seasonal_dict)

# Lookup table with axes (12, 7, 24) -- presumably month x day-of-week x hour.
# The last axis needs 24 slots (it was 23), and the table starts as NaN
# instead of uninitialised memory so unfilled slots are detectable.
seasonal_cparray = cp.full((12, 7, 24), cp.nan)

# cp.cuda.Stream.null.synchronize()
# end = perf_counter()
# print(f'Difference: {end - start}')

In [35]:
# Time the STL decomposition of the differenced target. The measured interval
# also includes the conversion of the values to a NumPy array.
start_rstl = perf_counter()
# rstl is handed a host-side NumPy array, so copy the values off the device first.
y_diff_np = cp.asnumpy(y_diff.values)
# freq=24*7 -> 168 observations per cycle (presumably one week of hourly
# readings -- confirm the sampling rate of the data).
stl_decomp = rstl.STL(y_diff_np, freq=24*7, robust=False, s_window='periodic')
end_rstl = perf_counter()
# Elapsed seconds for copy + decomposition.
print(end_rstl - start_rstl)

1.7162403389811516


In [36]:
# Wrap each output of the STL decomposition in its own CuPy array.
trend = cp.array(stl_decomp.trend)
seasonal = cp.array(stl_decomp.seasonal)
residual = cp.array(stl_decomp.remainder)
weights = cp.array(stl_decomp.weights)

In [19]:
# Element-wise sum of the residual and trend arrays.
residual_trend = residual + trend

In [20]:
# Sanity check: display the dimensions of the feature frame.
X.shape

(52584, 203)

In [21]:
# Sanity check: display the dimensions of the target.
y.shape

(52584,)

In [30]:
# Attach the decomposition arrays to X as feature columns. Each array is
# left-padded with n_diff NaNs before being wrapped in a Series on X's index
# (presumably to make up for the rows dropped after differencing -- confirm
# the lengths match). Note that the 'Residual' column is filled from
# residual_trend, not from residual alone.
nan_pad = cp.full(n_diff, cp.nan)
for column_name, component in (
    ('Seasonal', seasonal),
    ('Residual', residual_trend),
    ('Trend', trend),
):
    X[column_name] = cf.Series(cp.concatenate([nan_pad, component]), index=X.index)

Difference RSTL: 237.84162295609713
