In [1]:
import numpy as np
import pandas as pd
import scipy.sparse
import xgboost as xgb
from scipy.fftpack import ifft, idct

import db_column_name as db
import mix_pandas as mix
import predict as predict_mix
import processing

%matplotlib inline
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'svg'

from pylab import rcParams
rcParams['figure.figsize'] = 10, 7


# Column-name registry used for every column lookup below.
cn = db.ColumnName()

# --- Target series --------------------------------------------------------
target_minT = pd.read_csv('./data/31286_103.csv')
# Return value is ignored, so this presumably re-indexes the frame in place
# by the date column -- TODO confirm against mix_pandas.
mix.set_index_date(target_minT, cn.date)
# Snap target timestamps to day resolution. Note that `round` goes to the
# NEAREST day (not the floor).
target_minT.index = target_minT.index.round('D')

# --- Feature matrix -------------------------------------------------------
X = pd.read_csv('./data/character_31286.csv')
mix.set_index_date(X, cn.date)

# Discard the point column, then keep only columns whose name contains 'avg'
# plus the offset column (still needed by the daily aggregation step).
X = X.drop([cn.point], axis=1)
kept_columns = [name for name in X.columns
                if 'avg' in name or name == cn.offset]
X = X[kept_columns]

# Aggregate to one row per day (per the helper's name), then drop the offset
# column so it is not used as a feature.
X = mix.mean_day(X).drop([cn.offset], axis=1)

# --- Align target and features on a common index ---------------------------
# Each side is reindexed to the other and passed through mix.clean
# (presumably removes incomplete rows -- TODO confirm).
target_minT = mix.clean(target_minT.reindex(X.index))
X = mix.clean(X.reindex(target_minT.index))

# Winsorize the target value column with the [0.05, 0.95] bounds, then
# re-align the features to whatever index the helper returns.
target_minT = mix.winsorized(target_minT, cn.value, [0.05, 0.95])
X = X.reindex(target_minT.index)
print(X.shape)

(1517, 54)


In [6]:
# Experiment: predict the target in the frequency domain. Amplitude and phase
# are modelled separately, recombined into complex values, and brought back
# with an inverse FFT before being scored against the observed values.
# (`ifft` is imported in the notebook's top import cell.)
fft_X, fft_target = processing.fft_target(X, target_minT, cn.value)

params = {
    'max_depth': 4,
    'learning_rate': 0.09,
    'min_child_weight': 6,
}
reg = xgb.XGBRegressor(**params)

# The trailing 5 is presumably the number of splits (five Train/Test reports
# are printed below) -- TODO confirm against predict.predict_model_split.
# NOTE(review): the same estimator object is passed to both calls; confirm
# the helper does not rely on state left over from the previous fit.
predict_am = predict_mix.predict_model_split(reg, fft_X, fft_target, 'amplitude', 5)
predict_ph = predict_mix.predict_model_split(reg, fft_X, fft_target, 'phase', 5)


def _with_fft_prediction(amplitude_part, phase_part):
    """Return the observed target rows of one split with the FFT prediction attached.

    Parameters
    ----------
    amplitude_part, phase_part
        The matching halves (both train or both test) of one split produced
        for the 'amplitude' and 'phase' targets. Each must expose a
        'prediction' column; `amplitude_part` must also expose the date
        column used to look up the observed rows.

    Returns
    -------
    A copy of the matching `target_minT` rows with an extra
    'FFT prediction' column holding the real part of the inverse FFT.
    """
    # Explicit copy: the new column is added to this frame only and can
    # never write through to `target_minT`.
    observed = target_minT.loc[amplitude_part[cn.date]].copy()
    spectrum = processing.reshapeToComplex(
        amplitude_part.loc[:, 'prediction'],
        phase_part.loc[:, 'prediction'],
    )
    observed['FFT prediction'] = ifft(spectrum).real
    return observed


for am, ph in zip(predict_am, predict_ph):
    # Index 0 is used as the train part and index 1 as the test part.
    real_target_train = _with_fft_prediction(am[0], ph[0])
    real_target_test = _with_fft_prediction(am[1], ph[1])

    print("Train")
    predict_mix.print_mean(real_target_train[[cn.value]], real_target_train[['FFT prediction']])
    print("Test")
    predict_mix.print_mean(real_target_test[[cn.value]], real_target_test[['FFT prediction']])
    print()

Train
Mean squared error 0.19474
Mean absolute error 0.33534
Median absolute error 0.26243
Test
Mean squared error 18.70815
Mean absolute error 3.37402
Median absolute error 2.80144

Train
Mean squared error 0.83957
Mean absolute error 0.68834
Median absolute error 0.53495
Test
Mean squared error 15.52405
Mean absolute error 2.93629
Median absolute error 2.17148

Train
Mean squared error 1.87110
Mean absolute error 1.03980
Median absolute error 0.80161
Test
Mean squared error 16.71934
Mean absolute error 3.13794
Median absolute error 2.37399

Train
Mean squared error 2.64028
Mean absolute error 1.21976
Median absolute error 0.93519
Test
Mean squared error 53.25402
Mean absolute error 5.82082
Median absolute error 5.08427

Train
Mean squared error 3.09792
Mean absolute error 1.32024
Median absolute error 0.99616
Test
Mean squared error 15.19491
Mean absolute error 3.18005
Median absolute error 2.88537



In [5]:
# Experiment: predict the target in the DCT domain (orthonormal scaling) and
# bring the predictions back with an inverse DCT before scoring them against
# the observed values. (`idct` is imported in the notebook's top import cell.)
dct_X, dct_target = processing.dct_target(X, target_minT, cn.value, {'norm': 'ortho'})

# Previously tried and left disabled: subsample=0.8, colsample_bytree=0.8,
# gamma=12.
params = {
    'max_depth': 4,
    'learning_rate': 0.09,
    'min_child_weight': 6,
}
reg = xgb.XGBRegressor(**params)

# The trailing 5 is presumably the number of splits (five Train/Test reports
# are printed below) -- TODO confirm against predict.predict_model_split.
predict = predict_mix.predict_model_split(reg, dct_X, dct_target, 'dct', 5)


def _with_dct_prediction(part):
    """Return the observed target rows of one split with the DCT prediction attached.

    Parameters
    ----------
    part
        The train or test half of one split. It must expose a 'prediction'
        column and the date column used to look up the observed rows.

    Returns
    -------
    A copy of the matching `target_minT` rows with an extra
    'DCT prediction' column holding the inverse DCT (norm='ortho') of the
    predictions.
    """
    # Explicit copy: the new column is added to this frame only and can
    # never write through to `target_minT`.
    observed = target_minT.loc[part[cn.date]].copy()
    # NOTE(review): `[['prediction']]` selects with a list, so if `part` is a
    # DataFrame this array is 2-D with a single column. `idct` transforms
    # along the last axis by default, which here would be that length-1
    # axis. Confirm this matches the axis used by processing.dct_target.
    coefficients = part[['prediction']].values
    observed['DCT prediction'] = idct(coefficients, norm='ortho')
    return observed


for train, test in predict:
    target_train = _with_dct_prediction(train)
    target_test = _with_dct_prediction(test)

    print("Train")
    predict_mix.print_mean(target_train[[cn.value]], target_train[['DCT prediction']])
    print("Test")
    predict_mix.print_mean(target_test[[cn.value]], target_test[['DCT prediction']])
    print()

Train
Mean squared error 0.15264
Mean absolute error 0.29514
Median absolute error 0.23255
Test
Mean squared error 22.69288
Mean absolute error 3.53431
Median absolute error 2.60842

Train
Mean squared error 0.75297
Mean absolute error 0.63779
Median absolute error 0.49397
Test
Mean squared error 16.25762
Mean absolute error 2.97990
Median absolute error 2.23345

Train
Mean squared error 1.64621
Mean absolute error 0.97590
Median absolute error 0.73902
Test
Mean squared error 37.57456
Mean absolute error 4.08030
Median absolute error 2.64294

Train
Mean squared error 2.35100
Mean absolute error 1.15479
Median absolute error 0.87296
Test
Mean squared error 47.59718
Mean absolute error 5.50604
Median absolute error 4.74099

Train
Mean squared error 3.12799
Mean absolute error 1.33681
Median absolute error 1.04939
Test
Mean squared error 13.93264
Mean absolute error 3.06504
Median absolute error 2.66055

