In [2]:
import numpy as np
import pandas as pd
import scipy.sparse
import xgboost as xgb
import mix_pandas as mix
import predict as predict_mix
import db_column_name as db
import processing 

%matplotlib inline
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'svg'

from pylab import rcParams
rcParams['figure.figsize'] = 10, 7


# Project object exposing the column names used below
# (date, point, offset, value).
cn = db.ColumnName()

# Target series. The return value of set_index_date is discarded, so it
# presumably installs the date index in place -- TODO confirm in mix_pandas.
target_minT = pd.read_csv('./data/31286_103.csv')
mix.set_index_date(target_minT, cn.date)

# Feature table, indexed the same way and stripped of the point column.
X = pd.read_csv('./data/character_31286.csv')
mix.set_index_date(X, cn.date)
X = X.drop(columns=[cn.point])

# Retain only columns whose name contains 'avg', plus the offset column,
# which is still passed to mean_day and removed right after it.
X = X.loc[:, [name for name in X.columns
              if 'avg' in name or name == cn.offset]]

# Presumably aggregates the features to one row per day -- verify in
# mix_pandas.mean_day.
X = mix.mean_day(X)
X = X.drop(columns=[cn.offset])

# NOTE(review): round('D') goes to the *nearest* day, so timestamps after
# noon move to the following date -- confirm this is the intended alignment.
target_minT.index = target_minT.index.round('D')

# Align target and features on a common index, passing each through
# mix.clean after reindexing.
target_minT = mix.clean(target_minT.reindex(X.index))
X = mix.clean(X.reindex(target_minT.index))

# Winsorize the target value column with the [0.05, 0.95] bounds, then
# re-align the features in case the target index changed.
target_minT = mix.winsorized(target_minT, cn.value, [0.05, 0.95])
X = X.reindex(target_minT.index)
print(X.shape)

(1517, 54)


In [3]:
# Restrict both frames to rows whose index year is earlier than 2016.
# Each frame is masked with its own index, exactly as before.
X = X.loc[X.index.year < 2016]
target_minT = target_minT.loc[target_minT.index.year < 2016]
print(X.shape)

(818, 54)


In [9]:
from scipy.fftpack import ifft, idct

# Build the FFT-domain features and target from X and the target value column.
fft_X, fft_target = processing.fft_target(X, target_minT, cn.value)

params = {
    'verbosity': 0,
    'max_depth': 4,
    'learning_rate': 0.09,
    'min_child_weight': 6,
}
reg = xgb.XGBRegressor(**params)

# One call per spectral component. Each result is iterated below as a sequence
# of (train, test) pairs whose elements expose a date column and a
# 'prediction' column. The trailing 5 is presumably the number of splits --
# TODO confirm in predict.predict_model_split.
# NOTE(review): the same estimator instance is passed to both calls; verify
# that predict_model_split does not rely on state left from the previous fit.
predict_am = predict_mix.predict_model_split(reg, fft_X, fft_target, 'amplitude', 5)
predict_ph = predict_mix.predict_model_split(reg, fft_X, fft_target, 'phase', 5)

for am, ph in zip(predict_am, predict_ph):
    # Take explicit copies of the per-fold target rows: a column is added to
    # them below, and writing into the bare result of .loc[...] is a chained
    # assignment (SettingWithCopyWarning, ambiguous view/copy semantics).
    real_target_train = target_minT.loc[am[0][cn.date]].copy()
    real_target_test = target_minT.loc[am[1][cn.date]].copy()

    # Recombine predicted amplitude and phase, then take the real part of
    # the inverse FFT as the prediction.
    Y = processing.reshapeToComplex(am[0].loc[:, 'prediction'], ph[0].loc[:, 'prediction'])
    real_target_train['FFT prediction'] = ifft(Y).real

    Y = processing.reshapeToComplex(am[1].loc[:, 'prediction'], ph[1].loc[:, 'prediction'])
    real_target_test['FFT prediction'] = ifft(Y).real

    print("Train")
    predict_mix.print_mean(real_target_train[[cn.value]], real_target_train[['FFT prediction']])
    print("Test")
    predict_mix.print_mean(real_target_test[[cn.value]], real_target_test[['FFT prediction']])
    print()

Train
Mean squared error 0.06379
Mean absolute error 0.17832
Median absolute error 0.12835
Test
Mean squared error 25.86236
Mean absolute error 4.37130
Median absolute error 4.41377

Train
Mean squared error 0.26642
Mean absolute error 0.39867
Median absolute error 0.30933
Test
Mean squared error 13.80567
Mean absolute error 2.89722
Median absolute error 2.47564

Train
Mean squared error 0.58564
Mean absolute error 0.57988
Median absolute error 0.43897
Test
Mean squared error 18.97124
Mean absolute error 3.38303
Median absolute error 2.79444

Train
Mean squared error 1.11655
Mean absolute error 0.80186
Median absolute error 0.62111
Test
Mean squared error 14.97420
Mean absolute error 2.70558
Median absolute error 1.85231

Train
Mean squared error 1.67610
Mean absolute error 0.97274
Median absolute error 0.72917
Test
Mean squared error 16.32108
Mean absolute error 3.08658
Median absolute error 2.59601



In [15]:
# Build the DCT-domain features and target; the dict is forwarded to
# processing.dct_target (here requesting norm='ortho').
dct_X, dct_target = processing.dct_target(X, target_minT, cn.value, {'norm':'ortho'})


params = {
    'max_depth': 4,
    'learning_rate': 0.09,
    'min_child_weight': 6,
    # Currently disabled settings:
#     'subsample':0.8,
#     'colsample_bytree':0.8,
#     'gamma': 12,
}

reg = xgb.XGBRegressor(**params)

# Iterated below as (train, test) pairs, each exposing a date column and a
# 'prediction' column. The trailing 5 is presumably the number of splits --
# TODO confirm in predict.predict_model_split.
predict = predict_mix.predict_model_split(reg, dct_X, dct_target, 'dct', 5)

for train, test in predict:
    # Explicit copies: a column is added below, and assigning into the bare
    # result of .loc[...] is a chained assignment (SettingWithCopyWarning,
    # ambiguous view/copy semantics).
    target_train = target_minT.loc[train[cn.date]].copy()
    target_test = target_minT.loc[test[cn.date]].copy()

    # NOTE(review): idct works along the last axis by default. If `train` and
    # `test` are DataFrames, [['prediction']].values has shape (n, 1), so the
    # inverse is taken over length-1 rows (which leaves the values unchanged)
    # rather than over the length-n series. Confirm this matches what
    # processing.dct_target does before changing it (e.g. to axis=0).
    Y = train[['prediction']].values
    target_train['DCT prediction'] = idct(Y, norm='ortho')

    Y = test[['prediction']].values
    target_test['DCT prediction'] = idct(Y, norm='ortho')

    print("Train")
    predict_mix.print_mean(target_train[[cn.value]], target_train[['DCT prediction']])
    print("Test")
    predict_mix.print_mean(target_test[[cn.value]], target_test[['DCT prediction']])
    print()

Train
Mean squared error 0.06475
Mean absolute error 0.17612
Median absolute error 0.12000
Test
Mean squared error 30.58746
Mean absolute error 4.77668
Median absolute error 4.75751

Train
Mean squared error 0.18376
Mean absolute error 0.32962
Median absolute error 0.24240
Test
Mean squared error 15.41314
Mean absolute error 2.96888
Median absolute error 2.10893

Train
Mean squared error 0.53732
Mean absolute error 0.53100
Median absolute error 0.38457
Test
Mean squared error 15.91568
Mean absolute error 2.95551
Median absolute error 2.27141

Train
Mean squared error 0.90543
Mean absolute error 0.71700
Median absolute error 0.56362
Test
Mean squared error 17.55546
Mean absolute error 2.95918
Median absolute error 2.05105

Train
Mean squared error 1.40607
Mean absolute error 0.89367
Median absolute error 0.68252
Test
Mean squared error 16.07702
Mean absolute error 3.10195
Median absolute error 2.62836

