In [1]:
import numpy as np
import pandas as pd
import scipy.sparse
import xgboost as xgb
import mix_pandas as mix
import predict as predict_mix
import db_column_name as db
import processing 

%matplotlib inline
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'svg'

from pylab import rcParams
rcParams['figure.figsize'] = 10, 7


# Column-name constants shared by every cell below.
cn = db.ColumnName()

# --- Load -------------------------------------------------------------------
# set_index_date is called for its effect on the frame; its return value is
# not used here (presumably it installs a datetime index from `cn.date` in
# place -- confirm in mix_pandas).
target_minT = pd.read_csv('./data/31286_103.csv')
mix.set_index_date(target_minT, cn.date)

X = pd.read_csv('./data/character_31286.csv')
mix.set_index_date(X, cn.date)

# --- Feature selection ------------------------------------------------------
# Discard the point column, then keep only the columns whose name contains
# 'avg' together with the offset column (still needed by mean_day below --
# presumably; confirm in mix_pandas).
X = X.drop(columns=[cn.point])
avg_or_offset = [name for name in X.columns
                 if 'avg' in name or name == cn.offset]
X = X[avg_or_offset]

# --- Bring features and target onto a common index --------------------------
# NOTE(review): mean_day presumably aggregates the features to one row per
# day; the target index is rounded to day resolution so both can be aligned.
X = mix.mean_day(X)
X = X.drop(columns=[cn.offset])
target_minT.index = target_minT.index.round('D')

# Align the target to the feature index and clean it, then align the features
# back to whatever target rows survived and clean those as well.
target_minT = mix.clean(target_minT.reindex(X.index))
X = mix.clean(X.reindex(target_minT.index))

# NOTE(review): [0.1, 0.80] are presumably the lower/upper limits passed to
# the winsorizing helper -- the asymmetry looks deliberate but is worth
# confirming. Features are re-aligned afterwards in case rows were removed.
target_minT = mix.winsorized(target_minT, cn.value, [0.1, 0.80])
X = X.reindex(target_minT.index)
print(X.shape)

(1369, 54)


In [2]:
from scipy.fftpack import ifft, idct

# Build the frequency-domain representation of the problem. The result is used
# below through its 'amplitude' and 'phase' targets (exact layout is defined in
# processing.fft_target -- not visible from this notebook).
fft_X, fft_target = processing.fft_target(X, target_minT, cn.value)

params = {
    'verbosity': 0,
    'max_depth': 5,
    'learning_rate': 0.09,
    'min_child_weight': 6,
}
reg = xgb.XGBRegressor(**params)

# One regression per FFT component. Each call yields a sequence of
# (train, test) frames that carry a `cn.date` column and a 'prediction'
# column; the trailing 5 is presumably the number of splits -- confirm in
# predict.predict_model_split.
predict_am = predict_mix.predict_model_split(reg, fft_X, fft_target, 'amplitude', 5)
predict_ph = predict_mix.predict_model_split(reg, fft_X, fft_target, 'phase', 5)

for am, ph in zip(predict_am, predict_ph):
    # Take explicit copies: adding a column to the result of `.loc[...]`
    # otherwise triggers pandas' SettingWithCopyWarning.
    real_target_train = target_minT.loc[am[0][cn.date]].copy()
    real_target_test = target_minT.loc[am[1][cn.date]].copy()

    # Recombine predicted amplitude and phase into complex coefficients and
    # invert the transform; only the real part is kept as the prediction.
    Y = processing.reshapeToComplex(am[0].loc[:, 'prediction'], ph[0].loc[:, 'prediction'])
    real_target_train['FFT prediction'] = ifft(Y).real

    Y = processing.reshapeToComplex(am[1].loc[:, 'prediction'], ph[1].loc[:, 'prediction'])
    real_target_test['FFT prediction'] = ifft(Y).real

    print("Train")
    predict_mix.print_mean(real_target_train[[cn.value]], real_target_train[['FFT prediction']])
    print("Test")
    predict_mix.print_mean(real_target_test[[cn.value]], real_target_test[['FFT prediction']])
    print()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


Train
Mean squared error 0.02884
Mean absolute error 0.12484
Median absolute error 0.09922
Test
Mean squared error 19.09037
Mean absolute error 3.38650
Median absolute error 2.82537

Train
Mean squared error 0.26374
Mean absolute error 0.38806
Median absolute error 0.31785
Test
Mean squared error 13.96401
Mean absolute error 2.76745
Median absolute error 2.26191

Train
Mean squared error 0.55173
Mean absolute error 0.56865
Median absolute error 0.44053
Test
Mean squared error 22.29600
Mean absolute error 3.51412
Median absolute error 2.62706

Train
Mean squared error 0.82348
Mean absolute error 0.68352
Median absolute error 0.53843
Test
Mean squared error 48.99088
Mean absolute error 5.61466
Median absolute error 4.65372

Train
Mean squared error 1.18063
Mean absolute error 0.81782
Median absolute error 0.64960
Test
Mean squared error 15.52701
Mean absolute error 3.30239
Median absolute error 2.95293



In [8]:
# Build the DCT-domain representation of the problem with orthonormal scaling;
# the same norm='ortho' is used for the inverse transform below.
dct_X, dct_target = processing.dct_target(X, target_minT, cn.value, {'norm': 'ortho'})

# Only the tree depth is set; every other XGBoost hyperparameter is left at
# its library default.
params = {
    'max_depth': 5,
}

reg = xgb.XGBRegressor(**params)

# Yields (train, test) frames carrying a `cn.date` column and a 'prediction'
# column; the trailing 5 is presumably the number of splits -- confirm in
# predict.predict_model_split.
predict = predict_mix.predict_model_split(reg, dct_X, dct_target, 'dct', 5)

for train, test in predict:
    # Take explicit copies: adding a column to the result of `.loc[...]`
    # otherwise triggers pandas' SettingWithCopyWarning.
    target_train = target_minT.loc[train[cn.date]].copy()
    target_test = target_minT.loc[test[cn.date]].copy()

    # NOTE(review): `[['prediction']].values` is 2-D with shape (n, 1) and
    # idct transforms along the last axis by default, so each inverse
    # transform here runs over a single element. Confirm this matches how
    # processing.dct_target applies the forward transform before changing it
    # (e.g. to a 1-D array or axis=0).
    Y = train[['prediction']].values
    target_train['DCT prediction'] = idct(Y, norm='ortho')

    Y = test[['prediction']].values
    target_test['DCT prediction'] = idct(Y, norm='ortho')

    print("Train")
    predict_mix.print_mean(target_train[[cn.value]], target_train[['DCT prediction']])
    print("Test")
    predict_mix.print_mean(target_test[[cn.value]], target_test[['DCT prediction']])
    print()

Train
Mean squared error 0.00595
Mean absolute error 0.05541
Median absolute error 0.03909
Test
Mean squared error 23.38754
Mean absolute error 3.59568
Median absolute error 2.67965

Train
Mean squared error 0.08603
Mean absolute error 0.21506
Median absolute error 0.15998
Test
Mean squared error 15.53186
Mean absolute error 3.00276
Median absolute error 2.34132

Train
Mean squared error 0.26686
Mean absolute error 0.39235
Median absolute error 0.30060
Test
Mean squared error 28.06473
Mean absolute error 3.78659
Median absolute error 2.67940

Train
Mean squared error 0.50885
Mean absolute error 0.53644
Median absolute error 0.38943
Test
Mean squared error 60.27729
Mean absolute error 6.13861
Median absolute error 4.75010

Train
Mean squared error 0.83035
Mean absolute error 0.69369
Median absolute error 0.54235
Test
Mean squared error 14.88795
Mean absolute error 3.18431
Median absolute error 2.94614

