# 数据准备

## 数据导入

In [29]:
from ngboost import NGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

import pandas as pd
# Absolute location of the 2012 CSV for site 20739 on the author's machine.
# NOTE(review): this path is machine-specific; the cell will not run elsewhere as-is.
csv_path = '/Users/apple/Documents/ML_Project/ML - 2.1/Data/相近8个地点2012年数据/20739-2012.csv'

# Row indices 0-2 are skipped, so the first remaining line is parsed as the header.
leading_rows_to_skip = [0, 1, 2]

data = pd.read_csv(csv_path, skiprows=leading_rows_to_skip)

# Summary statistics of every numeric column, shown as the cell output.
data.describe()

Unnamed: 0,Year,Month,Day,Hour,Minute,power (MW),wind direction at 100m (deg),wind speed at 100m (m/s),air temperature at 2m (K),surface air pressure (Pa),density at hub height (kg/m^3)
count,105408.0,105408.0,105408.0,105408.0,105408.0,105408.0,105408.0,105408.0,105408.0,105408.0,105408.0
mean,2012.0,6.513661,15.756831,11.5,27.5,4.125135,193.870912,7.899902,287.464005,84606.872893,1.011686
std,0.0,3.45125,8.811571,6.922219,17.260345,3.694119,88.377513,4.1005,9.776362,468.523564,0.033038
min,2012.0,1.0,1.0,0.0,0.0,0.0,0.022,0.039,264.279,83157.784,0.939
25%,2012.0,4.0,8.0,5.75,13.75,0.647,122.95975,4.797,279.383,84290.328,0.986
50%,2012.0,7.0,16.0,11.5,27.5,2.9855,212.3705,7.562,287.852,84634.568,1.008
75%,2012.0,10.0,23.0,17.25,41.25,7.813,267.541,10.54625,295.039,84930.296,1.036
max,2012.0,12.0,31.0,23.0,55.0,10.0,359.989,33.057,309.62,86287.16,1.102


## 数据转换

In [28]:
from sklearn.preprocessing import MinMaxScaler

# Target is the 'power (MW)' column; every other column of `data` is a feature.
Y = data['power (MW)']
X = data.drop(columns=['power (MW)'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=520
)

# Feature scaler: fitted on the training split only, then applied to the test split.
X_Scaler = MinMaxScaler()
X_train = X_Scaler.fit_transform(X_train)
X_test = X_Scaler.transform(X_test)

# Target scaler: MinMaxScaler expects a 2-D input, so each Series is reshaped into a
# single column for scaling and flattened back to 1-D afterwards.
Y_Scaler = MinMaxScaler()
Y_train = Y_Scaler.fit_transform(Y_train.values.reshape(-1, 1)).ravel()
Y_test = Y_Scaler.transform(Y_test.values.reshape(-1, 1)).ravel()

# 模型预测

In [30]:
from ngboost import NGBRegressor
def model_test(Base, X_train, X_test, Y_train, Y_test,
               n_estimators=500, verbose_eval=100):
    """Fit an NGBRegressor with the given base learner and score it on the test split.

    The model is built with ``NGBRegressor(Base=Base, n_estimators=n_estimators,
    verbose_eval=verbose_eval)``, fitted on the training data, and evaluated on the
    test data. The model description and both metrics are printed.

    Parameters
    ----------
    Base
        Base learner forwarded unchanged to ``NGBRegressor``.
    X_train, Y_train
        Training features and targets passed to ``fit``.
    X_test, Y_test
        Held-out features and targets used for the metrics.
    n_estimators : int, default 500
        Forwarded to ``NGBRegressor``.
    verbose_eval : int, default 100
        Forwarded to ``NGBRegressor``.

    Returns
    -------
    dict
        ``{'test_MSE': ..., 'test_NLL': ...}`` so that runs with different base
        learners can be compared programmatically instead of by reading the
        printed text. Both values are computed on whatever scale ``Y_test`` is
        supplied in.
    """
    ngb = NGBRegressor(Base=Base, n_estimators=n_estimators, verbose_eval=verbose_eval)
    print(ngb, '\n')
    ngb.fit(X_train, Y_train)

    # Point predictions and the full predictive distribution for the test rows.
    Y_preds = ngb.predict(X_test)
    Y_dists = ngb.pred_dist(X_test)

    # Test mean squared error; arguments follow scikit-learn's (y_true, y_pred) order.
    test_MSE = mean_squared_error(Y_test, Y_preds)
    print('\nTest MSE', test_MSE)

    # Test negative log likelihood: mean of -log p(y_test) under the predicted distributions.
    test_NLL = -Y_dists.logpdf(Y_test).mean()
    print('Test NLL', test_NLL)

    return {'test_MSE': test_MSE, 'test_NLL': test_NLL}

## default_linear_learner

In [31]:
from ngboost.learners import default_linear_learner

# Evaluate NGBoost with the linear base learner on the full training split.
model_test(
    Base=default_linear_learner,
    X_train=X_train,
    X_test=X_test,
    Y_train=Y_train,
    Y_test=Y_test,
    n_estimators=500,
    verbose_eval=100,
)

NGBRegressor(Base=<function default_linear_learner at 0x1a1ea9b488>,
             Dist=<class 'ngboost.distns.normal.Normal'>,
             Score=<class 'ngboost.scores.MLE'>, learning_rate=0.01,
             minibatch_frac=1.0, n_estimators=500, natural_gradient=True,
             tol=0.0001, verbose=True, verbose_eval=100) 

[iter 0] loss=0.4233 val_loss=0.0000 scale=1.0000 norm=0.5075
[iter 100] loss=-0.1296 val_loss=0.0000 scale=1.0000 norm=0.3964
[iter 200] loss=-0.4713 val_loss=0.0000 scale=1.0000 norm=0.3808
[iter 300] loss=-0.6817 val_loss=0.0000 scale=1.0000 norm=0.3446
[iter 400] loss=-0.7670 val_loss=0.0000 scale=1.0000 norm=0.3483

Test MSE 0.015955682176653666
Test NLL -0.7861904540301601


## default_tree_learner

In [32]:
from ngboost.learners import default_tree_learner

# Evaluate NGBoost with the tree base learner on the full training split.
# NOTE(review): the output saved under this cell reports Base=default_linear_learner
# and the same metrics as the linear run, so it appears stale - re-run this cell.
model_test(
    Base=default_tree_learner,
    X_train=X_train,
    X_test=X_test,
    Y_train=Y_train,
    Y_test=Y_test,
    n_estimators=500,
    verbose_eval=100,
)

NGBRegressor(Base=<function default_linear_learner at 0x1a1ea9b488>,
             Dist=<class 'ngboost.distns.normal.Normal'>,
             Score=<class 'ngboost.scores.MLE'>, learning_rate=0.01,
             minibatch_frac=1.0, n_estimators=500, natural_gradient=True,
             tol=0.0001, verbose=True, verbose_eval=100) 

[iter 0] loss=0.4233 val_loss=0.0000 scale=1.0000 norm=0.5075
[iter 100] loss=-0.1296 val_loss=0.0000 scale=1.0000 norm=0.3964
[iter 200] loss=-0.4713 val_loss=0.0000 scale=1.0000 norm=0.3808
[iter 300] loss=-0.6817 val_loss=0.0000 scale=1.0000 norm=0.3446
[iter 400] loss=-0.7670 val_loss=0.0000 scale=1.0000 norm=0.3483

Test MSE 0.015955682176653666
Test NLL -0.7861904540301601


## esn_ridge_learner

In [19]:
from ngboost.learners import esn_ridge_learner

# Evaluate NGBoost with the ESN ridge base learner. The learner imported above is the
# one passed as Base, so this cell no longer depends on default_tree_learner leaking
# in from an earlier cell.
# Only the first 8000 training rows are used; the test split is used in full.
# NOTE(review): presumably the subset keeps runtime manageable - the saved run of this
# cell ends in KeyboardInterrupt; confirm before comparing against the other learners.
model_test(Base=esn_ridge_learner,
           X_train=X_train[:8000], X_test=X_test,
           Y_train=Y_train[:8000], Y_test=Y_test,
           n_estimators=500, verbose_eval=10)

NGBRegressor(Base=<function esn_ridge_learner at 0x1a1fceb7b8>,
             Dist=<class 'ngboost.distns.normal.Normal'>,
             Score=<class 'ngboost.scores.MLE'>, learning_rate=0.01,
             minibatch_frac=1.0, n_estimators=500, natural_gradient=True,
             tol=0.0001, verbose=True, verbose_eval=1)
[iter 0] loss=2.7261 val_loss=0.0000 scale=1.0000 norm=3.3491
[iter 1] loss=2.7167 val_loss=0.0000 scale=1.0000 norm=3.3216
[iter 2] loss=2.7074 val_loss=0.0000 scale=1.0000 norm=3.2942
[iter 3] loss=2.6984 val_loss=0.0000 scale=1.0000 norm=3.2673
[iter 4] loss=2.6896 val_loss=0.0000 scale=1.0000 norm=3.2403
[iter 5] loss=2.6810 val_loss=0.0000 scale=1.0000 norm=3.2139
[iter 6] loss=2.6727 val_loss=0.0000 scale=1.0000 norm=3.1880
[iter 7] loss=2.6644 val_loss=0.0000 scale=1.0000 norm=3.1619
[iter 8] loss=2.6564 val_loss=0.0000 scale=1.0000 norm=3.1364
[iter 9] loss=2.6485 val_loss=0.0000 scale=1.0000 norm=3.1110
[iter 10] loss=2.6407 val_loss=0.0000 scale=1.0000 norm=3.08

[iter 125] loss=2.0796 val_loss=0.0000 scale=1.0000 norm=1.4725
[iter 126] loss=2.0759 val_loss=0.0000 scale=1.0000 norm=1.4665
[iter 127] loss=2.0721 val_loss=0.0000 scale=1.0000 norm=1.4605
[iter 128] loss=2.0684 val_loss=0.0000 scale=1.0000 norm=1.4546
[iter 129] loss=2.0647 val_loss=0.0000 scale=1.0000 norm=1.4489
[iter 130] loss=2.0610 val_loss=0.0000 scale=1.0000 norm=1.4433
[iter 131] loss=2.0573 val_loss=0.0000 scale=1.0000 norm=1.4379
[iter 132] loss=2.0537 val_loss=0.0000 scale=1.0000 norm=1.4327
[iter 133] loss=2.0501 val_loss=0.0000 scale=1.0000 norm=1.4274
[iter 134] loss=2.0464 val_loss=0.0000 scale=1.0000 norm=1.4219
[iter 135] loss=2.0428 val_loss=0.0000 scale=1.0000 norm=1.4166
[iter 136] loss=2.0391 val_loss=0.0000 scale=1.0000 norm=1.4114
[iter 137] loss=2.0355 val_loss=0.0000 scale=1.0000 norm=1.4063
[iter 138] loss=2.0319 val_loss=0.0000 scale=1.0000 norm=1.4013
[iter 139] loss=2.0283 val_loss=0.0000 scale=1.0000 norm=1.3964
[iter 140] loss=2.0247 val_loss=0.0000 s

[iter 254] loss=1.6981 val_loss=0.0000 scale=1.0000 norm=1.1288
[iter 255] loss=1.6960 val_loss=0.0000 scale=1.0000 norm=1.1279
[iter 256] loss=1.6939 val_loss=0.0000 scale=1.0000 norm=1.1269
[iter 257] loss=1.6918 val_loss=0.0000 scale=1.0000 norm=1.1260
[iter 258] loss=1.6898 val_loss=0.0000 scale=1.0000 norm=1.1251
[iter 259] loss=1.6878 val_loss=0.0000 scale=1.0000 norm=1.1242
[iter 260] loss=1.6858 val_loss=0.0000 scale=1.0000 norm=1.1234
[iter 261] loss=1.6838 val_loss=0.0000 scale=1.0000 norm=1.1225
[iter 262] loss=1.6818 val_loss=0.0000 scale=1.0000 norm=1.1217
[iter 263] loss=1.6798 val_loss=0.0000 scale=1.0000 norm=1.1208
[iter 264] loss=1.6778 val_loss=0.0000 scale=1.0000 norm=1.1200
[iter 265] loss=1.6759 val_loss=0.0000 scale=1.0000 norm=1.1191
[iter 266] loss=1.6739 val_loss=0.0000 scale=1.0000 norm=1.1184
[iter 267] loss=1.6720 val_loss=0.0000 scale=1.0000 norm=1.1175
[iter 268] loss=1.6701 val_loss=0.0000 scale=1.0000 norm=1.1167
[iter 269] loss=1.6682 val_loss=0.0000 s

[iter 383] loss=1.5298 val_loss=0.0000 scale=1.0000 norm=1.0717
[iter 384] loss=1.5291 val_loss=0.0000 scale=1.0000 norm=1.0716
[iter 385] loss=1.5284 val_loss=0.0000 scale=1.0000 norm=1.0714
[iter 386] loss=1.5278 val_loss=0.0000 scale=1.0000 norm=1.0713
[iter 387] loss=1.5271 val_loss=0.0000 scale=1.0000 norm=1.0712
[iter 388] loss=1.5264 val_loss=0.0000 scale=1.0000 norm=1.0710
[iter 389] loss=1.5258 val_loss=0.0000 scale=1.0000 norm=1.0709
[iter 390] loss=1.5252 val_loss=0.0000 scale=1.0000 norm=1.0707
[iter 391] loss=1.5246 val_loss=0.0000 scale=1.0000 norm=1.0706
[iter 392] loss=1.5240 val_loss=0.0000 scale=1.0000 norm=1.0705
[iter 393] loss=1.5233 val_loss=0.0000 scale=1.0000 norm=1.0704
[iter 394] loss=1.5227 val_loss=0.0000 scale=1.0000 norm=1.0703
[iter 395] loss=1.5221 val_loss=0.0000 scale=1.0000 norm=1.0702
[iter 396] loss=1.5216 val_loss=0.0000 scale=1.0000 norm=1.0701
[iter 397] loss=1.5210 val_loss=0.0000 scale=1.0000 norm=1.0700
[iter 398] loss=1.5204 val_loss=0.0000 s

KeyboardInterrupt: 