In [2]:
import numpy as np

In [3]:
import PreProcess as pp
# Load the Boston housing data set; every field is read as a string first.
data = np.loadtxt('./housing.data', dtype=str)

# Assemble the feature matrix column-wise from three pieces, in this order:
# the output of pp.scaling for the listed columns, then the outputs of
# pp.to_dummy for column 3 and for column 8 (with their listed category labels).
feature_blocks = (
    pp.scaling(data, [0, 1, 2, 4, 5, 6, 7, 9, 10, 11, 12]),
    pp.to_dummy(data, 3, ['0', '1']),
    pp.to_dummy(data, 8, ['1', '2', '3', '4', '5', '6', '7', '8', '24']),
)
x = np.hstack(feature_blocks)

# The last column, converted to float, is used as the regression target.
y = data[:, -1].astype(float)

In [4]:
from sklearn.model_selection import ShuffleSplit

# Reproducible 70% train split; with test_size left unset, the remaining
# samples form the test set.
# ShuffleSplit.split() yields one (train_indices, test_indices) pair per split,
# so exactly one split is requested and its single pair is taken with next().
# (Unpacking the generator directly would bind each name to a whole pair,
# not to the train and test index arrays.)
ss = ShuffleSplit(n_splits=1, train_size=0.7, random_state=0)
train_idx, test_idx = next(ss.split(x, y))

x_train, x_test = x[train_idx], x[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import SGDRegressor
from sklearn.svm import SVR
from sklearn.ensemble import BaggingRegressor
from sklearn import tree

# Candidate regressors as (label, estimator) pairs; the labels match the keys
# of `grid_params`.
# Estimators that draw random numbers while fitting (bagging, SGD) get a fixed
# random_state so that re-running the notebook reproduces the same scores.
estimators = [
    ('LNR', LinearRegression()),
    ('RDG', Ridge()),
    ('LAS', Lasso()),
    ('SVR', SVR(kernel='linear')),
    ('RBF', SVR(kernel='rbf')),
    ('BAG', BaggingRegressor(tree.DecisionTreeRegressor(), random_state=0)),
    ('SGD', SGDRegressor(max_iter=1000, random_state=0))
]

In [6]:
# Hyper-parameter grid per estimator label. Every grid starts out empty;
# each label gets its own separate dict object.
grid_params = {
    label: {}
    for label in ('LNR', 'RDG', 'LAS', 'SVR', 'RBF', 'BAG', 'SGD')
}

In [7]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import MLUtils as ut

# No scaler / reductor is passed to the pipelines; the original author left
# `ut.scaler(0)` and `ut.reductor(0)` as commented-out alternatives.
scaler = None  # ut.scaler(0)
reductor = None  # ut.reductor(0)

pipelines = ut.create_pipelines(estimators, scaler, reductor)

# Maps (estimator label, metric name) -> metric value.
scores = {}
for name, pipeline in pipelines.items():
    # Training: 3-fold grid search scored by R^2.
    est = GridSearchCV(
        estimator=pipeline,
        param_grid=ut.get_params(pipeline, grid_params[name]),
        cv=3,
        scoring='r2',
        return_train_score=False,
        n_jobs=-1,
    )
    est.fit(x_train, y_train)

    # Predictions on the training set and on the held-out test set.
    train_pred = est.predict(x_train)
    test_pred = est.predict(x_test)

    # Record the scores, training metrics first, then test metrics.
    scores.update({
        (name, 'train_mse'): mean_squared_error(y_train, train_pred),
        (name, 'train_r2'): r2_score(y_train, train_pred),
        (name, 'test_mse'): mean_squared_error(y_test, test_pred),
        (name, 'test_r2'): r2_score(y_test, test_pred),
    })

# Display the scores.
for key, value in scores.items():
    print(key, value)

('LNR', 'train_mse') 19.216310128285837
('LNR', 'train_r2') 0.7732977003979536
('LNR', 'test_mse') 26.730412397209232
('LNR', 'test_r2') 0.6789737421956811
('RDG', 'train_mse') 19.224104626445573
('RDG', 'train_r2') 0.773205745665476
('RDG', 'test_mse') 26.850127544828418
('RDG', 'test_r2') 0.6775359901224399
('LAS', 'train_mse') 26.041379268714255
('LAS', 'train_r2') 0.69277969986878
('LAS', 'test_mse') 33.40658128772811
('LAS', 'test_r2') 0.5987944511490286
('SVR', 'train_mse') 21.85746604517524
('SVR', 'train_r2') 0.7421389547298647
('SVR', 'test_mse') 31.5747497261627
('SVR', 'test_r2') 0.6207943373609818
('RBF', 'train_mse') 23.160071102661714
('RBF', 'train_r2') 0.7267716152128593
('RBF', 'test_mse') 33.30782452894624
('RBF', 'test_r2') 0.5999804976728726
('BAG', 'train_mse') 2.040625141242939
('BAG', 'train_r2') 0.9759259499322624
('BAG', 'test_mse') 14.160733552631577
('BAG', 'test_r2') 0.8299327659965324
('SGD', 'train_mse') 19.288463241546452
('SGD', 'train_r2') 0.77244648200



In [21]:
from sklearn.metrics import mean_absolute_error
# Sanity check: fit a plain linear regression on the training set and print
# R^2, MSE and MAE of its predictions on that same training set.
a = LinearRegression().fit(x_train, y_train)
pred = a.predict(x_train)
for metric in (r2_score, mean_squared_error, mean_absolute_error):
    print(metric(y_train, pred))

0.7732977003979536
19.216310128285837
3.02984091559643


In [24]:
import Evaluation as ev
# Print the same three metrics for the same predictions using the project's
# Evaluation module, for comparison with the scikit-learn values.
for metric in (ev.R2, ev.MSE, ev.MAE):
    print(metric(y_train, pred))

0.7732977003979536
19.216310128285837
3.02984091559643
