In [None]:
import numpy as np  # required by this cell; must be imported before first use on a fresh kernel

import PreProcess as pp

# Load the Boston housing data. Every field is read as a string so that the
# categorical columns can be handed to pp.to_dummy as string labels.
data = np.loadtxt('./housing.data', dtype=str)

# Assemble the feature matrix by stacking three groups of columns side by side:
#   * columns 0,1,2,4,5,6,7,9,10,11,12 passed through pp.scaling
#   * column 3 expanded by pp.to_dummy over the labels '0'/'1'
#   * column 8 expanded by pp.to_dummy over the labels '1'..'8' and '24'
# NOTE(review): pp.scaling runs on the full dataset before the train/test
# split; if it fits statistics on the data, test rows influence the scaling
# (leakage) — confirm against PreProcess.
numeric_cols = [0, 1, 2, 4, 5, 6, 7, 9, 10, 11, 12]
x = np.hstack((
    pp.scaling(data, numeric_cols),
    pp.to_dummy(data, 3, ['0', '1']),
    pp.to_dummy(data, 8, ['1', '2', '3', '4', '5', '6', '7', '8', '24']),
))

# Regression target: the last column of the file, converted to float.
y = data[:, -1].astype(float)

In [None]:
from sklearn.model_selection import ShuffleSplit

# Single shuffled hold-out split: 70% of the rows for training, the rest for
# testing. ShuffleSplit.split yields one (train_indices, test_indices) pair per
# split, so only one split is requested and it is unpacked directly with next().
ss = ShuffleSplit(n_splits=1, train_size=0.7, random_state=0)
train_idx, test_idx = next(ss.split(x, y))

x_train, x_test = x[train_idx], x[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import SGDRegressor
from sklearn.svm import SVR
from sklearn.ensemble import BaggingRegressor
from sklearn import tree

# Candidate regressors as (short key, estimator) pairs. The short keys are the
# same ones used to index grid_params and to label the reported scores.
# The stochastic models (bagging, SGD) are seeded so that repeated runs of the
# notebook produce the same scores, matching the seeded train/test split.
estimators = [
    ('LNR', LinearRegression()),
    ('RDG', Ridge()),
    ('LAS', Lasso()),
    ('SVR', SVR(kernel='linear')),
    ('RBF', SVR(kernel='rbf')),
    ('BAG', BaggingRegressor(tree.DecisionTreeRegressor(), random_state=0)),
    ('SGD', SGDRegressor(max_iter=1000, random_state=0))
]

In [None]:
# Hyper-parameter grid for each estimator, keyed by the estimator's short name.
# Every grid is currently empty; fill in an entry to search over that model's
# parameters.
# NOTE(review): each grid is passed through ut.get_params before reaching
# GridSearchCV — confirm the expected key format against MLUtils.
grid_params = dict(
    LNR={},
    RDG={},
    LAS={},
    SVR={},
    RBF={},
    BAG={},
    SGD={},
)

In [None]:
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import MLUtils as ut

# Shared preprocessing steps handed to every pipeline.
# NOTE(review): the meaning of the 0 passed to ut.scaler is defined in MLUtils — confirm.
scaler = ut.scaler(0)
reductor = ut.reductor()

# One pipeline per estimator, keyed by the estimator's short name.
pipelines = ut.create_pipelines(estimators, scaler, reductor)

# scores maps (estimator name, metric name) -> metric value.
scores = {}
for name, pipeline in pipelines.items():
    # Training: 3-fold grid search over this estimator's grid, selecting on R^2.
    est = GridSearchCV(
        pipeline,
        ut.get_params(pipeline, grid_params[name]),
        cv=3,
        scoring='r2',
        return_train_score=False,
        n_jobs=-1,
    )
    est.fit(x_train, y_train)

    # Predictions of the fitted search on the training and the held-out rows.
    train_pred = est.predict(x_train)
    test_pred = est.predict(x_test)

    # Record the scores: train first, then test, MSE before R^2.
    scores.update({
        (name, 'train_mse'): mean_squared_error(y_train, train_pred),
        (name, 'train_r2'): r2_score(y_train, train_pred),
        (name, 'test_mse'): mean_squared_error(y_test, test_pred),
        (name, 'test_r2'): r2_score(y_test, test_pred),
    })

# Display the scores, one (estimator, metric) entry per line.
for key, value in scores.items():
    print(key, value)