In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score 

In [2]:
# Load the California Housing table into a DataFrame.
# The former encoding='ansi' is an alias of the Windows-only 'mbcs' codec, so the
# read failed with "LookupError: unknown encoding" on Linux/macOS. UTF-8 decodes
# ASCII content identically and is available on every platform.
# NOTE(review): assumes the file holds only ASCII text (the header is ASCII and all
# columns are consumed as numbers by the model) — confirm if text columns are added.
df = pd.read_csv('./input/04_California Housing.csv', encoding='utf-8')
df.columns

Index(['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup',
       'Latitude', 'Longitude', 'Price'],
      dtype='object')

In [3]:
# Feature matrix: the eight predictor columns, selected explicitly by name.
X = df[[
    'MedInc',
    'HouseAge',
    'AveRooms',
    'AveBedrms',
    'Population',
    'AveOccup',
    'Latitude',
    'Longitude',
]]

# Regression target.
y = df['Price']

# Hold out 30% of the rows as a test split; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)


In [4]:
# Candidate hyper-parameters. random_state is listed with a single value so that
# every fitted candidate uses the same seed.
param_grid = [{
    'max_depth': [5, 10],
    'n_estimators': [300, 600],
    'random_state': [12],
}]
model = GradientBoostingRegressor()

# Set up the search over every hyper-parameter combination (3-fold CV, scored by R2).
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='r2',
    cv=3,
)

# Train all of the candidate models on the training split.
grid_search.fit(X_train, y_train)

# Pick the best-scoring hyper-parameter combination and report it.
best_param = grid_search.best_params_
print(f'best_param = {best_param}')

# Predict the test split with the model built from the best hyper-parameters,
# then score those predictions with R2.
y_pred = grid_search.best_estimator_.predict(X_test)
r2 = r2_score(y_test, y_pred)
print(f'R2 {r2}')

best_param = {'max_depth': 5, 'n_estimators': 600, 'random_state': 12}
R2 0.8381145903771181
