In [2]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score

In [3]:
boston = load_boston()
X = boston["data"]
y = boston["target"]
names = boston["feature_names"]

In [4]:
X = pd.DataFrame(X, columns=names)
X.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [5]:
sc = StandardScaler()
X = sc.fit_transform(X)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

In [15]:
def gridsearch(  model, cv, params, model_name, X = X_train, y = y_train):
    grid = GridSearchCV(model, params, scoring = 'r2', cv = cv, n_jobs= -1)
    grid.fit(X, y)
    print('Обучение модели {} завершено'.format(model_name))
    return(grid)

In [16]:
cv = 10

In [17]:
from sklearn.tree import DecisionTreeRegressor

tr = DecisionTreeRegressor()
params = {'criterion':['mse','friedman_mse'],
          'max_depth':[4,5,6,7,8,9,10,11,12,15,20,30,40,50,70,90,120,150], 
          'min_samples_leaf':[1, 2, 3],
          'min_samples_split':[2,3,4]}
grid_tr = gridsearch(tr, cv, params, 'DecisionTreeRegressor')

Обучение модели DecisionTreeRegressor завершено


In [18]:
from sklearn.neural_network import MLPRegressor

mlp = MLPRegressor()
params = {"hidden_layer_sizes": [1,50],
          "activation": ["identity", "logistic", "tanh"],
          "solver": ["lbfgs", "sgd", "adam"], 
          "alpha": [0.00005,0.0005]}
grid_mlp= gridsearch(mlp, cv, params, 'MLPRegressor')

Обучение модели MLPRegressor завершено




In [19]:
from sklearn.ensemble import RandomForestRegressor

rfr = RandomForestRegressor()
params = {'n_estimators': [500, 700, 1000], 'max_depth': [None, 1, 2, 3], 'min_samples_split': [2, 3]}
grid_rfr= gridsearch(rfr, cv, params, 'RandomForestRegressor')

Обучение модели RandomForestRegressor завершено


In [20]:
estimators = {
    'DecisionTree': grid_tr,
    'MLP': grid_mlp,
    'RandomForest': grid_rfr
}

In [21]:
for i in estimators:
    model = estimators[i]
    print('Модель - {}, CV best score - {}, Validation best score - {}'.format(i, model.best_score_, r2_score(model.best_estimator_.predict(X_test), y_test)))

Модель - DecisionTree, CV best score - 0.8311124164237503, Validation best score - 0.8303718385406441
Модель - MLP, CV best score - 0.80427371604966, Validation best score - 0.8074528869735631
Модель - RandomForest, CV best score - 0.8688344097933933, Validation best score - 0.8793204914994022
