In [6]:
from sklearn.datasets import load_boston

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2 —
# confirm the environment pins an older release before re-running this cell.
boston = load_boston()
# Emit the dataset description followed by the feature-matrix shape, one per line.
print(boston.DESCR, boston.data.shape, sep="\n")

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

In [7]:
# Start from the full feature matrix and target vector of the loaded dataset.
X, y = boston.data, boston.target
# Keep only the samples whose target is strictly below 50.0. Both masks are built
# from the unfiltered y because the right-hand side is evaluated before rebinding.
# NOTE(review): presumably 50.0 is a cap value in the target — confirm.
X, y = X[y < 50.0], y[y < 50.0]

In [8]:
from sklearn.model_selection import train_test_split

# Split into train/test partitions with a fixed seed so the split is repeatable;
# no split size is passed, so the library default is used.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=666,
)

# sklearn中的多元线性回归

In [9]:
from sklearn.linear_model import LinearRegression

# Build the linear model and fit it on the training partition in one step
# (fit returns the estimator itself, so `regression` is the fitted model).
regression = LinearRegression().fit(X_train, y_train)
# Cell output: the model's score on the held-out test partition.
regression.score(X_test, y_test)

0.8009390227581041

# sklearn中的kNN回归算法


In [11]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV

knn_Reg = KNeighborsRegressor()

# Two sub-grids: uniform weighting searches only the neighbour count (p is not
# varied there), while distance weighting also searches p over 1..5.
param_grid = [
    {
        "weights": ["uniform"],
        "n_neighbors": list(range(1, 11)),
    },
    {
        "weights": ["distance"],
        "n_neighbors": list(range(1, 11)),
        "p": list(range(1, 6)),
    },
]

# n_jobs=-1 is forwarded to the search so candidates can be evaluated in parallel.
cv = GridSearchCV(estimator=knn_Reg, param_grid=param_grid, n_jobs=-1)
cv.fit(X_train, y_train)
# Cell output: score of the fitted search object on the held-out test partition.
cv.score(X_test, y_test)

0.7353138117643773

In [12]:
# Show the estimator the grid search selected as best (rendered as the cell's output).
cv.best_estimator_

KNeighborsRegressor(n_neighbors=6, p=1, weights='distance')