In [2]:
# Load the Boston house-prices dataset through scikit-learn and keep the
# returned object as `boston`; later cells read its DESCR, data,
# feature_names and target attributes.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- confirm the environment pins an older release, otherwise this import
# fails on a fresh kernel.
from sklearn.datasets import load_boston
boston = load_boston()

In [3]:
# Show the dataset's own description text (instance count, attribute list).
print(boston.DESCR)

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

In [5]:
import pandas as pd

# Put the feature matrix into a labelled frame and attach the target as an
# extra MEDV column in one chained expression.
df = pd.DataFrame(
    data=boston.data,
    columns=boston.feature_names,
).assign(MEDV=boston.target)

# Single-feature setup: RM is the only predictor (kept two-dimensional as a
# one-column frame), MEDV is the response series.
x = df['RM'].to_frame()
y = df['MEDV']

print('input', x)
print('output', y)

input         RM
0    6.575
1    6.421
2    7.185
3    6.998
4    7.147
..     ...
501  6.593
502  6.120
503  6.976
504  6.794
505  6.030

[506 rows x 1 columns]
output 0      24.0
1      21.6
2      34.7
3      33.4
4      36.2
       ... 
501    22.4
502    20.6
503    23.9
504    22.0
505    11.9
Name: MEDV, Length: 506, dtype: float64


In [7]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=0,
)


In [8]:
from sklearn.linear_model import LinearRegression

# Build the estimator and fit it on the training split in one step
# (fit() hands back the estimator itself, so `lr` is the fitted model).
lr = LinearRegression().fit(x_train, y_train)
print(lr)

# Predict the target for the held-out rows. A plain linear fit does not
# constrain its predictions to be non-negative.
y_pred = lr.predict(x_test)
print(y_pred)

LinearRegression()
[ 2.29742916e+01  2.18848663e+01  2.33467448e+01  1.38119448e+01
  2.20338475e+01  1.83279389e+01  1.89890432e+01  1.95756569e+01
  1.03108855e+01  2.36447073e+01  1.94732323e+01  2.59911619e+01
  2.30487823e+01  7.32194917e+00  3.66526326e+01  2.95667119e+01
  2.21269608e+01  3.18572986e+01  2.49576045e+01  1.84769202e+01
  2.38402452e+01  1.87562600e+01  2.10561581e+01  2.43616795e+01
  1.69126171e+01 -2.46886698e-02  1.95477229e+01  1.71640229e+01
  3.91387572e+01  2.07302616e+01  2.15962151e+01  2.06278370e+01
  2.19779795e+01  2.07581956e+01  2.37471319e+01  1.80392878e+01
  1.97525721e+01  1.87097034e+01  2.31139616e+01  2.13820545e+01
  2.10561581e+01  1.78809952e+01  2.12144506e+01  1.45382284e+01
  2.49017366e+01  2.37936885e+01  1.79182405e+01  2.10747807e+01
  9.65909252e+00  2.66429549e+01  2.12889413e+01  1.68008811e+01
  1.96967042e+01  4.09079095e+01  1.76109667e+01  1.88400620e+01
  1.71360890e+01  1.89238639e+01  1.76109667e+01  1.95011663e+01
  1.87

In [9]:
from sklearn.metrics import mean_squared_error

# Mean squared error of the predictions against the held-out targets
# (expressed in squared units of MEDV).
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(mse)

47.03304747975518
