In [2]:
from sklearn.datasets import load_boston
import matplotlib.pyplot as plt
import numpy as np

In [3]:
boston = load_boston()

In [10]:
x_data = boston.data
y_data = boston.target.reshape(boston.target.size)
x_data[:3]

array([[6.3200e-03, 1.8000e+01, 2.3100e+00, 0.0000e+00, 5.3800e-01,
        6.5750e+00, 6.5200e+01, 4.0900e+00, 1.0000e+00, 2.9600e+02,
        1.5300e+01, 3.9690e+02, 4.9800e+00],
       [2.7310e-02, 0.0000e+00, 7.0700e+00, 0.0000e+00, 4.6900e-01,
        6.4210e+00, 7.8900e+01, 4.9671e+00, 2.0000e+00, 2.4200e+02,
        1.7800e+01, 3.9690e+02, 9.1400e+00],
       [2.7290e-02, 0.0000e+00, 7.0700e+00, 0.0000e+00, 4.6900e-01,
        7.1850e+00, 6.1100e+01, 4.9671e+00, 2.0000e+00, 2.4200e+02,
        1.7800e+01, 3.9283e+02, 4.0300e+00]])

In [13]:
print(boston["DESCR"])

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

# 스케일링

In [16]:
from sklearn import preprocessing

minmax_scale = preprocessing.MinMaxScaler().fit(x_data)
x_scaled_data = minmax_scale.transform(x_data)

x_scaled_data

array([[0.00000000e+00, 1.80000000e-01, 6.78152493e-02, ...,
        2.87234043e-01, 1.00000000e+00, 8.96799117e-02],
       [2.35922539e-04, 0.00000000e+00, 2.42302053e-01, ...,
        5.53191489e-01, 1.00000000e+00, 2.04470199e-01],
       [2.35697744e-04, 0.00000000e+00, 2.42302053e-01, ...,
        5.53191489e-01, 9.89737254e-01, 6.34657837e-02],
       ...,
       [6.11892474e-04, 0.00000000e+00, 4.20454545e-01, ...,
        8.93617021e-01, 1.00000000e+00, 1.07891832e-01],
       [1.16072990e-03, 0.00000000e+00, 4.20454545e-01, ...,
        8.93617021e-01, 9.91300620e-01, 1.31070640e-01],
       [4.61841693e-04, 0.00000000e+00, 4.20454545e-01, ...,
        8.93617021e-01, 1.00000000e+00, 1.69701987e-01]])

# train, test data 분리

In [25]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(x_scaled_data, y_data, test_size = 0.3)

In [27]:
len(x_data)

506

# 모델 학습시키기 - LinearRegression

In [28]:
from sklearn import linear_model

regr = linear_model.LinearRegression(fit_intercept=True,
                                    normalize=False,
                                    copy_X=True,
                                    n_jobs=8)
regr.fit(X_train, y_train)
regr

LinearRegression(n_jobs=8)

In [29]:
print('Coefficients: ', regr.coef_)
print('intercept: ', regr.intercept_)

Coefficients:  [ -4.74964588   3.45612614   0.91736238   2.15258516  -8.00720421
  21.5392434   -0.5689352  -14.2519353    6.15197199  -6.32695067
  -8.93873139   3.85088955 -17.91726927]
intercept:  25.12683030301075


# 모델 적용하기

In [30]:
regr.predict(X_test[:10])

array([25.66503547, 15.79276248, 17.4578389 , 19.88175293, 34.18430016,
       19.69256844, 20.43423475, 20.30358636, 28.0630998 , 17.78127899])

예측된 y값

In [31]:
X_test[0].dot(regr.coef_.T) + regr.intercept_

25.665035472779294

# 모델 평가하기 

In [32]:
y_true = y_test
y_pred = regr.predict(X_test)

In [33]:
np.sqrt(((y_true - y_pred) ** 2).sum() / len(y_true))

4.290242567904282

In [34]:
from sklearn.metrics import mean_squared_error
np.sqrt(mean_squared_error(y_true, y_pred))

4.290242567904282