### Predicting House Prices Using Linear Regression Model

In [1]:
# Importing dependencies
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn.model_selection import train_test_split

In [2]:
# Here we will import the dataset
from sklearn.datasets import load_boston
boston = load_boston()

In [3]:
# Defining our dataframe, target and features
X = pd.DataFrame(boston.data)
y = pd.DataFrame(boston.target)

In [4]:
# We use the describe method to produce statistics about the X data
# X.describe()
print(boston.data)

[[6.3200e-03 1.8000e+01 2.3100e+00 ... 1.5300e+01 3.9690e+02 4.9800e+00]
 [2.7310e-02 0.0000e+00 7.0700e+00 ... 1.7800e+01 3.9690e+02 9.1400e+00]
 [2.7290e-02 0.0000e+00 7.0700e+00 ... 1.7800e+01 3.9283e+02 4.0300e+00]
 ...
 [6.0760e-02 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9690e+02 5.6400e+00]
 [1.0959e-01 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9345e+02 6.4800e+00]
 [4.7410e-02 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9690e+02 7.8800e+00]]


In [5]:
# Initialize the linear regression model
# We will split the data into a train/test split
lm = linear_model.LinearRegression()
X_train, X_test,  y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)

In [6]:
# Fitting the model with our training data
lm.fit(X_train, y_train)

LinearRegression()

In [7]:
# Printing the coefficients of the columns/features of the model
print(lm.coef_)

[[-9.91995332e-02  6.27806786e-02  7.25812126e-02  3.01077411e+00
  -2.06617880e+01  3.44496595e+00  3.78004138e-03 -1.44827470e+00
   3.09305140e-01 -1.16485640e-02 -9.46002716e-01  7.20017685e-03
  -5.28368116e-01]]


In [8]:
# Model predictions based on testing set
predictions = lm.predict(X_test)
print(predictions[:4])
print(y_test[:4])

[[32.45826885]
 [28.03728153]
 [17.93647207]
 [21.2832492 ]]
        0
307  28.2
343  23.9
47   16.6
67   22.0


In [9]:
# Checking the model performance/accuracy using the Mean Squared Error (MSE)
print(np.mean((predictions - y_test)** 2))

0    20.698476
dtype: float64


In [10]:
# We can see that our model's accuracy is between 4-5 squared variance from the actual house prices
from sklearn.metrics import mean_squared_error
print(mean_squared_error(y_test, predictions))

20.698475744484373


We can see that our model has relatively good accuracy, but can still be further tuned to get closer results.