In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Import and load the Boston housing data.
# `load_boston` was removed in scikit-learn 1.2, where importing it raises ImportError.
# On those releases, rebuild the same arrays from the original StatLib file, following
# the recipe given in scikit-learn's own removal notice.
try:
    from sklearn.datasets import load_boston
    boston_dataset = load_boston()
except ImportError:
    from sklearn.utils import Bunch

    BOSTON_URL = "http://lib.stat.cmu.edu/datasets/boston"
    # Per the scikit-learn notice: skip the 22-line text header; each record is
    # wrapped over two physical rows, so even/odd rows are stitched back together.
    raw_df = pd.read_csv(BOSTON_URL, sep=r"\s+", skiprows=22, header=None)
    boston_dataset = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
        feature_names=np.array([
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
        ]),
        DESCR="Boston house prices dataset, loaded from " + BOSTON_URL,
    )
print(boston_dataset.keys())

In [None]:
# Show the description text carried by the dataset object.
dataset_description = boston_dataset.DESCR
print(dataset_description)

In [None]:
# Wrap the feature matrix in a DataFrame, labelling columns with the feature names.
boston = pd.DataFrame(
    data=boston_dataset.data,
    columns=boston_dataset.feature_names,
)
# Preview the first rows.
boston.head()

In [None]:
# Data processing: count the missing values in each column.
boston.isna().sum()

In [None]:
# MEDV (the house price) is the target variable; the remaining columns are the features.
# Attach the target values to the frame as a new 'MEDV' column.
boston['MEDV'] = boston_dataset.target

# Data analysis: plot the distribution of the target.
# `sns.distplot` is deprecated; `histplot` with a KDE overlay and density scaling is
# the replacement seaborn documents for the same figure.
sns.set(rc={'figure.figsize': (11.7, 8.27)})
sns.histplot(boston['MEDV'], bins=40, kde=True, stat='density', kde_kws=dict(cut=3))
plt.show()

In [None]:
# Correlation between every pair of columns, rounded to three decimals,
# drawn as a heatmap with the value written in each cell.
correlation_matrix = boston.corr()
correlation_matrix = correlation_matrix.round(3)
sns.heatmap(correlation_matrix, annot=True)

In [None]:
# Scatter each selected feature against MEDV, one panel per feature, side by side.
features = ['LSTAT', 'RM']
target = boston['MEDV']

plt.figure(figsize=(20, 5))

for i, col in enumerate(features):
    # Panels are numbered from 1, hence i + 1.
    plt.subplot(1, 2, i + 1)
    x, y = boston[col], target
    plt.scatter(x, y, marker='o')
    plt.title(col)
    plt.xlabel(col)
    plt.ylabel('MEDV')

In [None]:
# Model inputs: the two predictors (LSTAT, RM) as X and the MEDV target as Y.
predictor_names = ['LSTAT', 'RM']
X = pd.DataFrame(
    np.column_stack([boston[name] for name in predictor_names]),
    columns=predictor_names,
)
Y = boston['MEDV']

In [None]:
# Split into training and testing sets: 20% of the rows are held out for testing,
# and random_state is fixed so the split is the same on every run.
from sklearn.model_selection import train_test_split

split_parts = train_test_split(X, Y, test_size=0.2, random_state=5)
X_train, X_test, Y_train, Y_test = split_parts

# Print the shape of each piece, in the order X_train, X_test, Y_train, Y_test.
for part in split_parts:
    print(part.shape)


In [None]:
# Train a linear regression model on the training split.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error  # not used here; the evaluation cell relies on it

# fit() returns the estimator itself, so construction and fitting can be chained.
lin_model = LinearRegression().fit(X_train, Y_train)
lin_model

In [None]:
# Model evaluation on the training and testing sets.
# Both metrics are imported here so the cell does not depend on an import made in
# an earlier cell.
from sklearn.metrics import mean_squared_error, r2_score


def report_performance(split_name, y_true, y_pred):
    """Print a boxed RMSE / R2 report for one data split.

    Parameters
    ----------
    split_name : str
        Label inserted into the report header (e.g. "training", "testing").
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Model predictions for the same rows as ``y_true``.

    Returns
    -------
    tuple
        ``(rmse, r2)`` as computed from ``y_true`` and ``y_pred``.
    """
    # RMSE is the square root of the mean squared error.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)
    print("____________________________________________")
    print("|The model performance for this {} set".format(split_name))
    print("|____________________________________________")
    print("|RMSE is", rmse)
    print("|R2 score is", r2)
    print("|____________________________________________")
    return rmse, r2


# Training set
y_train_predict = lin_model.predict(X_train)
rmse, r2 = report_performance("training", Y_train, y_train_predict)
print("\n")

# Testing set; rmse and r2 end up holding the testing-set values, as before.
y_test_predict = lin_model.predict(X_test)
rmse, r2 = report_performance("testing", Y_test, y_test_predict)