## Importing the data and necessary libraries

In [None]:
from sklearn.datasets import load_boston
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the Boston housing dataset bundled with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the installed scikit-learn version still provides it.
data = load_boston()

## Creating and Analysing the dataset

In [None]:
# Build the feature table: one column per entry in the dataset's feature names.
df = pd.DataFrame(data.data, columns=data.feature_names)

In [None]:
# Attach the dataset's target values as an extra column next to the features.
df["Target"] = data["target"]

In [None]:
# Preview the first rows of the frame as a quick sanity check.
df.head()

In [None]:
# Summary statistics for each numeric column.
df.describe()

In [None]:
# Count the missing values in each column.
df.isnull().sum()

In [None]:
# (rows, columns) of the frame, including the added 'Target' column.
df.shape

In [None]:
# Print the column dtypes and non-null counts.
df.info()

In [None]:
# Draw an annotated correlation heatmap of every column pair on a large canvas.
plt.figure(figsize=(20, 20))
sns.heatmap(
    df.corr(),
    annot=True,
    fmt='.1f',
    annot_kws={'size': 15},
    cmap='Greens',
    cbar=True,
    square=True,
)

## Preparing the data for modelling

In [None]:
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
%matplotlib inline

In [None]:
# Features are every column except 'Target'; 'Target' itself is the label.
X = df.drop(columns=['Target'])
y = df.loc[:, 'Target']

In [None]:
# Report the shape of the full frame, the feature matrix and the target.
for frame in (df, X, y):
    print(frame.shape)

In [None]:
# Hold out 20% of the rows for testing. The seed is pinned so the split, and
# every metric and plot derived from it, is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Report the shape of each piece of the train/test split.
for label, part in (
    ("X_train: ", X_train),
    ("X_test: ", X_test),
    ("y_train: ", y_train),
    ("y_test: ", y_test),
):
    print(label, part.shape)

## Building the model and visualising the regression line

In [None]:
from sklearn.linear_model import LinearRegression
# Create a linear regression model and fit it on the training split.
regressor = LinearRegression()
regressor.fit(X=X_train, y=y_train)

In [None]:
# Show the fitted model's intercept term.
print("Intercept: ", regressor.intercept_)

In [None]:
# Tabulate the learned coefficients, one row per feature name.
coeff_df = pd.DataFrame(
    data=regressor.coef_,
    index=X.columns,
    columns=['Coefficient'],
)
coeff_df

In [None]:
# Predict the target for the held-out test features.
pred_linreg = regressor.predict(X_test)

In [None]:
# Scatter the actual test targets (x) against the model's predictions (y).
plt.scatter(x=y_test, y=pred_linreg)
plt.title("Prices vs Predicted prices")
plt.xlabel("Prices")
plt.ylabel("Predicted prices")
plt.show()

In [None]:
# Residual plot: predictions on the x-axis, (actual - predicted) on the y-axis.
plt.scatter(x=pred_linreg, y=y_test - pred_linreg)
plt.xlabel("Predicted")
plt.ylabel("Residuals")
plt.title("Predicted vs residuals")
plt.show()

In [None]:
# Distribution of the test residuals (actual - predicted) over 50 bins.
# The plotted quantity is the residual, not the prediction, so the title says so.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; consider
# histplot/displot once the minimum supported seaborn version is confirmed.
sns.distplot((y_test - pred_linreg), bins=50)
plt.title("Distribution of the residuals")
plt.show()

In [None]:
# Histogram of the test residuals (actual - predicted) with default binning.
# distplot draws a KDE by default, which normalises the histogram to a density,
# so the y-axis is labelled as a density rather than a raw frequency.
sns.distplot(y_test - pred_linreg)
plt.title("Histogram of Residuals")
plt.xlabel("Residuals")
plt.ylabel("Density")
plt.show()

In [None]:
# The fitted model uses every feature, so plotting its predictions against a
# single unsorted column yields a jagged path rather than a regression line.
# Fit a one-feature regression on the first column and draw that line instead.
first_feature = X_train.iloc[:, [0]]  # kept 2-D, as the estimator expects
first_feature_model = LinearRegression().fit(first_feature, y_train)
# Sort by x so the line is drawn left to right.
line_x = first_feature.sort_values(by=first_feature.columns[0])

plt.scatter(first_feature.iloc[:, 0], y_train, color='red')
plt.plot(line_x.iloc[:, 0], first_feature_model.predict(line_x), color='green')
plt.title("Plotting the regression line for the first column")
plt.show()

## Evaluating the model

In [None]:
from sklearn import metrics
import numpy as np 

# Report the standard regression error metrics on the held-out test set.
for label, score in (
    ('Mean Absolute Error:', metrics.mean_absolute_error(y_test, pred_linreg)),
    ('Mean Squared Error:', metrics.mean_squared_error(y_test, pred_linreg)),
    ('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, pred_linreg))),
    ('R^2:', metrics.r2_score(y_test, pred_linreg)),
):
    print(label, score)

In [None]:
# Score the model on the test split (for a regressor this is the R^2 value).
regressor.score(X_test, y_test)

## Computing the squared error (SSE) for a random sample from the test set

In [None]:
import random
# Pick a random positional index into the test set. The bound comes from
# len(y_test) rather than a hard-coded 101, so the index stays valid if the
# dataset or the test_size changes.
n = random.randrange(len(y_test))
print("Sample number: ", n)

In [None]:
# Squared error of the prediction for the single sampled test row. This is a
# one-sample squared error, not a mean, so the message is worded accordingly.
sse = np.square(y_test.iloc[n] - pred_linreg[n])
print("The squared error is: ", sse)
print("Actual value is: ", y_test.iloc[n])
print("Predicted value is: ", pred_linreg[n])