In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Let's load the Boston House Pricing Dataset

In [0]:
## `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, where this
## import raises ImportError. Fall back to a drop-in loader that rebuilds the same
## arrays from the original StatLib file, so every later cell keeps working.
try:
    from sklearn.datasets import load_boston
except ImportError:
    from sklearn.utils import Bunch

    def load_boston():
        """Return the Boston housing data as a Bunch.

        Drop-in replacement for the removed ``sklearn.datasets.load_boston``.
        Requires network access to download the raw file.

        Returns
        -------
        Bunch with the fields this notebook uses:
            data          -- (n_samples, 13) float array of features
            target        -- (n_samples,) float array, median home value (MEDV)
            feature_names -- array of the 13 column names
            DESCR         -- short text description
        """
        raw = pd.read_csv(
            "http://lib.stat.cmu.edu/datasets/boston",
            sep=r"\s+",
            skiprows=22,  # skip the free-text header of the StatLib file
            header=None,
        )
        values = raw.values
        # Each record is wrapped over two physical lines: 11 features on the
        # first line, then 2 more features followed by the target on the second.
        data = np.hstack([values[::2, :], values[1::2, :2]])
        target = values[1::2, 2]
        feature_names = np.array(
            ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
             "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT"]
        )
        descr = (
            "Boston house prices dataset: 13 numeric features per record; the "
            "target is MEDV, the median value of owner-occupied homes in $1000s. "
            "Loaded from http://lib.stat.cmu.edu/datasets/boston because "
            "sklearn.datasets.load_boston is unavailable in this scikit-learn version."
        )
        return Bunch(
            data=data,
            target=target,
            feature_names=feature_names,
            DESCR=descr,
        )

In [0]:
## Load the dataset; the returned object exposes the fields inspected below
boston=load_boston()

In [0]:
## List the fields available on the loaded object
boston.keys()

In [0]:
## Let's check the description of the dataset
print(boston.DESCR)

In [0]:
## Feature values (wrapped in a DataFrame further down)
print(boston.data)

In [0]:
## Target values (stored further down as the 'Price' column)
print(boston.target)

In [0]:
## Feature names (used further down as the DataFrame column labels)
print(boston.feature_names)

## Preparing The Dataset

In [0]:
dataset=pd.DataFrame(boston.data,columns=boston.feature_names)

In [0]:
dataset.head()

In [0]:
dataset['Price']=boston.target

In [0]:
dataset.head()

In [0]:
dataset.info()

In [0]:
## Summarizing The Stats of the data
dataset.describe()

In [0]:
## Check the missing Values
dataset.isnull().sum()

In [0]:
### EXploratory Data Analysis
## Correlation
dataset.corr()

In [0]:
import seaborn as sns
sns.pairplot(dataset)

## Analyzing The Correlated Features

In [0]:
dataset.corr()

In [0]:
plt.scatter(dataset['CRIM'],dataset['Price'])
plt.xlabel("Crime Rate")
plt.ylabel("Price")

In [0]:
plt.scatter(dataset['RM'],dataset['Price'])
plt.xlabel("RM")
plt.ylabel("Price")

In [0]:
import seaborn as sns
sns.regplot(x="RM",y="Price",data=dataset)

In [0]:
sns.regplot(x="LSTAT",y="Price",data=dataset)

In [0]:
sns.regplot(x="CHAS",y="Price",data=dataset)

In [0]:
sns.regplot(x="PTRATIO",y="Price",data=dataset)

In [0]:
## Independent and Dependent features

X=dataset.iloc[:,:-1]
y=dataset.iloc[:,-1]

In [0]:
X.head()

In [0]:
y

In [0]:
##Train Test Split
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=42)

In [0]:
X_train

In [0]:
X_test

In [0]:
## Standardize the dataset
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()

In [0]:
X_train=scaler.fit_transform(X_train)

In [0]:
X_test=scaler.transform(X_test)

In [0]:
import pickle
## Persist the fitted scaler so the same transformation can be applied at
## prediction time. The with-block guarantees the file is flushed and closed
## even if pickling raises.
with open('scaling.pkl','wb') as scaler_file:
    pickle.dump(scaler,scaler_file)

In [0]:
## X_train now holds the output of scaler.fit_transform
X_train

In [0]:
## X_test now holds the output of scaler.transform
X_test

## Model Training

In [0]:
from sklearn.linear_model import LinearRegression

In [0]:
regression=LinearRegression()

In [0]:
regression.fit(X_train,y_train)

In [0]:
## print the coefficients and the intercept
print(regression.coef_)

In [0]:
print(regression.intercept_)

In [0]:
## on which parameters the model has been trained
regression.get_params()

In [0]:
### Prediction With Test Data
reg_pred=regression.predict(X_test)

In [0]:
reg_pred

## Assumptions

In [0]:
## Scatter plot of actual vs. predicted prices on the test set.
## Axes are labelled (as in the EDA scatter plots) so the figure stands alone.
plt.scatter(y_test,reg_pred)
plt.xlabel("Actual Price")
plt.ylabel("Predicted Price")

In [0]:
## Residuals
residuals=y_test-reg_pred

In [0]:
residuals

In [0]:
## Plot this residuals 

sns.displot(residuals,kind="kde")

In [0]:
## Scatter plot of residuals against predictions.
## The points should be spread evenly around zero with no visible pattern.
## Axes are labelled (as in the EDA scatter plots) so the figure stands alone.
plt.scatter(reg_pred,residuals)
plt.xlabel("Predicted Price")
plt.ylabel("Residual")

In [0]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

## Printed in order: MAE, MSE, RMSE (square root of the MSE)
print(mean_absolute_error(y_true=y_test, y_pred=reg_pred))
print(mean_squared_error(y_true=y_test, y_pred=reg_pred))
print(np.sqrt(mean_squared_error(y_true=y_test, y_pred=reg_pred)))

## R square and adjusted R square


Formula

**R^2 = 1 - SSR/SST**


- R^2 = coefficient of determination
- SSR = sum of squares of residuals
- SST = total sum of squares


In [0]:
from sklearn.metrics import r2_score
## R^2 of the predictions on the test set
score = r2_score(y_true=y_test, y_pred=reg_pred)
print(score)

**Adjusted R^2 = 1 - [(1 - R^2)(n - 1) / (n - k - 1)]**

where:

- R^2: the R^2 of the model
- n: the number of observations
- k: the number of predictor variables

In [0]:
# Display adjusted R-squared: n = len(y_test) observations, k = X_test.shape[1] predictors
1 - ((1 - score) * (len(y_test) - 1)) / (len(y_test) - X_test.shape[1] - 1)

## New Data Prediction

In [0]:
## Take the first record and reshape it to a single-row 2-D array
boston.data[0].reshape(1,-1)

In [0]:
##transformation of new data
## NOTE(review): the scaler was fitted on a DataFrame but receives a bare array
## here; recent scikit-learn versions may warn about missing feature names -- confirm.
scaler.transform(boston.data[0].reshape(1,-1))

In [0]:
## Scale the record, then predict with the fitted model
regression.predict(scaler.transform(boston.data[0].reshape(1,-1)))

## Pickling The Model file For Deployment

In [0]:
import pickle

In [0]:
## Serialize the trained model. The with-block guarantees the file is flushed
## and closed even if pickling raises.
with open('regmodel.pkl','wb') as model_file:
    pickle.dump(regression,model_file)

In [0]:
## Reload the model from disk. The with-block closes the file handle after reading.
## Only unpickle files you created yourself: pickle.load can execute arbitrary code.
with open('regmodel.pkl','rb') as model_file:
    pickled_model=pickle.load(model_file)

In [0]:
## Prediction with the reloaded model on the first record, scaled with the
## same scaler; this is the same call made earlier with `regression`
pickled_model.predict(scaler.transform(boston.data[0].reshape(1,-1)))