## Multiple Linear Regression

In [None]:
from sklearn.datasets import fetch_california_housing

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the California housing dataset through scikit-learn's fetch helper.
california = fetch_california_housing()

In [None]:
# Echo the loaded dataset object to inspect everything it contains.
california

In [None]:
# List the top-level keys available on the dataset object.
california.keys()

In [None]:
# Print the description text bundled with the dataset.
print(california.DESCR)

In [None]:
# Show the name(s) of the target variable.
california.target_names

In [None]:
# Print the raw feature values.
print(california.data)

In [None]:
# Print the raw target values.
print(california.target)

In [None]:
# Show the feature names (used below as the DataFrame column labels).
california.feature_names

In [None]:
## Let's prepare the dataframe
# Wrap the raw feature values in a DataFrame labelled with the feature names.
dataset = pd.DataFrame(
    data=california.data,
    columns=california.feature_names,
)
dataset.head()

In [None]:
# Append the target as a "Price" column so features and target share one frame.
dataset["Price"] = california.target

In [None]:
# Preview the first rows to confirm the Price column is present.
dataset.head()

In [None]:
# Summarise the frame: column dtypes and non-null counts.
dataset.info()

In [None]:
# Count missing values per column.
dataset.isnull().sum()

In [None]:
# Descriptive statistics for each column.
dataset.describe()

In [None]:
# Pairwise correlation between all columns of the frame.
dataset.corr()

In [None]:
import seaborn as sns

# Draw the correlation matrix as a heatmap with the coefficient written in each cell.
sns.heatmap(data=dataset.corr(), annot=True)

In [None]:
# Preview the frame once more before separating features from the target.
dataset.head()

In [None]:
## Independent and Dependent features
# Every column except the last is a feature; the last column is the target.
X, y = dataset.iloc[:, :-1], dataset.iloc[:, -1]

In [None]:
# Preview the feature matrix (all columns except the last).
X.head()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for testing; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=10)

In [None]:
# Check the shapes of the four pieces produced by the split.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

In [None]:
from sklearn.preprocessing import StandardScaler

# Scaler used to standardise the features before fitting the regression.
scaler = StandardScaler()

In [None]:
# Learn the scaling parameters from the training set only and apply them to it...
X_train = scaler.fit_transform(X_train)
# ...then reuse those same parameters for the test set (no re-fitting on test data).
X_test = scaler.transform(X_test)

In [None]:
# Inspect the scaled training features.
X_train

In [None]:
# Inspect the scaled test features.
X_test

## Model Training

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Create an unfitted linear regression estimator with default settings.
regression = LinearRegression()

In [None]:
# Echo the estimator to show its representation.
regression

In [None]:
# Fit the model on the scaled training features and the training targets.
regression.fit(X_train, y_train)

In [None]:
## Slopes (coefficients) learned by the fitted model
regression.coef_

In [None]:
## Intercept learned by the fitted model
regression.intercept_

In [None]:
## Prediction for the test data
# X_test was already scaled above, so it can be passed to the model directly.
y_pred = regression.predict(X_test)

In [None]:
# Ground-truth targets for the test rows.
y_test

In [None]:
# Model predictions for the test rows.
y_pred

In [None]:
## Performance Metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Compute the MSE once; the RMSE below is its square root.
mse = mean_squared_error(y_test, y_pred)
print(mse)                                   # mean squared error
print(mean_absolute_error(y_test, y_pred))   # mean absolute error
print(np.sqrt(mse))                          # root mean squared error

In [None]:
## R square and adjusted R square
from sklearn.metrics import r2_score

# Coefficient of determination of the predictions on the test set.
score = r2_score(y_test, y_pred)
print(score)

In [None]:
## Adjusted R square
# adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - p - 1),
# where n is the number of test samples and p the number of features.
n_obs = len(y_test)
n_features = X_test.shape[1]
1 - (1 - score) * (n_obs - 1) / (n_obs - n_features - 1)

## Assumptions

In [None]:
# Plot true test targets (x-axis) against the model's predictions (y-axis).
plt.scatter(x=y_test, y=y_pred)
plt.xlabel("Test Truth Data")
plt.ylabel("Test Predicted Data")

In [None]:
# Residual = true value minus predicted value, for each test row.
residuals = y_test - y_pred

In [None]:
# Inspect the residuals.
residuals

In [None]:
# Kernel density estimate of the residuals, to eyeball the shape of their distribution.
sns.displot(data=residuals, kind="kde")

In [None]:
## Scatter plot of predictions against residuals
## The points are expected to be spread uniformly, with no visible pattern.
plt.scatter(x=y_pred, y=residuals)

In [None]:
# Echo the fitted estimator that is pickled in the next section.
regression

## Pickling 
Python's `pickle` module is used for serialising and de-serialising a Python object structure. Most Python objects can be pickled so that they can be saved to disk. Pickling “serialises” the object first, before it is written to a file: it converts a Python object (list, dict, etc.) into a byte stream. The idea is that this byte stream contains all the information necessary to reconstruct the object in another Python script.

In [None]:
import pickle

# Persist the fitted model to disk. The `with` block guarantees the file is
# flushed and closed even if pickling raises; passing a bare open(...) to
# pickle.dump leaves the handle open.
# NOTE: the model was trained on features standardised by `scaler`, and that
# scaler is not saved here. Persist it separately before using the pickled
# model on raw (unscaled) inputs.
with open('regressor.pkl', 'wb') as model_file:
    pickle.dump(regression, model_file)

In [None]:
# Reload the model from disk; the `with` block closes the file handle as soon
# as loading finishes instead of leaving it open.
# Only unpickle files you trust: loading a pickle can execute arbitrary code.
with open('regressor.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [None]:
# Sanity check: predict with the reloaded model on the already-scaled test features.
model.predict(X_test)