## Linear Regression with multiple variables
### Formula
![Multiple Variable Linear regression](img/multi-variate-linear-regression.png)

In [1]:
# Load the home-price dataset from the CSV file into a DataFrame.
import pandas as pd

data = pd.read_csv(filepath_or_buffer="homeprices-multiple-variable.csv")

# Bare last expression: display the loaded frame as the cell output.
data

Unnamed: 0,area,rooms,age,price
0,2600,2.0,5,550000
1,3000,3.0,11,525000
2,3200,4.0,2,610000
3,3600,,1,680000
4,4000,4.0,3,725000
5,5000,6.0,8,713000
6,5500,2.0,2,810000
7,6000,3.0,7,910000
8,6200,4.0,4,970000
9,7000,1.0,4,900000


## Let's check each column for missing values

In [2]:
# Count the missing (NaN) entries in each column.
data.isna().sum()

area     0
rooms    2
age      0
price    0
dtype: int64

## We will fill the missing values with the median of that field. Let's find the median and take its floor so that we get an integer value

In [3]:
import math

# pandas' median() skips NaN entries by default; flooring the result gives a
# whole number of rooms to use as the fill value.
median = math.floor(data["rooms"].median())

print("Median value for rooms is,", median)

Median value for rooms is, 3


In [4]:
# Replace the missing room counts with the floored median computed earlier.
data["rooms"] = data["rooms"].fillna(median)

# Show the frame again so the filled-in values can be checked.
data

Unnamed: 0,area,rooms,age,price
0,2600,2.0,5,550000
1,3000,3.0,11,525000
2,3200,4.0,2,610000
3,3600,3.0,1,680000
4,4000,4.0,3,725000
5,5000,6.0,8,713000
6,5500,2.0,2,810000
7,6000,3.0,7,910000
8,6200,4.0,4,970000
9,7000,1.0,4,900000


## Let's now do a train/test split

In [5]:
from sklearn.model_selection import train_test_split

# Features are area, rooms and age; the target is the sale price.
# train_test_split shuffles the rows before splitting, so without a fixed
# random_state every run would pick different train/test rows and produce
# different predictions. Pinning the seed makes the notebook reproducible.
train_features, test_features, train_labels, test_labels = train_test_split(
    data[['area', 'rooms', 'age']],
    data.price,
    test_size=0.2,
    random_state=42,
)

## Now let's create the regression model

In [6]:
from sklearn.linear_model import LinearRegression

# Build an ordinary least-squares model with default settings and fit it on
# the training portion of the data only.
reg = LinearRegression()
reg.fit(X=train_features, y=train_labels)

## Let's test the model on the test set

In [7]:
# Predict prices for the held-out rows.
prediction = reg.predict(test_features)

# Print (predicted, actual) pairs so the two can be compared side by side.
print([(predicted, actual) for predicted, actual in zip(prediction, test_labels)])

[(489236.9984559085, 525000), (661507.4034517666, 680000), (628122.5302571132, 610000)]


## And we can see the predictions are quite close

## Now let's store the model in a pickle file; later we will load it and run the test again

In [8]:
import os

import joblib

# joblib.dump does not create missing parent directories, so make sure the
# target folder exists first; exist_ok=True keeps re-runs from failing.
os.makedirs('joblib', exist_ok=True)

# Persist the fitted model. The returned list of written file names is the
# cell's displayed output.
joblib.dump(reg, 'joblib/MVLinearRegression.pkl')

['joblib/MVLinearRegression.pkl']

#### Now load the model and run the test again

In [9]:
# Restore the persisted estimator from disk. joblib.load unpickles the file,
# so only load model files that come from a trusted source.
reg2 = joblib.load('joblib/MVLinearRegression.pkl')

# Re-run the predictions with the restored model on the same held-out rows
# and print (predicted, actual) pairs.
prediction = reg2.predict(test_features)
print([(predicted, actual) for predicted, actual in zip(prediction, test_labels)])

[(489236.9984559085, 525000), (661507.4034517666, 680000), (628122.5302571132, 610000)]
