## Given these home prices find out price of a home that has,
## 1.   3000 sqr ft area, 3 bedrooms, 40 year old
## 2.   2500 sqr ft area, 4 bedrooms, 5 year old

## Let's import our library 

In [1]:
import pandas as pd
import numpy as np
from sklearn import linear_model

In [2]:
df = pd.read_excel("homeprice.xlsx")
df

Unnamed: 0,area,bedrooms,age,price
0,2200,3.0,21,550080
1,3200,4.0,15,565500
2,3800,,17,614520
3,3600,3.0,31,595000
4,4200,5.0,7,765400
5,4700,6.0,8,812000


## Data Preprocessing: Fill NA values with median value of a column

In [3]:
df.bedrooms.median()

4.0

In [4]:
df.bedrooms = df.bedrooms.fillna(df.bedrooms.median())
df

Unnamed: 0,area,bedrooms,age,price
0,2200,3.0,21,550080
1,3200,4.0,15,565500
2,3800,4.0,17,614520
3,3600,3.0,31,595000
4,4200,5.0,7,765400
5,4700,6.0,8,812000


## One way of fitting

In [5]:
reg = linear_model.LinearRegression()
reg.fit(df.drop("price",axis = "columns"),df.price)

LinearRegression()

## Another way of fitting

In [6]:
reg = linear_model.LinearRegression()
reg.fit(df[["area","bedrooms", "age"]],df.price)

LinearRegression()

## Checking Coefficient

In [7]:
reg.coef_

array([3.90735881e+01, 6.74228140e+04, 6.65430171e+02])

## checking Intercept

In [8]:
reg.intercept_

217192.5333842957

## Find price of home with 3000 sqr ft area, 3 bedrooms, 40 year old

In [9]:
reg.predict([[3000,3,40]])

array([563298.9466546])

## Now I want to know how 563298.9466546 this prediction value is calculated

In [12]:
3.90735881e+01 * 3000 + 6.74228140e+04 * 3 +6.65430171e+02 *40 +217192.5333842957

563298.9465242957

## Find price of home with 2500 sqr ft area, 4 bedrooms, 5 year old

In [13]:
reg.predict([[2500,4,5]])

array([587894.91062733])