## House price prediction using Linear Regression

In [1]:
import pandas as pd
import scipy.stats as ss
import statistics as s

### Create sample data

In [2]:
# Sample data: the three lists are parallel, so index i in each list
# describes the same house.
sizes = [1000, 2000, 1500, 1100, 1670, 1550, 3000, 2400, 3400, 1200]  # size in square feet
ages = [5, 6, 2, 10, 11, 7, 5, 6, 8, 1]  # age in years
prices = [15.5, 25.5, 22, 10.5, 30, 35, 45, 25, 43, 22]  # price in lakhs

### Measure how strongly size and age correlate with price

In [8]:
# Pearson correlation between house size and price; the cell displays the
# correlation coefficient together with its p-value.
ss.pearsonr(sizes, prices)

(0.83618504618361, 0.0025725398031045785)

In [9]:
# Pearson correlation between house age and price; the cell displays the
# correlation coefficient together with its p-value.
ss.pearsonr(ages, prices)

(0.08633046178089812, 0.8125533057129931)

### Calculate slope and intercept 

In [3]:
# Single-feature least-squares line of price against size.
# The result is kept because later cells read `size_reg.slope`; the bare
# name on the last line displays the full fit.
size_reg = ss.linregress(sizes, prices)
size_reg

LinregressResult(slope=0.011393721128941818, intercept=5.9070168353315005, rvalue=0.83618504618361, pvalue=0.002572539803104576, stderr=0.002642123965417807)

In [4]:
# Single-feature least-squares line of price against age.
# The result is kept because later cells read `age_reg.slope`; the bare
# name on the last line displays the full fit.
age_reg = ss.linregress(ages, prices)
age_reg

LinregressResult(slope=0.3053993250843644, intercept=25.48706411698538, rvalue=0.08633046178089812, pvalue=0.8125533057129929, stderr=1.246047411291669)

In [5]:
# Arithmetic mean of each column, used below to anchor the combined
# regression line at the centre of the data.
size_avg, age_avg, price_avg = map(s.mean, (sizes, ages, prices))

In [6]:
# Intercept of the combined size-and-age line, chosen so that the line
# passes through the point (size_avg, age_avg, price_avg).
# NOTE(review): the two slopes come from separate single-feature fits.
# Combining them like this matches a joint least-squares fit on both
# features only when sizes and ages are uncorrelated; otherwise it is an
# approximation. Confirm that this is intended.
intercept = (
    price_avg
    - size_reg.slope * size_avg
    - age_reg.slope * age_avg
)

In [7]:
# Evaluate the combined size-and-age line for every house and print one
# row per house: size, age, predicted price, actual price, difference.
for size, age, price in zip(sizes, ages, prices):
    p_price = intercept + size * size_reg.slope + age * age_reg.slope
    # Difference between predicted and actual price (positive means the
    # line over-predicts).
    diff = p_price - price
    print(f"{size:5} {age:5} {p_price:8.2f} {price:8.2f} {diff:8.2f}")

 1000     5    16.96    15.50     1.46
 2000     6    28.66    25.50     3.16
 1500     2    21.75    22.00    -0.25
 1100    10    19.63    10.50     9.13
 1670    11    26.43    30.00    -3.57
 1550     7    23.84    35.00   -11.16
 3000     5    39.75    45.00    -5.25
 2400     6    33.22    25.00     8.22
 3400     8    45.23    43.00     2.23
 1200     1    18.02    22.00    -3.98
