<a href="https://colab.research.google.com/github/makhijakabir/machine-learning/blob/main/multivariateRegressionBHPValuationTool.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

import numpy as np
import pandas as pd

In [17]:
#Gather Data

bostonDataset = load_boston()
data = pd.DataFrame(data=bostonDataset.data, columns=bostonDataset.feature_names)
features = data.drop(['INDUS', 'AGE'], axis=1)   
logPrices = np.log(bostonDataset.target)
target = pd.DataFrame(logPrices, columns=['PRICES'])

In [18]:
crimeIDX = 0
znIDX = 1
chasIDX = 2 
rmIDX = 4
ptratioIDX = 8

propertyStats = features.mean().values.reshape(1, 11)

In [19]:
regr = LinearRegression().fit(featues, target)
fittedVals = regr.predict(features)

MSE = mean_squared_error(target, fittedVals)
RMSE = np.sqrt(MSE)

In [31]:
def getLogEstimate(nrRooms, studentsPerClass, nextToRiver=False, highConfidence=True):
    
    #Configure the property
    propertyStats[0][rmIDX] = nrRooms
    propertyStats[0][ptratioIDX] = studentsPerClass

    if nextToRiver:
        propertyStats[0][chasIDX] = 1

    #Make the prediction
    logEstimate = regr.predict(propertyStats)[0][0]

    #Calculate the Range
    if highConfidence:
        upperBound = logEstimate + 2*RMSE
        lowerBound = logEstimate - 2*RMSE
        interval = 95
    else:
        upperBound = logEstimate + RMSE
        lowerBound = logEstimate - RMSE
        interval = 68

    return logEstimate, upperBound, lowerBound, interval


In [33]:
getLogEstimate(3, 20, nextToRiver=True, highConfidence=False)

(2.7767581914803987, 2.964270326677529, 2.5892460562832684, 68)

In [39]:
todayMedian = 583.3
scaleUp = todayMedian / np.median(bostonDataset.target)

logEstimate, upperB, lowerB, conf = getLogEstimate(9, 15, False, False)

#Conversion to today's price
dollarToday = round(np.e**logEstimate*1000*scaleUp, 2)
upperToday = round(np.e**upperB*1000*scaleUp, -3)
lowerToday = round(np.e**lowerB*1000*scaleUp, -3)

#Printing the prices
print(f'The estimated property value is ${dollarToday}')
print(f'At {conf}% confidence the variation range is ${lowerToday} - ${upperToday}')

The estimated property value is $918392.54
At 68% confidence the variation range is $761000.0 - $1108000.0


In [52]:
def getFinalEstimate(rm, ptratio, chas=False, confidencePercentage=True):
    
    if rm < 1 or ptratio < 1:
        print('This is an unrealistic input. Try again.')
        return
    
    logEstimate, upperB, lowerB, conf = getLogEstimate(rm, ptratio, chas, confidencePercentage)

    #Conversion to today's price
    dollarToday = round(np.e**logEstimate*1000*scaleUp, 2)
    upperToday = round(np.e**upperB*1000*scaleUp, -3)
    lowerToday = round(np.e**lowerB*1000*scaleUp, -3)

    #Printing the prices 
    print(f'The estimated property value is ${dollarToday}')
    print(f'At {conf}% confidence the variation range is ${lowerToday} - ${upperToday}')

In [54]:
getFinalEstimate(5, 60, True)

The estimated property value is $118601.38
At 95% confidence the variation range is $82000.0 - $173000.0
