## House Price Valuation 

In [1]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_boston

### Gathering Data and Creating a Linear Model

In [2]:
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs an older scikit-learn release to run.
boston_dataset = load_boston()

# Full feature table, then the modelling subset without INDUS and AGE.
data = pd.DataFrame(boston_dataset.data, columns=boston_dataset.feature_names)
features = data.drop(columns=['INDUS', 'AGE'])

# The model is trained on the natural log of the target.
log_prices = np.log(boston_dataset.target)
target = pd.DataFrame({'PRICE': log_prices})

In [3]:
# Baseline "average property": a single row holding the mean of every feature.
# reshape(1, -1) lets NumPy infer the column count, so this row keeps matching
# `features` even if the list of dropped columns changes.
property_stats = features.mean().values.reshape(1, -1)
features.head()

Unnamed: 0,CRIM,ZN,CHAS,NOX,RM,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,0.0,0.538,6.575,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,0.0,0.469,6.421,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,0.0,0.469,7.185,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,0.0,0.458,6.998,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,0.0,0.458,7.147,6.0622,3.0,222.0,18.7,396.9,5.33


In [4]:
# Confirm the baseline is a 2-D array with one row and one column per feature.
property_stats.shape

(1, 11)

In [5]:
# Show the per-feature means that get_log_estimate uses as its baseline row.
property_stats

array([[3.61352356e+00, 1.13636364e+01, 6.91699605e-02, 5.54695059e-01,
        6.28463439e+00, 3.79504269e+00, 9.54940711e+00, 4.08237154e+02,
        1.84555336e+01, 3.56674032e+02, 1.26530632e+01]])

In [6]:
# Fit ordinary least squares to the log prices. `fit` returns the estimator
# itself, so `Linear_model` and `regr` refer to the same fitted object.
regr = LinearRegression()
Linear_model = regr.fit(features, target)

# In-sample predictions and their error. RMSE is on the log-price scale and
# is used later as the half-width unit of the valuation range.
fitted_values = Linear_model.predict(features)
MSE = mean_squared_error(y_true=target, y_pred=fitted_values)
RMSE = np.sqrt(MSE)

### Defining a function to calculate log prices

In [7]:
# Column positions inside `features` (CRIM, ZN, CHAS, NOX, RM, DIS, RAD, TAX,
# PTRATIO, B, LSTAT) of the three inputs a caller can override.
CHAS_IDX = 2
RM_IDX = 4
PTRATIO_IDX = 8


def get_log_estimate(nr_rooms,
                     students_per_classroom,
                     next_to_river=False,
                     high_confidence=True):
    """Predict the log price of a property and a range around it.

    Every feature not supplied by the caller is taken from the module-level
    `property_stats` baseline (the per-feature means).

    Keyword arguments:
    nr_rooms -- value to use for the RM feature
    students_per_classroom -- value to use for the PTRATIO feature
    next_to_river -- True sets the CHAS feature to 1, False sets it to 0
    high_confidence -- True for a range of +/- 2 * RMSE (labelled 95),
                       False for a range of +/- 1 * RMSE (labelled 68)

    Returns a tuple (log_estimate, upper_bound, lower_bound, interval), all
    on the log-price scale except `interval`, which is the percentage label.
    """
    # Work on a copy so the shared baseline row is never modified; otherwise
    # every call would silently rewrite the array displayed earlier.
    stats = property_stats.copy()
    stats[0, RM_IDX] = nr_rooms
    stats[0, PTRATIO_IDX] = students_per_classroom
    stats[0, CHAS_IDX] = 1 if next_to_river else 0

    # The model returns a (1, 1) array because it was fitted on a
    # single-column target.
    log_estimate = regr.predict(stats)[0][0]

    # Two RMSEs either side is reported as 95%, one RMSE as 68%.
    if high_confidence:
        spread, interval = 2 * RMSE, 95
    else:
        spread, interval = RMSE, 68

    upper_bound = log_estimate + spread
    lower_bound = log_estimate - spread
    return log_estimate, upper_bound, lower_bound, interval

In [8]:
# Try it out: 3 rooms, 20 students per classroom, next to the river,
# with the narrower (1 x RMSE) range.
get_log_estimate(3, 20, next_to_river=True, high_confidence=False)

(2.776758191480399, 2.9642703266775294, 2.589246056283269, 68)

In [9]:
# Zillow median home value: $592,300, written here in thousands of dollars.
median_zillow = 592.3
median_boston_value = np.median(boston_dataset.target)

# Ratio that rescales dataset prices to the Zillow median price level.
scale_factor = median_zillow / median_boston_value

log_est, upper, lower, conf = get_log_estimate(
    9, 15, next_to_river=False, high_confidence=False
)

# Undo the log, convert thousands to dollars, and rescale to the Zillow median.
dollar_est, dollar_hi, dollar_low = (
    np.e**log_value * 1000 * scale_factor
    for log_value in (log_est, upper, lower)
)

# Round to the nearest thousand dollars.
rounded_est, rounded_hi, rounded_low = (
    np.around(amount, -3)
    for amount in (dollar_est, dollar_hi, dollar_low)
)

print(f'The estimated house price is {rounded_est}.')
print(f'At {conf}% confidence the valuation range is')
print(f'USD {rounded_low} at the lower end and USD {rounded_hi} at the higher end.')

The estimated house price is 839000.0.
At 68% confidence the valuation range is
USD 696000.0 at the lower end and USD 1013000.0 at the higher end.


### Defining a function to calculate house price in dollars

In [10]:
def get_dollar_estimate(RM, PTRATIO, CHAS=False, High_Confidence=True):
    """Print a dollar valuation and valuation range for a Boston property.

    Keyword arguments:
    RM -- number of rooms in the property
    PTRATIO -- number of students per teacher in the classroom for the
               school in the area
    CHAS -- True if the property is next to the river, False otherwise
    High_Confidence -- True for the wider valuation range, False for the
                       narrower one; the confidence percentage that is
                       printed is the one reported by get_log_estimate

    Nothing is returned. If RM or PTRATIO is below 1, a message is printed
    and no estimate is made.
    """
    if RM < 1 or PTRATIO < 1:
        print('It is unrealistic. Try again.')
        return

    log_mid, log_top, log_bottom, conf = get_log_estimate(
        RM, PTRATIO, next_to_river=CHAS, high_confidence=High_Confidence
    )

    def to_rounded_dollars(log_value):
        # Undo the log, convert thousands to dollars, apply the module-level
        # scale factor, then round to the nearest thousand.
        return np.around(np.e**log_value * 1000 * scale_factor, -3)

    rounded_est = to_rounded_dollars(log_mid)
    rounded_hi = to_rounded_dollars(log_top)
    rounded_low = to_rounded_dollars(log_bottom)

    print(f'The estimated house price is {rounded_est}.')
    print(f'At {conf}% confidence the valuation range is')
    print(f'USD {rounded_low} at the lower end and USD {rounded_hi} at the higher end.')


In [11]:
# Try it out: 5 rooms, 15 students per teacher, not by the river, narrower range.
get_dollar_estimate(RM=5, PTRATIO=15, CHAS=False, High_Confidence=False)

The estimated house price is 584000.0.
At 68% confidence the valuation range is
USD 484000.0 at the lower end and USD 705000.0 at the higher end.


### Importing the self-created module `boston_valuation`

In [12]:
import boston_valuation as val

The estimated house price is 839000.0.
At 68% confidence the valuation range is
USD 696000.0 at the lower end and USD 1013000.0 at the higher end.


In [13]:
# Same inputs as the in-notebook call above, this time through the imported module.
val.get_dollar_estimate(RM=5, PTRATIO=15, CHAS=False, High_Confidence=False)

The estimated house price is 584000.0.
At 68% confidence the valuation range is
USD 484000.0 at the lower end and USD 705000.0 at the higher end.
