### Import Statements 

In [37]:
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

import warnings
warnings.filterwarnings('ignore')

import pandas as pd 
import numpy as np

In [50]:
# Load the Boston housing dataset that ships with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell only runs on older releases — confirm/pin the sklearn version.
boston_data=load_boston()

In [51]:
# Assemble the full feature table from the loaded dataset.
data = pd.DataFrame(boston_data.data, columns=boston_data.feature_names)

# Leave out the unnecessary features (INDUS and AGE).
features = data.drop(columns=['INDUS', 'AGE'])

# The regression target is the natural log of the dataset's target values,
# stored in a single-column frame named PRICE.
log_prices = np.log(boston_data.target)
target = pd.DataFrame(log_prices, columns=['PRICE'])

features.head()

Unnamed: 0,CRIM,ZN,CHAS,NOX,RM,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,0.0,0.538,6.575,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,0.0,0.469,6.421,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,0.0,0.469,7.185,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,0.0,0.458,6.998,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,0.0,0.458,7.147,6.0622,3.0,222.0,18.7,396.9,5.33


In [52]:
# Baseline "average" property: the mean of every feature column.
# .values converts the Series to an ndarray and reshape makes it a single 2-D row.
# Using -1 lets numpy infer the column count, so this cell keeps working if the
# list of dropped features changes (previously hard-coded to 11).
property_stats=features.mean().values.reshape(1,-1)

In [53]:
# Fit an ordinary least-squares model of the log price on the selected features.
regr = LinearRegression()
regr.fit(features, target)

# In-sample predictions and their error; RMSE is reused later as the
# half-width unit of the reported price ranges.
fitted_values = regr.predict(features)
MSE = mean_squared_error(y_true=target, y_pred=fitted_values)
RMSE = np.sqrt(MSE)

In [66]:
# Show the column order of `features`; the *_IDX constants defined in a later
# cell are positions into this list.
features.columns

Index(['CRIM', 'ZN', 'CHAS', 'NOX', 'RM', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B',
       'LSTAT'],
      dtype='object')

In [54]:
# Column positions inside `features` (and therefore inside `property_stats`).
RM_IDX=4
PTRATIO_IDX=8
CRIME_IDX=0
ZN_IDX=1
CHAS_IDX=2


def log_estimate_price(rooms,student_per_classroom,highconfidence=True,near_river=False):
    """Estimate the log price of a property with the fitted regression.

    All features other than the three set here are taken from the module-level
    `property_stats` baseline row.

    Parameters
    ----------
    rooms : value written into the RM column.
    student_per_classroom : value written into the PTRATIO column.
    highconfidence : if True the range is +/- 2*RMSE and is labelled 95,
        otherwise it is +/- 1*RMSE and is labelled 68.
    near_river : value written into the CHAS column (True is stored as 1.0).

    Returns
    -------
    tuple
        (log_price, lower_bound, upper_bound, interval)
    """
    # Work on a copy: writing into the shared `property_stats` array would
    # silently overwrite the baseline row for every later cell that reads it.
    stats = property_stats.copy()
    stats[0][RM_IDX] = rooms
    stats[0][PTRATIO_IDX] = student_per_classroom
    stats[0][CHAS_IDX] = near_river

    # predict() returns a 2-D array of shape (1, 1); unwrap the single value.
    log_price = regr.predict(stats)[0][0]

    if highconfidence:
        interval = 95
        half_width = 2*RMSE
    else:
        interval = 68
        half_width = RMSE

    return log_price, log_price-half_width, log_price+half_width, interval

In [55]:
# Example: 3 rooms, 20 students per classroom, near the river, 68 range (+/- 1 RMSE).
log_estimate_price(3,20,near_river=True,highconfidence=False)


(2.7767581914803987, 2.5892460562832684, 2.964270326677529, 68)

In [56]:
# Median of the dataset's raw (un-logged) target values; used below to build `scale`.
np.median(boston_data.target)

21.2

In [57]:
# Same property with the default highconfidence=True: 95 range (+/- 2 RMSE).
log_estimate_price(3,20,near_river=True)

(2.7767581914803987, 2.401733921086138, 3.1517824618746593, 95)

In [63]:
# Estimating the price of a house in Boston according to today's prices.
# NOTE(review): presumably today's median Boston house price in thousands of
# dollars — confirm the source and date of this figure.
current_price=583.3

# Ratio of the current price to the dataset median; multiplies model output
# to convert dataset-era prices into present-day ones.
scale=current_price/np.median(boston_data.target)


In [64]:
def dollar_price_estimate(rm,ptratio,chas=False,ranges=True):
    """Print a present-day dollar estimate for a Boston property.

    Parameters
    ----------
    rm : number of rooms; values below 1 are rejected.
    ptratio : number of students per classroom; must be between 1 and 100.
    chas : whether the house is near the Charles river (passed on as
        ``near_river`` to ``log_estimate_price``).
    ranges : passed on as ``highconfidence`` to ``log_estimate_price``;
        True gives the 95 range, False the 68 range.

    Returns
    -------
    None
        The estimate and its range are printed, not returned. For rejected
        inputs only an error message is printed.
    """
    # Reject out-of-range inputs before touching the model.
    if rm < 1 or ptratio < 1 or ptratio > 100:
        print("this parameters are invalid please check the docs for more!")
        return

    log_pr, log_low, log_high, confidence = log_estimate_price(
        rm, ptratio, near_river=chas, highconfidence=ranges
    )

    def to_dollars(log_value):
        # Undo the log transform, go from thousands to dollars, then apply
        # the module-level `scale` factor.
        return (np.e ** log_value) * 1000 * scale

    estimate = to_dollars(log_pr)
    low_dollars = to_dollars(log_low)
    high_dollars = to_dollars(log_high)

    print(f"the price for the house is {np.round(estimate,3)}")
    print(f"with the confidence of {confidence} the range is ")
    # Range bounds are rounded to the nearest thousand.
    print (f"{np.around(low_dollars,-3)}-{np.around(high_dollars,-3)}")



In [65]:
# Example: 10 rooms, 30 students per classroom, not near the river (default),
# reported with the 68 range (ranges=False).
dollar_price_estimate(10,30,ranges=False)

the price for the house is 516337.55
with the confidence of 68 the range is 
428000.0-623000.0


# Thank you