In [410]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
import statsmodels.api as sm

from numpy import asarray
from sklearn.preprocessing import MinMaxScaler

from dateutil.relativedelta import relativedelta

# Notebook-wide pandas display settings: raise the row/column display limits,
# keep wide frames on one line instead of wrapping them, and do not truncate
# long cell contents.
pd.set_option('display.max_row', 1000)
pd.set_option('display.max_columns', 50)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', None)

from IPython.display import HTML
from IPython.core.interactiveshell import InteractiveShell
from statsmodels.stats.outliers_influence import variance_inflation_factor
import pickle
# Load the raw house data CSV (path is relative to the notebook's working directory).
data = pd.read_csv("./data/kc_house_data.csv")

In [411]:
# Restore the Model 1 artifacts from pickle files in the working directory.
# NOTE: pickle.load can execute arbitrary code -- only load files produced by this project.

# Fitted Model 1 regression object (used further down through .predict)
with open('regression_Initial_Model_sk.pickle', mode='rb') as file:
    data_Model1_sk = pickle.load(file)

# Fitted scaler for the continuous features (used further down through .transform)
with open('scaler_1.pickle', mode='rb') as file:
    scaler_1 = pickle.load(file)

# Home-level data: unscaled continuous features plus dummy columns
with open('data_Target_Homes_Not_Scaled_WithDummies.pickle', mode='rb') as file:
    data_Target_Homes_Not_Scaled_WithDummies = pickle.load(file)

# TODO: create a new pickle file holding the variables we start with when we scale vs. the end
# with open('model1_Continuous_variables.pickle', 'rb') as file:
#     model1_Continuous_variables = pickle.load(file)

In [412]:
# Continuous columns handed to scaler_1.transform, plus the same names under the
# prefixes used by the different frames in this notebook:
#   a_    -> columns of the "average home" frame
#   sc_   -> scaled copies appended to the home-level data
#   n_sc_ -> freshly scaled values produced for a scenario
colsToScale = [
    'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'sqft_basement',
    'sqft_living15', 'sqft_lot15', 'floors', 'sqft_above', 'AgeOfHome',
    'ttl_rooms',
]

colsToScale_with_a = [f"a_{name}" for name in colsToScale]
colsToScale_with_sc = [f"sc_{name}" for name in colsToScale]
colsToScale_with_n_sc = [f"n_sc_{name}" for name in colsToScale]


In [413]:
# The pickled home data is stored unscaled so that scenario changes can be expressed
# in unscaled units; the scaled versions of the continuous columns are derived here.

# Continuous columns only, in the order given by colsToScale
data_Target_Homes_colsToScale = data_Target_Homes_Not_Scaled_WithDummies[colsToScale]

# Apply the already-fitted scaler and label the result with the sc_ prefix
Scaled_Data_1 = scaler_1.transform(data_Target_Homes_colsToScale)
df_Scaled_Data_1 = pd.DataFrame(Scaled_Data_1, columns=colsToScale_with_sc)

# Put the scaled columns next to the original ones. The index is reset first so both
# frames share a 0..n-1 index and line up row for row in the column-wise concat.
data_Target_Homes_Not_Scaled_WithDummies = (
    data_Target_Homes_Not_Scaled_WithDummies.reset_index(drop=True)
)
data_Target_Homes_Scaled_WithDummies = pd.concat(
    [data_Target_Homes_Not_Scaled_WithDummies, df_Scaled_Data_1],
    axis="columns",
)



In [414]:
#Get and insert the average value for each feature for easy retrieval when adding values to demonstrate regression
def resetAverages():
    """Build the one-row "average home" frame used as the Model 1 baseline.

    Columns, in order:
      * ``a_<feature>``    -- mean of each continuous feature in
        ``data_Target_Homes_Not_Scaled_WithDummies``;
      * ``a_<dummy>``      -- dummy indicators, all 0 except ``a_sls_mnth_5``, which is 1;
      * ``a_sc_<feature>`` -- mean of each ``sc_<feature>`` column in
        ``data_Target_Homes_Scaled_WithDummies``.

    Returns:
        pandas.DataFrame: a single row (index 0) with the columns above.
    """
    # Kept local so the function does not depend on the module-level colsToScale list.
    continuous = (
        'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'sqft_basement',
        'sqft_living15', 'sqft_lot15', 'floors', 'sqft_above', 'AgeOfHome',
        'ttl_rooms',
    )
    dummies = (
        ['waterfront_1']
        + ['view_%d' % level for level in range(1, 5)]
        + ['condition_4', 'condition_5']
        + ['grade_6', 'grade_8', 'grade_9', 'grade_10', 'grade_11']
        + ['ZipFirst3_981']
        + ['sls_mnth_%d' % month for month in range(2, 13)]
        + ['If_renovated_1']
    )

    row = {}

    # Unscaled means of the continuous features
    for name in continuous:
        row['a_' + name] = [data_Target_Homes_Not_Scaled_WithDummies[name].mean()]

    # Dummy indicators: everything off except the sale-month-5 flag
    for name in dummies:
        row['a_' + name] = [1 if name == 'sls_mnth_5' else 0]

    # Means of the continuous features after scaling
    for name in continuous:
        row['a_sc_' + name] = [data_Target_Homes_Scaled_WithDummies['sc_' + name].mean()]

    return pd.DataFrame(row)


In [415]:
#update value of target column using unscaled units
def Calculate_New_Unscaled_Value_For_TargetCol(TargetColumn, deltaInValue, TargetDataFrame, id=0):
    """Return the new unscaled value for ``TargetColumn`` after applying a change.

    Two kinds of column are supported:
      * ``sqft_above`` / ``a_sqft_above`` -- continuous; ``deltaInValue`` is added
        to the current value read from ``TargetDataFrame``.
      * ``grade*`` / ``a_grade*`` -- dummy columns; ``deltaInValue`` is itself the
        new value and is returned unchanged.

    Args:
        TargetColumn: name of the column to change.
        deltaInValue: amount to add (sqft_above) or the new value (grade dummies).
        TargetDataFrame: frame (or mapping) holding the current unscaled values.
        id: unused; kept so existing call sites keep working.

    Returns:
        ``TargetDataFrame[TargetColumn] + deltaInValue`` for sqft_above columns,
        ``deltaInValue`` for grade columns.

    Raises:
        ValueError: if ``TargetColumn`` is not one of the supported columns
            (previously this surfaced as an UnboundLocalError at ``return``).
        KeyError: if ``TargetColumn`` is supported but missing from ``TargetDataFrame``.
    """
    is_sqft_above = TargetColumn in ("sqft_above", "a_sqft_above")
    is_grade = TargetColumn.startswith(("grade", "a_grade"))

    if not (is_sqft_above or is_grade):
        raise ValueError(
            "Unsupported target column %r: expected 'sqft_above', 'a_sqft_above' "
            "or a 'grade*' / 'a_grade*' dummy column" % (TargetColumn,)
        )

    # Looked up for both kinds so a missing column fails here rather than being
    # silently created later by the caller's assignment.
    current_value = TargetDataFrame[TargetColumn]

    if is_sqft_above:
        return current_value + deltaInValue

    # Grade dummy: the "delta" is the new indicator value itself.
    return deltaInValue

In [416]:
#Scenario 1, using Model 1, Physical Features Only

#Start by getting the "average" home
df_Averages = resetAverages()

#Features to change and the amount for each one (paired by position):
#  sqft_above -> the delta is ADDED to the current unscaled value
#  grade_*    -> the named grade dummy is switched on and every other grade dummy is cleared
ListOfTargets = ["sqft_above"]
ListOfdeltas = [100]

#Alternative scenarios:
# ListOfTargets = ["grade_10"]
# ListOfdeltas = [1]
#
# ListOfTargets = ["sqft_above", "grade_8"]
# ListOfdeltas = [0,1]

ChangedFeatures = list(zip(ListOfTargets, ListOfdeltas))

#Enter the id of a targeted home, or enter 0 to get the change for the average home
# parcelID = 6414100192
parcelID = 0

#Grade dummy columns (unprefixed); exactly one of them may be on at a time
gradeDummyCols = ["grade_6", "grade_8", "grade_9", "grade_10", "grade_11"]

#Columns fed into predict, in the order they are passed to the model. The dummy columns
#carry the "a_" prefix in the average-home frame; the freshly scaled columns are always "n_sc_".
dummyColsToFeed = ['waterfront_1', 'view_1', 'view_2', 'view_3', 'view_4', 'condition_4',
       'condition_5', 'grade_6', 'grade_8', 'grade_9', 'grade_10', 'grade_11',
       'ZipFirst3_981', 'sls_mnth_2', 'sls_mnth_3', 'sls_mnth_4', 'sls_mnth_5',
       'sls_mnth_6', 'sls_mnth_7', 'sls_mnth_8', 'sls_mnth_9', 'sls_mnth_10',
       'sls_mnth_11', 'sls_mnth_12', 'If_renovated_1']
scaledColsToFeed = ['n_sc_sqft_lot', 'n_sc_sqft_basement', 'n_sc_sqft_above']

if parcelID == 0:
    #we are calculating for an average home
    df_target_data_to_scale = df_Averages

    for targetCol, delta in ChangedFeatures:
        TargetColumn = "a_" + targetCol
        NewValue = Calculate_New_Unscaled_Value_For_TargetCol(TargetColumn, delta, df_target_data_to_scale, parcelID)
        df_target_data_to_scale[TargetColumn] = NewValue

        #if changing a dummy variable, reset all sibling dummies and then switch the target on
        if TargetColumn.startswith("a_grade"):
            for gradeCol in gradeDummyCols:
                #every grade column gets the "a_" prefix here (a bare "grade_10" used to be
                #written, which added a stray column and left a_grade_10 untouched)
                df_target_data_to_scale["a_" + gradeCol] = 0
            df_target_data_to_scale[TargetColumn] = 1

    #The average-home frame names its continuous columns with "a_". A cell-local name is
    #used instead of rebinding colsToScale, so this cell (and the scaling cell above) can be re-run.
    colsToScale_for_target = colsToScale_with_a
    colsToFeed_In_to_Predict = ["a_" + col for col in dummyColsToFeed] + scaledColsToFeed

else:
    #we are calculating data using a real home; copy so the source frame is never modified
    df_target_data_to_scale = data_Target_Homes_Not_Scaled_WithDummies.loc[
        data_Target_Homes_Not_Scaled_WithDummies['id'] == parcelID
    ].copy()

    if df_target_data_to_scale.empty:
        raise ValueError(f"parcelID {parcelID} was not found in data_Target_Homes_Not_Scaled_WithDummies")

    for targetCol, delta in ChangedFeatures:
        TargetColumn = targetCol
        NewValue = Calculate_New_Unscaled_Value_For_TargetCol(TargetColumn, delta, df_target_data_to_scale, parcelID)

        #clear the sibling grade dummies BEFORE writing the new value; doing it afterwards
        #also zeroed the column that had just been updated
        if TargetColumn.startswith("grade"):
            for gradeCol in gradeDummyCols:
                df_target_data_to_scale[gradeCol] = 0

        df_target_data_to_scale[TargetColumn] = NewValue

    colsToScale_for_target = colsToScale
    colsToFeed_In_to_Predict = dummyColsToFeed + scaledColsToFeed

#Scale and create dataframe of scaled values
Scaled_Data_1 = scaler_1.transform(df_target_data_to_scale[colsToScale_for_target])
df_Scaled_Data_1 = pd.DataFrame(data=Scaled_Data_1, columns=colsToScale_with_n_sc)

#combine scaled values with the other non scaled features needed to feed into predict
df_target_data_to_scale = df_target_data_to_scale.reset_index(drop=True)
data_TargetHomes_With_Updated_TargetColumn = pd.concat([df_target_data_to_scale, df_Scaled_Data_1], axis=1)

#Get the columns needed, in the order required to feed into predict
df_Home_To_Predict_No_ID = data_TargetHomes_With_Updated_TargetColumn[colsToFeed_In_to_Predict]

#np.exp is applied to the raw prediction below, so the model presumably predicts log(price) -- TODO confirm
y_hat_average = data_Model1_sk.predict(df_Home_To_Predict_No_ID)
print(y_hat_average)
print(np.exp(y_hat_average))


[12.87795416]
[391583.54230548]


# Model 2 - Adding Location Based Features

In [417]:
# Update school district

In [418]:
# Restore the Model 2 artifacts from pickle files in the working directory.
# NOTE: pickle.load can execute arbitrary code -- only load files produced by this project.

# Fitted Model 2 regression object
with open('regression_Second_Model_sk.pickle', mode='rb') as file:
    data_Model2_sk = pickle.load(file)

# Fitted scaler belonging to Model 2
with open('scaler_2.pickle', mode='rb') as file:
    scaler_2 = pickle.load(file)

# Home-level data used for Model 2
with open('data_with_Hot_Dist_POints_WithDummies.pickle', mode='rb') as file:
    data_with_Hot_Dist_POints_WithDummies = pickle.load(file)

# Regression data for Model 2 (its Under10 / Over20 columns are averaged in resetAverages)
with open('regression_Data_Second_Model.pickle', mode='rb') as file:
    data_Model2 = pickle.load(file)

In [419]:
#Get and insert the average value for each feature for easy retrieval when adding values to demonstrate regression
def resetAverages():
    """Build the one-row "average home" frame for Model 2.

    NOTE: this redefines ``resetAverages``; once this cell has run it replaces
    the definition from the Model 1 section of the notebook.

    Columns, in order:
      * ``a_<feature>``    -- mean of each continuous feature in
        ``data_Target_Homes_Not_Scaled_WithDummies``;
      * ``a_<dummy>``      -- dummy indicators, all 0 except ``a_sls_mnth_5`` (1);
      * ``a_sc_<feature>`` -- mean of each ``sc_<feature>`` column in
        ``data_Target_Homes_Scaled_WithDummies``;
      * Model 2 additions -- ``a_Sch_d_Top15`` (1), ``a_Sch_d_Top30`` (0),
        ``a_Sch_d_Top60`` (0), ``a_Under10`` (10), ``a_Over20`` (10), and
        ``a_sc_Under10`` / ``a_sc_Over20`` taken as the means of
        ``data_Model2["Under10"]`` / ``data_Model2["Over20"]``.

    Returns:
        pandas.DataFrame: a single row (index 0) with the columns above.
    """
    unscaled = data_Target_Homes_Not_Scaled_WithDummies
    scaled = data_Target_Homes_Scaled_WithDummies

    continuous = [
        'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'sqft_basement',
        'sqft_living15', 'sqft_lot15', 'floors', 'sqft_above', 'AgeOfHome',
        'ttl_rooms',
    ]
    indicator_names = [
        'waterfront_1',
        'view_1', 'view_2', 'view_3', 'view_4',
        'condition_4', 'condition_5',
        'grade_6', 'grade_8', 'grade_9', 'grade_10', 'grade_11',
        'ZipFirst3_981',
        'sls_mnth_2', 'sls_mnth_3', 'sls_mnth_4', 'sls_mnth_5', 'sls_mnth_6',
        'sls_mnth_7', 'sls_mnth_8', 'sls_mnth_9', 'sls_mnth_10', 'sls_mnth_11',
        'sls_mnth_12',
        'If_renovated_1',
    ]

    values = {}
    values.update(('a_' + col, unscaled[col].mean()) for col in continuous)
    values.update(('a_' + col, 0) for col in indicator_names)
    values['a_sls_mnth_5'] = 1  # existing key, so the column keeps its position
    values.update(('a_sc_' + col, scaled['sc_' + col].mean()) for col in continuous)

    # newly added averages from data model 2
    values.update(a_Sch_d_Top15=1, a_Sch_d_Top30=0, a_Sch_d_Top60=0,
                  a_Under10=10, a_Over20=10)
    values['a_sc_Under10'] = data_Model2["Under10"].mean()
    values['a_sc_Over20'] = data_Model2["Over20"].mean()

    # One single-element list per column -> one-row frame with index 0
    return pd.DataFrame({column: [value] for column, value in values.items()})


In [420]:
# Average-home row restricted to the columns listed below, in this order
# (presumably the feature order Model 2 expects -- TODO confirm against the fitted model).
test = resetAverages()
testAveragePredict = test.loc[:, [
    'a_Sch_d_Top15', 'a_Sch_d_Top30', 'a_Sch_d_Top60',
    'a_sc_Under10', 'a_sc_Over20',
    'a_sc_sqft_lot', 'a_sc_sqft_basement', 'a_sc_sqft_above',
    'a_view_1', 'a_view_2', 'a_view_3', 'a_view_4',
    'a_condition_4', 'a_condition_5',
    'a_grade_6', 'a_grade_8', 'a_grade_9', 'a_grade_10',
    'a_sls_mnth_2', 'a_sls_mnth_3', 'a_sls_mnth_4', 'a_sls_mnth_5',
    'a_sls_mnth_6', 'a_sls_mnth_7', 'a_sls_mnth_8', 'a_sls_mnth_9',
    'a_sls_mnth_10', 'a_sls_mnth_11', 'a_sls_mnth_12',
    'a_If_renovated_1',
]]

# Model 3 - Adding Assessor Appraisals

In [421]:
#Columns To feed into scaler
# model3_Continuous_variables  = ['YrRollingAppraisal','AppraisedLandValue', 
#                                   'AppraisedImpsValue', 'LandToHouseCheck',
#                                   'InflationSinceLastAppraisal', 
#                                   'InflationSinceLastAppraisal_abs', 'AppraisedTotal', 'AssesorAppraisals_x']


In [422]:
# Load the Model 3 artifacts from pickle files in the working directory.
# NOTE: pickle.load can execute arbitrary code -- only load files produced by this project.

# Fitted Model 3 regression object
with open('regression_Third_Model_sk.pickle', 'rb') as file:
    data_Model3_sk = pickle.load(file)

# The Model 3 scaler gets its own name. It used to be assigned to scaler_2, which
# silently replaced the Model 2 scaler loaded earlier in the notebook.
with open('scaler_3.pickle', 'rb') as file:
    scaler_3 = pickle.load(file)

# Home-level data including the appraisal columns
with open('df_data_with_Appraisals.pickle', 'rb') as file:
    df_data_with_Appraisals = pickle.load(file)

# Regression data for Model 3
with open('regression_Data_Third_Model.pickle', 'rb') as file:
    data_Model3 = pickle.load(file)

In [426]:
#Get and insert the average value for each feature for easy retrieval when adding values to demonstrate regression
def resetAverages():
    """Build the one-row "average home" frame for Model 3.

    NOTE: this redefines ``resetAverages``; once this cell has run it replaces
    the definitions from the Model 1 and Model 2 sections of the notebook.

    Columns, in order:
      * ``a_<feature>``    -- mean of each continuous feature in
        ``data_Target_Homes_Not_Scaled_WithDummies``;
      * ``a_<dummy>``      -- dummy indicators, all 0 except ``a_sls_mnth_5`` (1);
      * ``a_sc_<feature>`` -- mean of each ``sc_<feature>`` column in
        ``data_Target_Homes_Scaled_WithDummies``;
      * ``a_Sch_d_Top15`` (1), ``a_Sch_d_Top30`` (0), ``a_Sch_d_Top60`` (0);
      * ``a_sc_Under10`` / ``a_sc_Over20`` -- means of ``data_Model2["Under10"]``
        and ``data_Model2["Over20"]``;
      * ``a_sc_AssesorAppraisals_x`` -- mean of
        ``df_data_with_Appraisals['AssesorAppraisals_x']``.

    Unlike the Model 2 version, the unscaled ``a_Under10`` / ``a_Over20`` columns
    are not produced, and no averages are filled in for the other appraisal
    columns (YrRollingAppraisal, AppraisedLandValue, AppraisedImpsValue,
    LandToHouseCheck, InflationSinceLastAppraisal,
    InflationSinceLastAppraisal_abs, AppraisedTotal).

    Returns:
        pandas.DataFrame: a single row (index 0) with the columns above.
    """
    homes = data_Target_Homes_Not_Scaled_WithDummies
    homes_scaled = data_Target_Homes_Scaled_WithDummies

    features = (
        'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'sqft_basement',
        'sqft_living15', 'sqft_lot15', 'floors', 'sqft_above', 'AgeOfHome',
        'ttl_rooms',
    )
    flags = (
        'waterfront_1',
        'view_1', 'view_2', 'view_3', 'view_4',
        'condition_4', 'condition_5',
        'grade_6', 'grade_8', 'grade_9', 'grade_10', 'grade_11',
        'ZipFirst3_981',
        'sls_mnth_2', 'sls_mnth_3', 'sls_mnth_4', 'sls_mnth_5', 'sls_mnth_6',
        'sls_mnth_7', 'sls_mnth_8', 'sls_mnth_9', 'sls_mnth_10', 'sls_mnth_11',
        'sls_mnth_12',
        'If_renovated_1',
    )

    # Each group maps column name -> single-element list (one row).
    raw_means = {f'a_{name}': [homes[name].mean()] for name in features}
    flag_values = {f'a_{name}': [int(name == 'sls_mnth_5')] for name in flags}
    scaled_means = {f'a_sc_{name}': [homes_scaled[f'sc_{name}'].mean()] for name in features}

    # averages carried over from data model 2
    model2_values = {
        'a_Sch_d_Top15': [1],
        'a_Sch_d_Top30': [0],
        'a_Sch_d_Top60': [0],
        'a_sc_Under10': [data_Model2["Under10"].mean()],
        'a_sc_Over20': [data_Model2["Over20"].mean()],
    }

    # newly added average from data model 3
    model3_values = {
        'a_sc_AssesorAppraisals_x': [df_data_with_Appraisals['AssesorAppraisals_x'].mean()],
    }

    return pd.DataFrame({
        **raw_means,
        **flag_values,
        **scaled_means,
        **model2_values,
        **model3_values,
    })


In [427]:
# Average-home row restricted to the columns passed to data_Model3_sk.predict in the next cell,
# in this order (presumably the feature order Model 3 was fitted with -- TODO confirm).
df_average = resetAverages()
testAveragePredict = df_average.loc[:, [
    'a_sc_AssesorAppraisals_x',
    'a_Sch_d_Top15', 'a_Sch_d_Top30', 'a_Sch_d_Top60',
    'a_sc_Under10', 'a_sc_Over20',
    'a_view_1', 'a_view_2', 'a_view_3', 'a_view_4',
    'a_condition_4', 'a_condition_5',
    'a_grade_6', 'a_grade_8', 'a_grade_9', 'a_grade_10',
    'a_sls_mnth_2', 'a_sls_mnth_3', 'a_sls_mnth_4', 'a_sls_mnth_5',
    'a_sls_mnth_6', 'a_sls_mnth_7', 'a_sls_mnth_8', 'a_sls_mnth_9',
    'a_sls_mnth_10', 'a_sls_mnth_11', 'a_sls_mnth_12',
    'a_If_renovated_1',
    'a_sc_sqft_lot', 'a_sc_sqft_basement', 'a_sc_sqft_above',
]]

In [428]:
# Model 3 prediction for the average home: the raw model output first, then its exponential
# (presumably the model predicts log(price) -- TODO confirm).
y_hat_average = data_Model3_sk.predict(testAveragePredict)
print(y_hat_average, np.exp(y_hat_average), sep="\n")

[13.03987984]
[460413.29988845]


In [346]:
# First row of testPredictData, displayed as the cell output.
# NOTE(review): testPredictData is not defined in any cell visible in this notebook, and this
# cell's execution count (346) is older than the cells above it. Unless it is defined in a part
# of the notebook not shown here, this cell relies on leftover kernel state and will fail with a
# NameError on Restart & Run All -- load/define testPredictData explicitly before this cell.
testPredictDatatop = testPredictData.head(1)
testPredictDatatop

Unnamed: 0,Sch_d_Top15,Sch_d_Top30,Sch_d_Top60,Under10,Over20,sqft_lot,sqft_basement,sqft_above,view_1,view_2,view_3,view_4,condition_4,condition_5,grade_6,grade_8,grade_9,grade_10,sls_mnth_2,sls_mnth_3,sls_mnth_4,sls_mnth_5,sls_mnth_6,sls_mnth_7,sls_mnth_8,sls_mnth_9,sls_mnth_10,sls_mnth_11,sls_mnth_12,If_renovated_1
0,0,1,0,0.428571,0.05,0.540333,0.184332,0.528125,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1


In [344]:
# Model 2 prediction for the single row in testPredictDatatop: the raw model output first,
# then its exponential (presumably the model predicts log(price) -- TODO confirm).
# Note this rebinds y_hat_average, which earlier cells also assign.
y_hat_average = data_Model2_sk.predict(testPredictDatatop)
print(y_hat_average, np.exp(y_hat_average), sep="\n")


[13.28233237]
[586737.24232559]
