In [31]:
import pickle
from collections import namedtuple

import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.impute import SimpleImputer
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

from zillow_adapter import ask_zillow

# Load the cached dataset; the pickled object is unpacked into two names,
# X_ (passed to prep() below) and y.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file --
# only load "final_df.pickle" from a trusted source.
with open("final_df.pickle", "rb") as fp: 
    X_, y = pickle.load(fp)
    
def prep(dat: pd.DataFrame) -> pd.DataFrame:
    """Reshape the raw frame into the fixed feature layout used for modelling.

    Steps, in order:
      1. drop the 'zip5_encoded' and 'city_encoded' columns;
      2. rename 'year_built_bin', 'baths_lavs' and 'beds_total' to the
         Zillow-style names 'year_built', 'bathrooms' and 'bedrooms';
      3. replace 'roof_year' and 'furnace_year' with labelled buckets
         ("0-4 years", "5-9 years", "10-14 years", "15+");
      4. return exactly the eleven expected columns, in a fixed order.

    Parameters
    ----------
    dat : pd.DataFrame
        Input frame. Must contain 'zip5_encoded', 'city_encoded',
        'roof_year' and 'furnace_year'. The two *_year columns are
        presumably ages in years, given the bucket labels -- TODO confirm.

    Returns
    -------
    pd.DataFrame
        A new frame; ``dat`` itself is not modified. Any expected column
        that is absent after the rename is created by ``reindex`` and
        filled with NaN.
    """
    age_bins = [0, 4, 9, 14, 10000]
    age_labels = ["0-4 years", "5-9 years", "10-14 years", "15+"]

    ## BIN LABELS NEED TO BE THIS
    # pd.cut intervals are right-closed by default, so without
    # include_lowest=True a value of exactly 0 falls outside the first bin
    # and becomes NaN instead of "0-4 years".
    binned = {feat: pd.cut(dat[feat],
                           bins=age_bins,
                           labels=age_labels,
                           include_lowest=True)
              for feat
              in ['roof_year', 'furnace_year']}

    return (dat.drop(['zip5_encoded', 'city_encoded'], axis=1)

            # match column names to zillow
            .rename(columns={'year_built_bin': 'year_built', 'baths_lavs': 'bathrooms', 'beds_total': 'bedrooms'})

            .assign(**binned)

            ## NEEDS TO BE IN THIS ORDER
            .reindex(['bedrooms', 'bathrooms', 'year_built',
                      'quartz_countertops', 'granite_countertops', 'formica_countertops', 'tile_countertops',
                      'laminate_floors', 'hardwood_floors',
                      'roof_year', 'furnace_year'], axis=1))

# Feature frame in the fixed column layout produced by prep().
X = prep(X_)

# address, zipcode = "4565 White Rd, Pierson, MI", 49339
# house = ask_zillow(address, zipcode)

print(X.shape)
#inps = {feat: house.results.__dict__[feat] for feat in ('bedrooms', 'bathrooms', 'year_built')}

# Survey answer -> values for [quartz, granite, formica, tile] countertops.
# 'skip' falls back to the mean of each of those columns in X.
# NOTE(review): the Formica/Tile key contains literal double-quote characters,
# unlike every other key -- confirm that is the string callers actually send.
countertops_map = {
    "Marble/Quartz": [1, 0, 0, 0],
    "Granite/Concrete": [0, 1, 0, 0],
    '"Formica/Tile"': [0, 0, 1, 1],
    'Laminate': [0, 0, 0, 0],
    'skip': [X[col].mean()
             for col in ('quartz_countertops',
                         'granite_countertops',
                         'formica_countertops',
                         'tile_countertops')],
}

# Survey answer -> values for [laminate_floors, hardwood_floors].
# 'skip' falls back to the mean of each of those columns in X.
flooring_map = {
    "Hardwood": [0, 1],
    "Engineered/Laminate": [1, 0],
    "Ceramic Tile": [0, 0],
    "Porcelain Tile/Concrete": [0, 0],
    "skip": [X[col].mean() for col in ('laminate_floors', 'hardwood_floors')],
}

# Age-bucket answers map to themselves.
# NOTE(review): 'skip' maps to y.mean(), i.e. the mean of y rather than an
# age bucket -- confirm this is the intended fallback.
roof_furnace_map = {'skip': y.mean()}
roof_furnace_map.update(
    (label, label)
    for label in ["0-4 years", "5-9 years", "10-14 years", "15+"])

X.head()

(24773, 11)


Unnamed: 0,bedrooms,bathrooms,year_built,quartz_countertops,granite_countertops,formica_countertops,tile_countertops,laminate_floors,hardwood_floors,roof_year,furnace_year
0,4,2.1,1999,0,1,0,0,0,0,15+,15+
3,5,3.0,1996,0,0,0,0,0,1,15+,15+
6,3,2.1,1997,0,1,0,0,0,0,15+,15+
10,4,3.0,1992,0,1,0,0,0,1,15+,15+
11,4,3.1,1980,0,0,0,0,0,1,15+,15+


In [25]:
# The two age-bucket columns are one-hot encoded; every other column of X is
# routed through the numeric branch.
to_onehot = ['furnace_year', 'roof_year']
numeric_features = X.drop(to_onehot, axis=1).columns

# Numeric branch: fill missing values with the column mean.
pass_numeric = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean'))])

# Categorical branch: one-hot encode, ignoring categories unseen during fit.
# NOTE(review): this branch has no imputer, while the exploratory encoding at
# the bottom of this cell fills missing buckets with '15+' -- confirm whether
# the pipeline should do the same before fitting.
onehotter = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', pass_numeric, numeric_features),
        ('cat', onehotter, to_onehot)])

# Append the regressor to the preprocessing pipeline.
# Now we have a full prediction pipeline.
# The loss is left at its default (least squares): that is the same objective
# as the former loss='ls', a spelling newer scikit-learn releases reject.
gbr = Pipeline(steps=[('preprocessor', preprocessor),
                      ('regressor', GradientBoostingRegressor())])

# Search space for the randomized search (keys address the 'regressor' step).
param_grid = {'regressor__learning_rate': [np.exp(k) for k in range(-5, -1)],
              'regressor__max_depth': range(2, 9),
              'regressor__min_samples_split': [2, 3, 4],
              'regressor__n_estimators': range(135, 800)}

# Fixed seed so the sampled candidates are the same on every run.
gbr_cv = RandomizedSearchCV(gbr, param_distributions=param_grid, random_state=0)

gbr_cv.fit(X, y)

# Persist the fitted search object.
# NOTE(review): the file name is chosen to mirror "final_df.pickle" -- adjust
# if another location is expected.
with open("gbr_cv.pickle", "wb") as fp:
    pickle.dump(gbr_cv, fp)

# Exploratory look at the one-hot encoding of the bucket columns, with missing
# buckets filled as '15+'.
onehot = OneHotEncoder()
encoded = onehot.fit_transform(X[to_onehot].fillna('15+'))
# get_feature_names() only exists in older scikit-learn; fall back to
# get_feature_names_out() where it has been removed.
if hasattr(onehot, "get_feature_names"):
    encoded_columns = onehot.get_feature_names()
else:
    encoded_columns = onehot.get_feature_names_out()
pd.DataFrame(encoded.todense(), columns=encoded_columns)


Unnamed: 0,x0_0-4 years,x0_10-14 years,x0_15+,x0_5-9 years,x1_0-4 years,x1_10-14 years,x1_15+,x1_5-9 years
0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
2,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
3,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
4,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
5,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
6,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
7,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
8,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
9,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0


In [4]:
from valuation import valuation
from constants import SurveyPredictants

# Look up the example property through the Zillow adapter.
address = "4565 White Rd, Pierson, MI"
zipcode = 49339
house = ask_zillow(address, zipcode)

#good_address = "3400 Pacific Ave., Marina Del Rey, CA, 90292", #"12345 Butternut Avenue, Sand Lake, MI 49343",
# Survey answers passed to the valuation alongside the Zillow results.
countertops, flooring = "Granite/Concrete", "Hardwood"
roof_age, furnace_age = "0-4 years", "10-14 years"

x = valuation(
    house.results,
    SurveyPredictants(countertops, flooring, roof_age, furnace_age),
)

In [32]:
# Scratch evaluation of e**-1. For reference, the learning-rate grid in the
# search cell uses exponents from range(-5, -1), whose largest exponent is -2.
np.exp(-1)

0.36787944117144233

In [None]:

# zill_props = ['bathrooms',
#  'bedrooms',
#  'data',
#  'get_attr',
#  'graph_data_link',
#  'home_detail_link',
#  'home_size',
#  'home_type',
#  'last_sold_date',
#  'last_sold_price',
#  'last_sold_price_currency',
#  'latitude',
#  'longitude',
#  'map_this_home_link',
#  'property_size',
#  'tax_value',
#  'tax_year',
#  'year_built',]
