In [6]:
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.linear_model import Ridge, Lasso, ElasticNet, LinearRegression
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import cross_val_score

from sklearn import ensemble
import time

from catboost import CatBoostRegressor

import re

from statsmodels.stats.outliers_influence import variance_inflation_factor
from xgboost import XGBRegressor

In [7]:
# Let long frames render up to 500 rows before pandas truncates the display.
pd.options.display.max_rows = 500

In [12]:
# Load the front-end feature table from the v6 CSV.
front_end_csv = '../data/ames_housing_price_data_v6.csv'
front_end = pd.read_csv(front_end_csv)

In [35]:
# Read the v5 CSV and keep only the PID and SalePrice columns.
sale_price_csv = '../data/ames_housing_price_data_v5.csv'
sp = pd.read_csv(sale_price_csv).loc[:, ['PID', 'SalePrice']]

In [16]:
def dummify(df, non_dummies, dummies):
    """Build 0/1 indicator columns for a fixed list of dummy names.

    Each name in ``dummies`` is expected to look like ``<column>_<value>``,
    where ``<column>`` is one of ``non_dummies``. The indicator is 1 on rows
    where ``df[<column>] == <value>`` and 0 everywhere else (missing values
    therefore map to 0).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the categorical columns named in ``non_dummies``.
        It is not modified.
    non_dummies : list of str
        Categorical source columns; they are dropped from the result.
    dummies : list of str
        Indicator columns to create, added in this order.

    Returns
    -------
    pandas.DataFrame
        A new frame with the indicator columns appended and the source
        columns removed.

    Raises
    ------
    KeyError
        If a name in ``non_dummies`` is not a column of ``df``.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    encoded = df.copy()
    for dummified in dummies:
        for original in non_dummies:
            prefix = f'{original}_'
            # Match on the prefix only: a plain substring test would let a short
            # column name (e.g. 'Type') hijack 'BldgType_...' dummies.
            if dummified.startswith(prefix):
                value = dummified[len(prefix):]
                encoded[dummified] = (encoded[original] == value).astype('int64')
    return encoded.drop(columns=non_dummies)

In [17]:
# Indicator columns, grouped by the categorical column they are derived from.
_neighborhood_dummies = [
    'Neighborhood_Blueste', 'Neighborhood_BrDale', 'Neighborhood_BrkSide',
    'Neighborhood_ClearCr', 'Neighborhood_CollgCr', 'Neighborhood_Crawfor',
    'Neighborhood_Edwards', 'Neighborhood_Gilbert', 'Neighborhood_Greens',
    'Neighborhood_GrnHill', 'Neighborhood_IDOTRR', 'Neighborhood_Landmrk',
    'Neighborhood_MeadowV', 'Neighborhood_Mitchel', 'Neighborhood_NAmes',
    'Neighborhood_NPkVill', 'Neighborhood_NWAmes', 'Neighborhood_NoRidge',
    'Neighborhood_NridgHt', 'Neighborhood_OldTown', 'Neighborhood_SWISU',
    'Neighborhood_Sawyer', 'Neighborhood_SawyerW', 'Neighborhood_Somerst',
    'Neighborhood_StoneBr', 'Neighborhood_Timber', 'Neighborhood_Veenker',
]
_bldg_type_dummies = [
    'BldgType_2fmCon', 'BldgType_Duplex', 'BldgType_Twnhs', 'BldgType_TwnhsE',
]
_mas_vnr_type_dummies = ['MasVnrType_None', 'MasVnrType_Stone']

# Order matters: the indicator columns are created in this sequence.
dummies = _neighborhood_dummies + _bldg_type_dummies + _mas_vnr_type_dummies

# Categorical source columns that the indicator columns replace.
non_dummies = ['Neighborhood', 'BldgType', 'MasVnrType']

In [18]:
# Turn the front-end frame into the model's back-end input, then score it
# with the saved CatBoost model.
back_end = front_end.copy()

# Re-express each rating as its difference from OverallQual, in this column
# order, and drop the raw rating columns afterwards.
rating_cols = ['ExterQual', 'OverallCond', 'KitchenQual']
for rating in rating_cols:
    back_end[f'{rating}Disc'] = back_end[rating] - back_end['OverallQual']
back_end = back_end.drop(columns=rating_cols)

# Swap the categorical columns for the fixed set of indicator columns.
back_end = dummify(back_end, non_dummies, dummies)

# Load the stored model (cbm format) and predict a price for every row.
cbl = CatBoostRegressor()
cbl.load_model("HousePriceCatBoost", "cbm")
cbl_pred = cbl.predict(back_end)

In [27]:
# Wrap the predictions in a named Series so they can be merged as a 'pred_price' column.
cbl_pred = pd.Series(cbl_pred, name='pred_price')

In [30]:
# Attach the predictions to the front-end rows by matching row index.
df = front_end.merge(cbl_pred, left_index=True, right_index=True)

In [None]:
# Attach the recorded SalePrice to each predicted row by matching PID values.
# The earlier left_index=True / right_on='PID' join compared df's 0..n-1 row
# labels with PID values, which never match, so it returned an empty frame.
# NOTE(review): assumes the front-end CSV carries a 'PID' column -- confirm.
if 'PID' not in df.columns:
    raise KeyError("df has no 'PID' column to join sale prices on")
df = df.merge(sp, on='PID')

In [36]:
# Display the PID / SalePrice lookup frame.
sp

Unnamed: 0,PID,SalePrice
0,909176150,126000
1,905476230,139500
2,911128020,124900
3,535377150,114000
4,534177230,227000
...,...,...
2574,903205040,121000
2575,905402060,139600
2576,909275030,145000
2577,907192040,217500


In [33]:
# Display the merged frame; pred_price is the last column.
df

Unnamed: 0.1,Unnamed: 0,GrLivArea,LotArea,OverallQual,BSMT_LowQual,house_age_years,GarageCars,MasVnrType,FullBath,HalfBath,...,supermarket,hotel,stop,farmyard,christian_catholic,jewish,muslim,garden_centre,christian_lutheran,pred_price
0,0,856,7890,0.428571,856.0,71.210959,2.0,,1,0,...,4,0,0,0,1,0,0,0,3,116221.481922
1,1,1049,4235,0.285714,104.0,25.104110,1.0,Brick Face,2,0,...,4,0,0,0,1,0,1,0,2,136969.108708
2,2,1039,8146,0.142857,405.0,109.402740,1.0,,1,0,...,1,0,0,1,0,0,0,0,3,109278.942802
3,3,1665,8400,0.714286,167.0,8.838356,2.0,,2,1,...,1,0,0,0,0,1,0,0,3,225587.294664
4,4,1922,7301,0.571429,0.0,6.501370,2.0,Brick Face,3,0,...,4,3,1,0,0,0,0,0,1,189746.626180
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2466,2466,952,8854,0.428571,952.0,93.394521,1.0,,1,0,...,3,0,0,0,0,0,0,0,3,118619.899379
2467,2467,1733,13680,0.000000,0.0,54.452055,2.0,,2,0,...,4,0,0,0,0,0,1,0,1,137198.840665
2468,2468,2002,6270,0.285714,1001.0,58.619178,3.0,,2,0,...,0,0,0,0,1,0,0,0,2,159091.878266
2469,2469,1842,8826,0.571429,144.0,7.501370,2.0,Brick Face,2,1,...,0,3,1,13,0,0,0,0,0,221253.185316


In [None]:
# Predicted vs. recorded sale price; points on the diagonal are exact predictions.
# The original cell only referenced px.scatter without calling it, so no figure
# was produced. NOTE(review): axis choice is inferred from the merged columns -- confirm.
fig = px.scatter(
    df,
    x='SalePrice',
    y='pred_price',
    labels={'SalePrice': 'Actual sale price', 'pred_price': 'Predicted price'},
    title='CatBoost predicted price vs. actual sale price',
)
fig