In [1]:
import pandas as pd 
import numpy as np 
import seaborn as sns 
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.linear_model import LinearRegression, LassoCV, RidgeCV
from sklearn.preprocessing import PolynomialFeatures, PowerTransformer

from sklearn.metrics import r2_score

%matplotlib inline

In [2]:
train = pd.read_csv("datasets/train.csv")
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

## Data Cleaning and Null Value Remover 

In [3]:
def column_cleaner(data): 
    data.columns=map(str.lower, data.columns)
    data.columns = data.columns.str.strip().str.replace(" ","_")


In [4]:
column_cleaner(train)

In [5]:
def fill_na(data): 
    data[['garage_qual', 'garage_cond']] = data[['garage_qual', 'garage_cond']].fillna(value = 0)
    data['mas_vnr_area'] = data[["mas_vnr_area"]].fillna(value = 0)
    data['bsmt_qual'] = data[["bsmt_qual"]].fillna(value = 0)
    data['bsmt_cond'] = data[["bsmt_cond"]].fillna(value = 0)
    return data

In [6]:
fill_na(train)

Unnamed: 0,id,pid,ms_subclass,ms_zoning,lot_frontage,lot_area,street,alley,lot_shape,land_contour,utilities,lot_config,land_slope,neighborhood,condition_1,condition_2,bldg_type,house_style,overall_qual,overall_cond,year_built,year_remod/add,roof_style,roof_matl,exterior_1st,exterior_2nd,mas_vnr_type,mas_vnr_area,exter_qual,exter_cond,foundation,bsmt_qual,bsmt_cond,bsmt_exposure,bsmtfin_type_1,bsmtfin_sf_1,bsmtfin_type_2,bsmtfin_sf_2,bsmt_unf_sf,total_bsmt_sf,heating,heating_qc,central_air,electrical,1st_flr_sf,2nd_flr_sf,low_qual_fin_sf,gr_liv_area,bsmt_full_bath,bsmt_half_bath,full_bath,half_bath,bedroom_abvgr,kitchen_abvgr,kitchen_qual,totrms_abvgrd,functional,fireplaces,fireplace_qu,garage_type,garage_yr_blt,garage_finish,garage_cars,garage_area,garage_qual,garage_cond,paved_drive,wood_deck_sf,open_porch_sf,enclosed_porch,3ssn_porch,screen_porch,pool_area,pool_qc,fence,misc_feature,misc_val,mo_sold,yr_sold,sale_type,saleprice
0,109,533352170,60,RL,,13517,Pave,,IR1,Lvl,AllPub,CulDSac,Gtl,Sawyer,RRAe,Norm,1Fam,2Story,6,8,1976,2005,Gable,CompShg,HdBoard,Plywood,BrkFace,289.0,Gd,TA,CBlock,TA,TA,No,GLQ,533.0,Unf,0.0,192.0,725.0,GasA,Ex,Y,SBrkr,725,754,0,1479,0.0,0.0,2,1,3,1,Gd,6,Typ,0,,Attchd,1976.0,RFn,2.0,475.0,TA,TA,Y,0,44,0,0,0,0,,,,0,3,2010,WD,130500
1,544,531379050,60,RL,43.0,11492,Pave,,IR1,Lvl,AllPub,CulDSac,Gtl,SawyerW,Norm,Norm,1Fam,2Story,7,5,1996,1997,Gable,CompShg,VinylSd,VinylSd,BrkFace,132.0,Gd,TA,PConc,Gd,TA,No,GLQ,637.0,Unf,0.0,276.0,913.0,GasA,Ex,Y,SBrkr,913,1209,0,2122,1.0,0.0,2,1,4,1,Gd,8,Typ,1,TA,Attchd,1997.0,RFn,2.0,559.0,TA,TA,Y,0,74,0,0,0,0,,,,0,4,2009,WD,220000
2,153,535304180,20,RL,68.0,7922,Pave,,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1953,2007,Gable,CompShg,VinylSd,VinylSd,,0.0,TA,Gd,CBlock,TA,TA,No,GLQ,731.0,Unf,0.0,326.0,1057.0,GasA,TA,Y,SBrkr,1057,0,0,1057,1.0,0.0,1,0,3,1,Gd,5,Typ,0,,Detchd,1953.0,Unf,1.0,246.0,TA,TA,Y,0,52,0,0,0,0,,,,0,1,2010,WD,109000
3,318,916386060,60,RL,73.0,9802,Pave,,Reg,Lvl,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,2Story,5,5,2006,2007,Gable,CompShg,VinylSd,VinylSd,,0.0,TA,TA,PConc,Gd,TA,No,Unf,0.0,Unf,0.0,384.0,384.0,GasA,Gd,Y,SBrkr,744,700,0,1444,0.0,0.0,2,1,3,1,TA,7,Typ,0,,BuiltIn,2007.0,Fin,2.0,400.0,TA,TA,Y,100,0,0,0,0,0,,,,0,4,2010,WD,174000
4,255,906425045,50,RL,82.0,14235,Pave,,IR1,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,1.5Fin,6,8,1900,1993,Gable,CompShg,Wd Sdng,Plywood,,0.0,TA,TA,PConc,Fa,Gd,No,Unf,0.0,Unf,0.0,676.0,676.0,GasA,TA,Y,SBrkr,831,614,0,1445,0.0,0.0,2,0,3,1,TA,6,Typ,0,,Detchd,1957.0,Unf,2.0,484.0,TA,TA,N,0,59,0,0,0,0,,,,0,3,2010,WD,138500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2046,1587,921126030,20,RL,79.0,11449,Pave,,IR1,HLS,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,1Story,8,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,,0.0,Gd,TA,PConc,Gd,TA,Av,GLQ,1011.0,Unf,0.0,873.0,1884.0,GasA,Ex,Y,SBrkr,1728,0,0,1728,1.0,0.0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2007.0,Fin,2.0,520.0,TA,TA,Y,0,276,0,0,0,0,,,,0,1,2008,WD,298751
2047,785,905377130,30,RL,,12342,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,4,5,1940,1950,Gable,CompShg,VinylSd,VinylSd,,0.0,TA,TA,CBlock,TA,TA,No,BLQ,262.0,Unf,0.0,599.0,861.0,GasA,Ex,Y,SBrkr,861,0,0,861,0.0,0.0,1,0,1,1,TA,4,Typ,0,,Detchd,1961.0,Unf,2.0,539.0,TA,TA,Y,158,0,0,0,0,0,,,,0,3,2009,WD,82500
2048,916,909253010,50,RL,57.0,7558,Pave,,Reg,Bnk,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,1.5Fin,6,6,1928,1950,Gable,CompShg,BrkFace,Stone,,0.0,TA,TA,BrkTil,TA,TA,No,Unf,0.0,Unf,0.0,896.0,896.0,GasA,Gd,Y,SBrkr,1172,741,0,1913,0.0,0.0,1,1,3,1,TA,9,Typ,1,TA,Detchd,1929.0,Unf,2.0,342.0,Fa,Fa,Y,0,0,0,0,0,0,,,,0,3,2009,WD,177000
2049,639,535179160,20,RL,80.0,10400,Pave,,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,5,1956,1956,Gable,CompShg,Plywood,Plywood,,0.0,TA,TA,CBlock,TA,TA,No,Rec,155.0,LwQ,750.0,295.0,1200.0,GasA,TA,Y,SBrkr,1200,0,0,1200,1.0,0.0,1,0,3,1,TA,6,Typ,2,Gd,Attchd,1956.0,Unf,1.0,294.0,TA,TA,Y,0,189,140,0,0,0,,,,0,11,2009,WD,144000


## Outlier Identification and Removal

In [7]:
train.shape

(2051, 81)

In [777]:
#train.loc[(train['gr_liv_area'] > 4000)]
#train.loc[(train['mas_vnr_area'] > 1200)]
#train.loc[(train["lot_area"] > 100000) ]
#train.loc[(train["1st_flr_sf"] > 3000) & (train['saleprice'] < 500000)]
#train.loc[(train["garage_comb"] > 5000) ]
#train.loc[(train["exter_cond*exter_qual"] == 5000) ]
#train.loc[(train['bsmtfin_sf_2'] > 1200)]
#train.loc[(train)['wood_deck_sf'] > 1400]
#train.loc[(train['saleprice'] > 580000)]
#train.loc[(train['saleprice'] < 15000)].sort_values(by = 'saleprice', ascending = False).index
#train.loc[train["garage_cars"].isnull()].index
#train.loc[train["total_bsmt_sf"].isnull()].index

In [778]:
#dropping ground living area that is over 4000 
train.drop(index = [960, 1885], inplace = True)

#dropping mas_vnr_area that is over 1200 
train.drop(index = [1409], inplace = True)

#dropping lot area that is over 100,000
#train.drop(index= [471, 694], inplace = True)

#dropping total basement sf that is greater than 3000 and the saleprice is less than 500,000
train.drop(index= [328, 1147], inplace = True)

#dropping the 2st_flr_sf that is greater than 3500
train.drop(index= [616], inplace = True)

#dropping three garage_comb scores that are greater than 5000 
#train.drop(index= [337, 925, 1293], inplace = True)

#dropping two values whose bsmt_fin_sf_2 is greater than 1200 
#train.drop(index= [770, 1692], inplace = True)

#dropping a value where wood_deck_sf is greater than 1400 
#train.drop(index = [966], inplace = True)

#dropping two values where saleprice is less than 15000
train.drop(index = [183, 1628], inplace = True)

#dropping a value where saleprice is greater than 600000
train.drop(index = [1671], inplace = True)

#dropping a null value 
train.drop(index = [1712], inplace = True)

#dropping null value where total_bsmt_sf is null 
train.drop(index = [1327], inplace = True)

## Dictionary Conversion 

In [779]:
def dataframe_dictionary_input(data): 
    street_cleaned = { "Grvl" : 0,"Pave" : 1}
    data["street"] = data[["street"]].replace(street_cleaned)


    na_to_ex_rating = {"Po" : 1, "Fa" : 2,"TA" : 3, "Gd" : 4, "Ex" : 5,}
    data['bsmt_qual'] = data[['bsmt_qual']].replace(na_to_ex_rating)
    data["bsmt_cond"] = data[['bsmt_cond']].replace(na_to_ex_rating)


    heating_qc_cleaned = {"Po" : 0, "Fa" : 1,"TA" : 2, "Gd" : 3, "Ex" : 4,}
    data['heating_qc'] = data[['heating_qc']].replace(heating_qc_cleaned)

    poor_to_ex_no_na = {"Po" : 0, "Fa" : 1,"TA" : 2, "Gd" : 3, "Ex" : 4,}
    data['kitchen_qual'] = data[['kitchen_qual']].replace(poor_to_ex_no_na)

    data['exter_qual'] = data[['exter_qual']].replace(poor_to_ex_no_na)
    data['exter_cond'] = data[['exter_cond']].replace(poor_to_ex_no_na)
    
    data['garage_qual'] = data[['garage_qual']].replace(na_to_ex_rating)
    data['garage_cond'] = data[['garage_cond']].replace(na_to_ex_rating)
    data['central_air'] = data['central_air'].map({'Y': 1, "N": 0})
    
    return data 


In [780]:
dataframe_dictionary_input(train)

Unnamed: 0,id,pid,ms_subclass,ms_zoning,lot_frontage,lot_area,street,alley,lot_shape,land_contour,utilities,lot_config,land_slope,neighborhood,condition_1,condition_2,bldg_type,house_style,overall_qual,overall_cond,year_built,year_remod/add,roof_style,roof_matl,exterior_1st,exterior_2nd,mas_vnr_type,mas_vnr_area,exter_qual,exter_cond,foundation,bsmt_qual,bsmt_cond,bsmt_exposure,bsmtfin_type_1,bsmtfin_sf_1,bsmtfin_type_2,bsmtfin_sf_2,bsmt_unf_sf,total_bsmt_sf,heating,heating_qc,central_air,electrical,1st_flr_sf,2nd_flr_sf,low_qual_fin_sf,gr_liv_area,bsmt_full_bath,bsmt_half_bath,full_bath,half_bath,bedroom_abvgr,kitchen_abvgr,kitchen_qual,totrms_abvgrd,functional,fireplaces,fireplace_qu,garage_type,garage_yr_blt,garage_finish,garage_cars,garage_area,garage_qual,garage_cond,paved_drive,wood_deck_sf,open_porch_sf,enclosed_porch,3ssn_porch,screen_porch,pool_area,pool_qc,fence,misc_feature,misc_val,mo_sold,yr_sold,sale_type,saleprice
0,109,533352170,60,RL,,13517,1,,IR1,Lvl,AllPub,CulDSac,Gtl,Sawyer,RRAe,Norm,1Fam,2Story,6,8,1976,2005,Gable,CompShg,HdBoard,Plywood,BrkFace,289.0,3,2,CBlock,3,3,No,GLQ,533.0,Unf,0.0,192.0,725.0,GasA,4,1,SBrkr,725,754,0,1479,0.0,0.0,2,1,3,1,3,6,Typ,0,,Attchd,1976.0,RFn,2.0,475.0,3,3,Y,0,44,0,0,0,0,,,,0,3,2010,WD,130500
1,544,531379050,60,RL,43.0,11492,1,,IR1,Lvl,AllPub,CulDSac,Gtl,SawyerW,Norm,Norm,1Fam,2Story,7,5,1996,1997,Gable,CompShg,VinylSd,VinylSd,BrkFace,132.0,3,2,PConc,4,3,No,GLQ,637.0,Unf,0.0,276.0,913.0,GasA,4,1,SBrkr,913,1209,0,2122,1.0,0.0,2,1,4,1,3,8,Typ,1,TA,Attchd,1997.0,RFn,2.0,559.0,3,3,Y,0,74,0,0,0,0,,,,0,4,2009,WD,220000
2,153,535304180,20,RL,68.0,7922,1,,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1953,2007,Gable,CompShg,VinylSd,VinylSd,,0.0,2,3,CBlock,3,3,No,GLQ,731.0,Unf,0.0,326.0,1057.0,GasA,2,1,SBrkr,1057,0,0,1057,1.0,0.0,1,0,3,1,3,5,Typ,0,,Detchd,1953.0,Unf,1.0,246.0,3,3,Y,0,52,0,0,0,0,,,,0,1,2010,WD,109000
3,318,916386060,60,RL,73.0,9802,1,,Reg,Lvl,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,2Story,5,5,2006,2007,Gable,CompShg,VinylSd,VinylSd,,0.0,2,2,PConc,4,3,No,Unf,0.0,Unf,0.0,384.0,384.0,GasA,3,1,SBrkr,744,700,0,1444,0.0,0.0,2,1,3,1,2,7,Typ,0,,BuiltIn,2007.0,Fin,2.0,400.0,3,3,Y,100,0,0,0,0,0,,,,0,4,2010,WD,174000
4,255,906425045,50,RL,82.0,14235,1,,IR1,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,1.5Fin,6,8,1900,1993,Gable,CompShg,Wd Sdng,Plywood,,0.0,2,2,PConc,2,4,No,Unf,0.0,Unf,0.0,676.0,676.0,GasA,2,1,SBrkr,831,614,0,1445,0.0,0.0,2,0,3,1,2,6,Typ,0,,Detchd,1957.0,Unf,2.0,484.0,3,3,N,0,59,0,0,0,0,,,,0,3,2010,WD,138500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2046,1587,921126030,20,RL,79.0,11449,1,,IR1,HLS,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,1Story,8,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,,0.0,3,2,PConc,4,3,Av,GLQ,1011.0,Unf,0.0,873.0,1884.0,GasA,4,1,SBrkr,1728,0,0,1728,1.0,0.0,2,0,3,1,3,7,Typ,1,Gd,Attchd,2007.0,Fin,2.0,520.0,3,3,Y,0,276,0,0,0,0,,,,0,1,2008,WD,298751
2047,785,905377130,30,RL,,12342,1,,IR1,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,4,5,1940,1950,Gable,CompShg,VinylSd,VinylSd,,0.0,2,2,CBlock,3,3,No,BLQ,262.0,Unf,0.0,599.0,861.0,GasA,4,1,SBrkr,861,0,0,861,0.0,0.0,1,0,1,1,2,4,Typ,0,,Detchd,1961.0,Unf,2.0,539.0,3,3,Y,158,0,0,0,0,0,,,,0,3,2009,WD,82500
2048,916,909253010,50,RL,57.0,7558,1,,Reg,Bnk,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,1.5Fin,6,6,1928,1950,Gable,CompShg,BrkFace,Stone,,0.0,2,2,BrkTil,3,3,No,Unf,0.0,Unf,0.0,896.0,896.0,GasA,3,1,SBrkr,1172,741,0,1913,0.0,0.0,1,1,3,1,2,9,Typ,1,TA,Detchd,1929.0,Unf,2.0,342.0,2,2,Y,0,0,0,0,0,0,,,,0,3,2009,WD,177000
2049,639,535179160,20,RL,80.0,10400,1,,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,5,1956,1956,Gable,CompShg,Plywood,Plywood,,0.0,2,2,CBlock,3,3,No,Rec,155.0,LwQ,750.0,295.0,1200.0,GasA,2,1,SBrkr,1200,0,0,1200,1.0,0.0,1,0,3,1,2,6,Typ,2,Gd,Attchd,1956.0,Unf,1.0,294.0,3,3,Y,0,189,140,0,0,0,,,,0,11,2009,WD,144000


## Feature Interactions

In [781]:
def feature_interactions(data): 
    data["garage_comb"] = data["garage_area"] * data["garage_cars"]
    data["deck_*_porch"] = data["wood_deck_sf"] * data["open_porch_sf"]
    data["exter_cond*exter_qual"] = data["exter_cond"] * data["exter_qual"]
    data["bsmt_qual*bsmt_cond"] = data["bsmt_qual"] * data["bsmt_cond"]
    data['garage_qual*garage_con'] = data['garage_qual'] * data['garage_cond']
    return data 

    

In [782]:
feature_interactions(train)

Unnamed: 0,id,pid,ms_subclass,ms_zoning,lot_frontage,lot_area,street,alley,lot_shape,land_contour,utilities,lot_config,land_slope,neighborhood,condition_1,condition_2,bldg_type,house_style,overall_qual,overall_cond,year_built,year_remod/add,roof_style,roof_matl,exterior_1st,exterior_2nd,mas_vnr_type,mas_vnr_area,exter_qual,exter_cond,foundation,bsmt_qual,bsmt_cond,bsmt_exposure,bsmtfin_type_1,bsmtfin_sf_1,bsmtfin_type_2,bsmtfin_sf_2,bsmt_unf_sf,total_bsmt_sf,heating,heating_qc,central_air,electrical,1st_flr_sf,2nd_flr_sf,low_qual_fin_sf,gr_liv_area,bsmt_full_bath,bsmt_half_bath,full_bath,half_bath,bedroom_abvgr,kitchen_abvgr,kitchen_qual,totrms_abvgrd,functional,fireplaces,fireplace_qu,garage_type,garage_yr_blt,garage_finish,garage_cars,garage_area,garage_qual,garage_cond,paved_drive,wood_deck_sf,open_porch_sf,enclosed_porch,3ssn_porch,screen_porch,pool_area,pool_qc,fence,misc_feature,misc_val,mo_sold,yr_sold,sale_type,saleprice,garage_comb,deck_*_porch,exter_cond*exter_qual,bsmt_qual*bsmt_cond,garage_qual*garage_con
0,109,533352170,60,RL,,13517,1,,IR1,Lvl,AllPub,CulDSac,Gtl,Sawyer,RRAe,Norm,1Fam,2Story,6,8,1976,2005,Gable,CompShg,HdBoard,Plywood,BrkFace,289.0,3,2,CBlock,3,3,No,GLQ,533.0,Unf,0.0,192.0,725.0,GasA,4,1,SBrkr,725,754,0,1479,0.0,0.0,2,1,3,1,3,6,Typ,0,,Attchd,1976.0,RFn,2.0,475.0,3,3,Y,0,44,0,0,0,0,,,,0,3,2010,WD,130500,950.0,0,6,9,9
1,544,531379050,60,RL,43.0,11492,1,,IR1,Lvl,AllPub,CulDSac,Gtl,SawyerW,Norm,Norm,1Fam,2Story,7,5,1996,1997,Gable,CompShg,VinylSd,VinylSd,BrkFace,132.0,3,2,PConc,4,3,No,GLQ,637.0,Unf,0.0,276.0,913.0,GasA,4,1,SBrkr,913,1209,0,2122,1.0,0.0,2,1,4,1,3,8,Typ,1,TA,Attchd,1997.0,RFn,2.0,559.0,3,3,Y,0,74,0,0,0,0,,,,0,4,2009,WD,220000,1118.0,0,6,12,9
2,153,535304180,20,RL,68.0,7922,1,,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1953,2007,Gable,CompShg,VinylSd,VinylSd,,0.0,2,3,CBlock,3,3,No,GLQ,731.0,Unf,0.0,326.0,1057.0,GasA,2,1,SBrkr,1057,0,0,1057,1.0,0.0,1,0,3,1,3,5,Typ,0,,Detchd,1953.0,Unf,1.0,246.0,3,3,Y,0,52,0,0,0,0,,,,0,1,2010,WD,109000,246.0,0,6,9,9
3,318,916386060,60,RL,73.0,9802,1,,Reg,Lvl,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,2Story,5,5,2006,2007,Gable,CompShg,VinylSd,VinylSd,,0.0,2,2,PConc,4,3,No,Unf,0.0,Unf,0.0,384.0,384.0,GasA,3,1,SBrkr,744,700,0,1444,0.0,0.0,2,1,3,1,2,7,Typ,0,,BuiltIn,2007.0,Fin,2.0,400.0,3,3,Y,100,0,0,0,0,0,,,,0,4,2010,WD,174000,800.0,0,4,12,9
4,255,906425045,50,RL,82.0,14235,1,,IR1,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,1.5Fin,6,8,1900,1993,Gable,CompShg,Wd Sdng,Plywood,,0.0,2,2,PConc,2,4,No,Unf,0.0,Unf,0.0,676.0,676.0,GasA,2,1,SBrkr,831,614,0,1445,0.0,0.0,2,0,3,1,2,6,Typ,0,,Detchd,1957.0,Unf,2.0,484.0,3,3,N,0,59,0,0,0,0,,,,0,3,2010,WD,138500,968.0,0,4,8,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2046,1587,921126030,20,RL,79.0,11449,1,,IR1,HLS,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,1Story,8,5,2007,2007,Gable,CompShg,VinylSd,VinylSd,,0.0,3,2,PConc,4,3,Av,GLQ,1011.0,Unf,0.0,873.0,1884.0,GasA,4,1,SBrkr,1728,0,0,1728,1.0,0.0,2,0,3,1,3,7,Typ,1,Gd,Attchd,2007.0,Fin,2.0,520.0,3,3,Y,0,276,0,0,0,0,,,,0,1,2008,WD,298751,1040.0,0,6,12,9
2047,785,905377130,30,RL,,12342,1,,IR1,Lvl,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,4,5,1940,1950,Gable,CompShg,VinylSd,VinylSd,,0.0,2,2,CBlock,3,3,No,BLQ,262.0,Unf,0.0,599.0,861.0,GasA,4,1,SBrkr,861,0,0,861,0.0,0.0,1,0,1,1,2,4,Typ,0,,Detchd,1961.0,Unf,2.0,539.0,3,3,Y,158,0,0,0,0,0,,,,0,3,2009,WD,82500,1078.0,0,4,9,9
2048,916,909253010,50,RL,57.0,7558,1,,Reg,Bnk,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,1.5Fin,6,6,1928,1950,Gable,CompShg,BrkFace,Stone,,0.0,2,2,BrkTil,3,3,No,Unf,0.0,Unf,0.0,896.0,896.0,GasA,3,1,SBrkr,1172,741,0,1913,0.0,0.0,1,1,3,1,2,9,Typ,1,TA,Detchd,1929.0,Unf,2.0,342.0,2,2,Y,0,0,0,0,0,0,,,,0,3,2009,WD,177000,684.0,0,4,9,4
2049,639,535179160,20,RL,80.0,10400,1,,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,5,1956,1956,Gable,CompShg,Plywood,Plywood,,0.0,2,2,CBlock,3,3,No,Rec,155.0,LwQ,750.0,295.0,1200.0,GasA,2,1,SBrkr,1200,0,0,1200,1.0,0.0,1,0,3,1,2,6,Typ,2,Gd,Attchd,1956.0,Unf,1.0,294.0,3,3,Y,0,189,140,0,0,0,,,,0,11,2009,WD,144000,294.0,0,4,9,9


## Dummy Column Maker 

In [783]:
def dummy_cols_maker(data):
    data = pd.get_dummies(data, columns = ['neighborhood'], drop_first=True)
    data = pd.get_dummies(data, columns = ['electrical'], drop_first = True)
    data = pd.get_dummies(data, columns = ['heating'], drop_first = True)
    data = pd.get_dummies(data, columns = ['functional'], drop_first = True)
    data = pd.get_dummies(data, columns = ['ms_zoning'], drop_first = True) 
    data = pd.get_dummies(data, columns = ['utilities'], drop_first = True)
    data = pd.get_dummies(data, columns = ['condition_1'], drop_first = True)
    data = pd.get_dummies(data, columns = ['foundation'], drop_first = True)

    
    
    return data

In [784]:
train = dummy_cols_maker(data = train) 

In [785]:
train.head()

Unnamed: 0,id,pid,ms_subclass,lot_frontage,lot_area,street,alley,lot_shape,land_contour,lot_config,land_slope,condition_2,bldg_type,house_style,overall_qual,overall_cond,year_built,year_remod/add,roof_style,roof_matl,exterior_1st,exterior_2nd,mas_vnr_type,mas_vnr_area,exter_qual,exter_cond,bsmt_qual,bsmt_cond,bsmt_exposure,bsmtfin_type_1,bsmtfin_sf_1,bsmtfin_type_2,bsmtfin_sf_2,bsmt_unf_sf,total_bsmt_sf,heating_qc,central_air,1st_flr_sf,2nd_flr_sf,low_qual_fin_sf,gr_liv_area,bsmt_full_bath,bsmt_half_bath,full_bath,half_bath,bedroom_abvgr,kitchen_abvgr,kitchen_qual,totrms_abvgrd,fireplaces,fireplace_qu,garage_type,garage_yr_blt,garage_finish,garage_cars,garage_area,garage_qual,garage_cond,paved_drive,wood_deck_sf,open_porch_sf,enclosed_porch,3ssn_porch,screen_porch,pool_area,pool_qc,fence,misc_feature,misc_val,mo_sold,yr_sold,sale_type,saleprice,garage_comb,deck_*_porch,exter_cond*exter_qual,bsmt_qual*bsmt_cond,garage_qual*garage_con,neighborhood_Blueste,neighborhood_BrDale,neighborhood_BrkSide,neighborhood_ClearCr,neighborhood_CollgCr,neighborhood_Crawfor,neighborhood_Edwards,neighborhood_Gilbert,neighborhood_Greens,neighborhood_GrnHill,neighborhood_IDOTRR,neighborhood_Landmrk,neighborhood_MeadowV,neighborhood_Mitchel,neighborhood_NAmes,neighborhood_NPkVill,neighborhood_NWAmes,neighborhood_NoRidge,neighborhood_NridgHt,neighborhood_OldTown,neighborhood_SWISU,neighborhood_Sawyer,neighborhood_SawyerW,neighborhood_Somerst,neighborhood_StoneBr,neighborhood_Timber,neighborhood_Veenker,electrical_FuseF,electrical_FuseP,electrical_Mix,electrical_SBrkr,heating_GasW,heating_Grav,heating_OthW,heating_Wall,functional_Maj2,functional_Min1,functional_Min2,functional_Mod,functional_Sal,functional_Sev,functional_Typ,ms_zoning_C (all),ms_zoning_FV,ms_zoning_I (all),ms_zoning_RH,ms_zoning_RL,ms_zoning_RM,utilities_NoSeWa,utilities_NoSewr,condition_1_Feedr,condition_1_Norm,condition_1_PosA,condition_1_PosN,condition_1_RRAe,condition_1_RRAn,condition_1_RRNe,condition_1_RRNn,foundation_CBlock,foundation_PConc,foundation_Slab,foundation_Stone,foundation_Wood
0,109,533352170,60,,13517,1,,IR1,Lvl,CulDSac,Gtl,Norm,1Fam,2Story,6,8,1976,2005,Gable,CompShg,HdBoard,Plywood,BrkFace,289.0,3,2,3,3,No,GLQ,533.0,Unf,0.0,192.0,725.0,4,1,725,754,0,1479,0.0,0.0,2,1,3,1,3,6,0,,Attchd,1976.0,RFn,2.0,475.0,3,3,Y,0,44,0,0,0,0,,,,0,3,2010,WD,130500,950.0,0,6,9,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
1,544,531379050,60,43.0,11492,1,,IR1,Lvl,CulDSac,Gtl,Norm,1Fam,2Story,7,5,1996,1997,Gable,CompShg,VinylSd,VinylSd,BrkFace,132.0,3,2,4,3,No,GLQ,637.0,Unf,0.0,276.0,913.0,4,1,913,1209,0,2122,1.0,0.0,2,1,4,1,3,8,1,TA,Attchd,1997.0,RFn,2.0,559.0,3,3,Y,0,74,0,0,0,0,,,,0,4,2009,WD,220000,1118.0,0,6,12,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0
2,153,535304180,20,68.0,7922,1,,Reg,Lvl,Inside,Gtl,Norm,1Fam,1Story,5,7,1953,2007,Gable,CompShg,VinylSd,VinylSd,,0.0,2,3,3,3,No,GLQ,731.0,Unf,0.0,326.0,1057.0,2,1,1057,0,0,1057,1.0,0.0,1,0,3,1,3,5,0,,Detchd,1953.0,Unf,1.0,246.0,3,3,Y,0,52,0,0,0,0,,,,0,1,2010,WD,109000,246.0,0,6,9,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
3,318,916386060,60,73.0,9802,1,,Reg,Lvl,Inside,Gtl,Norm,1Fam,2Story,5,5,2006,2007,Gable,CompShg,VinylSd,VinylSd,,0.0,2,2,4,3,No,Unf,0.0,Unf,0.0,384.0,384.0,3,1,744,700,0,1444,0.0,0.0,2,1,3,1,2,7,0,,BuiltIn,2007.0,Fin,2.0,400.0,3,3,Y,100,0,0,0,0,0,,,,0,4,2010,WD,174000,800.0,0,4,12,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0
4,255,906425045,50,82.0,14235,1,,IR1,Lvl,Inside,Gtl,Norm,1Fam,1.5Fin,6,8,1900,1993,Gable,CompShg,Wd Sdng,Plywood,,0.0,2,2,2,4,No,Unf,0.0,Unf,0.0,676.0,676.0,2,1,831,614,0,1445,0.0,0.0,2,0,3,1,2,6,0,,Detchd,1957.0,Unf,2.0,484.0,3,3,N,0,59,0,0,0,0,,,,0,3,2010,WD,138500,968.0,0,4,8,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0


## Feature Selection 

In [786]:
#plt.figure(figsize = (30,30))
#sns.heatmap(train.corr()[['saleprice']].sort_values(by = "saleprice" ,ascending = False), annot = True, cmap = 'coolwarm');

In [787]:
train.corr()[['saleprice']].sort_values(by = "saleprice" ,ascending = False)

Unnamed: 0,saleprice
saleprice,1.0
overall_qual,0.804727
gr_liv_area,0.719609
exter_qual,0.714571
garage_comb,0.697918
kitchen_qual,0.693909
total_bsmt_sf,0.664408
garage_area,0.660161
garage_cars,0.65231
1st_flr_sf,0.647168


In [788]:
train.columns[-63:]

Index(['neighborhood_Blueste', 'neighborhood_BrDale', 'neighborhood_BrkSide',
       'neighborhood_ClearCr', 'neighborhood_CollgCr', 'neighborhood_Crawfor',
       'neighborhood_Edwards', 'neighborhood_Gilbert', 'neighborhood_Greens',
       'neighborhood_GrnHill', 'neighborhood_IDOTRR', 'neighborhood_Landmrk',
       'neighborhood_MeadowV', 'neighborhood_Mitchel', 'neighborhood_NAmes',
       'neighborhood_NPkVill', 'neighborhood_NWAmes', 'neighborhood_NoRidge',
       'neighborhood_NridgHt', 'neighborhood_OldTown', 'neighborhood_SWISU',
       'neighborhood_Sawyer', 'neighborhood_SawyerW', 'neighborhood_Somerst',
       'neighborhood_StoneBr', 'neighborhood_Timber', 'neighborhood_Veenker',
       'electrical_FuseF', 'electrical_FuseP', 'electrical_Mix',
       'electrical_SBrkr', 'heating_GasW', 'heating_Grav', 'heating_OthW',
       'heating_Wall', 'functional_Maj2', 'functional_Min1', 'functional_Min2',
       'functional_Mod', 'functional_Sal', 'functional_Sev', 'functional_Typ',


In [789]:
features = ['overall_qual', 'exter_qual', 'gr_liv_area', 'garage_comb', 'kitchen_qual', 'total_bsmt_sf',
              'deck_*_porch', 'exter_cond*exter_qual', 'wood_deck_sf', 'fireplaces', 'full_bath',
            'bsmt_qual*bsmt_cond','mas_vnr_area', 'open_porch_sf', 'year_built', 'year_remod/add',
            '1st_flr_sf', 'totrms_abvgrd', 'heating_qc', 'neighborhood_NridgHt', 'garage_qual*garage_con',
            'central_air', 'neighborhood_OldTown', 'neighborhood_SWISU',
       'neighborhood_Sawyer', 'neighborhood_Blueste', 'neighborhood_BrDale', 'neighborhood_BrkSide',
       'neighborhood_ClearCr', 'neighborhood_CollgCr', 'neighborhood_Crawfor',
       'neighborhood_Edwards', 'neighborhood_Gilbert', 'neighborhood_Greens',
       'neighborhood_GrnHill', 'neighborhood_IDOTRR', 'neighborhood_Landmrk',
       'neighborhood_MeadowV', 'neighborhood_Mitchel', 'neighborhood_NAmes',
       'neighborhood_NPkVill', 'neighborhood_NWAmes', 'neighborhood_NoRidge',
       'neighborhood_NridgHt', 'neighborhood_OldTown', 'neighborhood_SWISU',
       'neighborhood_Sawyer', 'neighborhood_SawyerW', 'neighborhood_Somerst',
       'neighborhood_StoneBr', 'neighborhood_Timber', 'neighborhood_Veenker',
       'electrical_FuseF', 'electrical_FuseP', 'electrical_Mix',
       'electrical_SBrkr', 'heating_GasW', 'heating_Grav', 'heating_OthW',
       'heating_Wall', 'functional_Maj2', 'functional_Min1', 'functional_Min2',
       'functional_Mod', 'functional_Sal', 'functional_Sev', 'functional_Typ',
       'ms_zoning_C (all)', 'ms_zoning_FV', 'ms_zoning_I (all)',
       'ms_zoning_RH', 'ms_zoning_RL', 'ms_zoning_RM', 'utilities_NoSeWa',
       'utilities_NoSewr', 'condition_1_Feedr', 'condition_1_Norm',
       'condition_1_PosA', 'condition_1_PosN', 'condition_1_RRAe',
       'condition_1_RRAn', 'condition_1_RRNe', 'condition_1_RRNn',
       'foundation_CBlock', 'foundation_PConc', 'foundation_Slab',
       'foundation_Stone', 'foundation_Wood',]

In [790]:
X = train[features]
y = train['saleprice']

In [791]:
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 42)

## Power Transformer 

In [792]:
pt = PowerTransformer()
pt.fit(X_train)
X_train_pt_transformed = pt.transform(X_train)
X_test_pt_transformed = pt.transform(X_test)
    
    
pt_y = PowerTransformer()
pt_y.fit(y_train.to_frame())
y_train_pt_transformed = pt_y.transform(y_train.to_frame())
y_test_pt_transformed = pt_y.transform(y_test.to_frame())
    
    

  x = um.multiply(x, x, out=x)
  loglike = -n_samples / 2 * np.log(x_trans.var())


## Model Fitting and Instantiation 

In [793]:
#def fitter_and_instantiator(): 
lr = LinearRegression()  

In [794]:
cross_val_score(lr, X_train, y_train_pt_transformed, cv = 5).mean()

0.8965207631208131

In [795]:
ridge = RidgeCV(alphas=np.logspace(-4, 4, 50))

In [796]:
lr_scores = cross_val_score(lr, X_train, y_train_pt_transformed, cv=3)
lr_scores.mean()

0.8934742276352554

In [797]:
ridge_scores = cross_val_score(ridge, X_train, y_train_pt_transformed, cv=3)
ridge_scores.mean()

0.8930060752512645

In [798]:
lasso = LassoCV()

In [799]:
lasso_scores =  cross_val_score(lasso, X_train, y_train_pt_transformed, cv=3)
lasso_scores.mean()

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


0.770216465076293

In [800]:
lr.fit(X_train, y_train_pt_transformed)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [801]:
lr.score(X_train,y_train_pt_transformed)

0.9137630166160758

In [802]:
lr.score(X_test,y_test_pt_transformed)

0.9048880701585281

In [809]:
y_train.mean()

181465.55816993464

In [803]:
ridge.fit(X_train, y_train_pt_transformed)

RidgeCV(alphas=array([1.00000e-04, 1.45635e-04, 2.12095e-04, 3.08884e-04, 4.49843e-04,
       6.55129e-04, 9.54095e-04, 1.38950e-03, 2.02359e-03, 2.94705e-03,
       4.29193e-03, 6.25055e-03, 9.10298e-03, 1.32571e-02, 1.93070e-02,
       2.81177e-02, 4.09492e-02, 5.96362e-02, 8.68511e-02, 1.26486e-01,
     ..., 1.04811e+03, 1.52642e+03,
       2.22300e+03, 3.23746e+03, 4.71487e+03, 6.86649e+03, 1.00000e+04]),
    cv=None, fit_intercept=True, gcv_mode=None, normalize=False,
    scoring=None, store_cv_values=False)

In [804]:
ridge.score(X_train, y_train_pt_transformed)

0.9134515217981971

In [805]:
ridge.score(X_test, y_test_pt_transformed)

0.9058886534636809

In [814]:
coef = pd.DataFrame(ridge.coef_.T, index=X_train.columns, columns=['coef'])

coef.sort_values(by='coef', ascending=False)

Unnamed: 0,coef
neighborhood_GrnHill,1.123824
functional_Typ,0.3901524
neighborhood_Crawfor,0.3704528
functional_Min1,0.3647671
condition_1_PosA,0.3358189
functional_Min2,0.3273898
foundation_Stone,0.3184257
neighborhood_ClearCr,0.2873102
neighborhood_StoneBr,0.242217
condition_1_PosN,0.1867183


In [633]:
pred = ridge.predict(X_test)

In [634]:
pred_reversed = pt_y.inverse_transform(pred.reshape(-1,1))

In [635]:
r2_score(y_test, pred_reversed)

0.9157661807475699

In [636]:
test= pd.read_csv('datasets/test.csv')

In [637]:
column_cleaner(test)

In [638]:
dataframe_dictionary_input(test)

Unnamed: 0,id,pid,ms_subclass,ms_zoning,lot_frontage,lot_area,street,alley,lot_shape,land_contour,utilities,lot_config,land_slope,neighborhood,condition_1,condition_2,bldg_type,house_style,overall_qual,overall_cond,year_built,year_remod/add,roof_style,roof_matl,exterior_1st,exterior_2nd,mas_vnr_type,mas_vnr_area,exter_qual,exter_cond,foundation,bsmt_qual,bsmt_cond,bsmt_exposure,bsmtfin_type_1,bsmtfin_sf_1,bsmtfin_type_2,bsmtfin_sf_2,bsmt_unf_sf,total_bsmt_sf,heating,heating_qc,central_air,electrical,1st_flr_sf,2nd_flr_sf,low_qual_fin_sf,gr_liv_area,bsmt_full_bath,bsmt_half_bath,full_bath,half_bath,bedroom_abvgr,kitchen_abvgr,kitchen_qual,totrms_abvgrd,functional,fireplaces,fireplace_qu,garage_type,garage_yr_blt,garage_finish,garage_cars,garage_area,garage_qual,garage_cond,paved_drive,wood_deck_sf,open_porch_sf,enclosed_porch,3ssn_porch,screen_porch,pool_area,pool_qc,fence,misc_feature,misc_val,mo_sold,yr_sold,sale_type
0,2658,902301120,190,RM,69.0,9142,1,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,2fmCon,2Story,6,8,1910,1950,Gable,CompShg,AsbShng,AsbShng,,0.0,2,1,Stone,2.0,3,No,Unf,0,Unf,0,1020,1020,GasA,3,0,FuseP,908,1020,0,1928,0,0,2,0,4,2,1,9,Typ,0,,Detchd,1910.0,Unf,1,440,1,1.0,Y,0,60,112,0,0,0,,,,0,4,2006,WD
1,2718,905108090,90,RL,,9662,1,,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,Duplex,1Story,5,4,1977,1977,Gable,CompShg,Plywood,Plywood,,0.0,2,2,CBlock,4.0,3,No,Unf,0,Unf,0,1967,1967,GasA,2,1,SBrkr,1967,0,0,1967,0,0,2,0,6,2,2,10,Typ,0,,Attchd,1977.0,Fin,2,580,3,3.0,Y,170,0,0,0,0,0,,,,0,8,2006,WD
2,2414,528218130,60,RL,58.0,17104,1,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,,0.0,3,2,PConc,4.0,4,Av,GLQ,554,Unf,0,100,654,GasA,4,1,SBrkr,664,832,0,1496,1,0,2,1,3,1,3,7,Typ,1,Gd,Attchd,2006.0,RFn,2,426,3,3.0,Y,100,24,0,0,0,0,,,,0,9,2006,New
3,1989,902207150,30,RM,60.0,8520,1,,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,6,1923,2006,Gable,CompShg,Wd Sdng,Wd Sdng,,0.0,3,2,CBlock,3.0,3,No,Unf,0,Unf,0,968,968,GasA,2,1,SBrkr,968,0,0,968,0,0,1,0,2,1,2,5,Typ,0,,Detchd,1935.0,Unf,2,480,2,3.0,N,0,0,184,0,0,0,,,,0,7,2007,WD
4,625,535105100,20,RL,,9500,1,,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1963,1963,Gable,CompShg,Plywood,Plywood,BrkFace,247.0,2,2,CBlock,4.0,3,No,BLQ,609,Unf,0,785,1394,GasA,3,1,SBrkr,1394,0,0,1394,1,0,1,1,3,1,2,6,Typ,2,Gd,Attchd,1963.0,RFn,2,514,3,3.0,Y,0,76,0,0,185,0,,,,0,7,2009,WD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
874,1662,527377110,60,RL,80.0,8000,1,,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,PosN,Norm,1Fam,2Story,6,6,1974,1974,Gable,CompShg,HdBoard,HdBoard,,0.0,2,2,CBlock,3.0,3,No,ALQ,931,LwQ,153,0,1084,GasA,2,1,SBrkr,1084,793,0,1877,1,0,2,1,4,1,2,8,Typ,1,TA,Attchd,1974.0,Unf,2,488,3,3.0,Y,0,96,0,0,0,0,,,,0,11,2007,WD
875,1234,535126140,60,RL,90.0,14670,1,,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,2Story,6,7,1966,1999,Gable,CompShg,VinylSd,VinylSd,BrkFace,410.0,3,3,CBlock,3.0,3,No,BLQ,575,Unf,0,529,1104,GasA,4,1,SBrkr,1104,884,0,1988,0,0,2,1,4,1,3,9,Typ,1,Gd,Attchd,1966.0,RFn,2,480,3,3.0,Y,0,230,0,0,0,0,,MnPrv,,0,8,2008,WD
876,1373,904100040,20,RL,55.0,8250,1,,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Feedr,Norm,1Fam,1Story,5,5,1968,1968,Hip,CompShg,HdBoard,HdBoard,,0.0,2,2,CBlock,3.0,3,No,BLQ,250,LwQ,492,210,952,GasA,4,1,SBrkr,1211,0,0,1211,0,0,1,0,3,1,2,5,Typ,1,TA,Attchd,1968.0,Unf,1,322,3,3.0,Y,0,63,0,0,0,0,,,,0,8,2008,WD
877,1672,527425140,20,RL,60.0,9000,1,,Reg,Lvl,AllPub,FR2,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,6,1971,1971,Gable,CompShg,HdBoard,HdBoard,,0.0,2,2,PConc,3.0,3,No,ALQ,616,Unf,0,248,864,GasA,2,1,SBrkr,864,0,0,864,0,0,1,0,3,1,2,5,Typ,0,,Detchd,1974.0,Unf,2,528,3,3.0,Y,0,0,0,0,0,0,,GdWo,,0,5,2007,WD


In [639]:
fill_na(test)

Unnamed: 0,id,pid,ms_subclass,ms_zoning,lot_frontage,lot_area,street,alley,lot_shape,land_contour,utilities,lot_config,land_slope,neighborhood,condition_1,condition_2,bldg_type,house_style,overall_qual,overall_cond,year_built,year_remod/add,roof_style,roof_matl,exterior_1st,exterior_2nd,mas_vnr_type,mas_vnr_area,exter_qual,exter_cond,foundation,bsmt_qual,bsmt_cond,bsmt_exposure,bsmtfin_type_1,bsmtfin_sf_1,bsmtfin_type_2,bsmtfin_sf_2,bsmt_unf_sf,total_bsmt_sf,heating,heating_qc,central_air,electrical,1st_flr_sf,2nd_flr_sf,low_qual_fin_sf,gr_liv_area,bsmt_full_bath,bsmt_half_bath,full_bath,half_bath,bedroom_abvgr,kitchen_abvgr,kitchen_qual,totrms_abvgrd,functional,fireplaces,fireplace_qu,garage_type,garage_yr_blt,garage_finish,garage_cars,garage_area,garage_qual,garage_cond,paved_drive,wood_deck_sf,open_porch_sf,enclosed_porch,3ssn_porch,screen_porch,pool_area,pool_qc,fence,misc_feature,misc_val,mo_sold,yr_sold,sale_type
0,2658,902301120,190,RM,69.0,9142,1,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,2fmCon,2Story,6,8,1910,1950,Gable,CompShg,AsbShng,AsbShng,,0.0,2,1,Stone,2.0,3,No,Unf,0,Unf,0,1020,1020,GasA,3,0,FuseP,908,1020,0,1928,0,0,2,0,4,2,1,9,Typ,0,,Detchd,1910.0,Unf,1,440,1,1.0,Y,0,60,112,0,0,0,,,,0,4,2006,WD
1,2718,905108090,90,RL,,9662,1,,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,Duplex,1Story,5,4,1977,1977,Gable,CompShg,Plywood,Plywood,,0.0,2,2,CBlock,4.0,3,No,Unf,0,Unf,0,1967,1967,GasA,2,1,SBrkr,1967,0,0,1967,0,0,2,0,6,2,2,10,Typ,0,,Attchd,1977.0,Fin,2,580,3,3.0,Y,170,0,0,0,0,0,,,,0,8,2006,WD
2,2414,528218130,60,RL,58.0,17104,1,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,,0.0,3,2,PConc,4.0,4,Av,GLQ,554,Unf,0,100,654,GasA,4,1,SBrkr,664,832,0,1496,1,0,2,1,3,1,3,7,Typ,1,Gd,Attchd,2006.0,RFn,2,426,3,3.0,Y,100,24,0,0,0,0,,,,0,9,2006,New
3,1989,902207150,30,RM,60.0,8520,1,,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,6,1923,2006,Gable,CompShg,Wd Sdng,Wd Sdng,,0.0,3,2,CBlock,3.0,3,No,Unf,0,Unf,0,968,968,GasA,2,1,SBrkr,968,0,0,968,0,0,1,0,2,1,2,5,Typ,0,,Detchd,1935.0,Unf,2,480,2,3.0,N,0,0,184,0,0,0,,,,0,7,2007,WD
4,625,535105100,20,RL,,9500,1,,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1963,1963,Gable,CompShg,Plywood,Plywood,BrkFace,247.0,2,2,CBlock,4.0,3,No,BLQ,609,Unf,0,785,1394,GasA,3,1,SBrkr,1394,0,0,1394,1,0,1,1,3,1,2,6,Typ,2,Gd,Attchd,1963.0,RFn,2,514,3,3.0,Y,0,76,0,0,185,0,,,,0,7,2009,WD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
874,1662,527377110,60,RL,80.0,8000,1,,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,PosN,Norm,1Fam,2Story,6,6,1974,1974,Gable,CompShg,HdBoard,HdBoard,,0.0,2,2,CBlock,3.0,3,No,ALQ,931,LwQ,153,0,1084,GasA,2,1,SBrkr,1084,793,0,1877,1,0,2,1,4,1,2,8,Typ,1,TA,Attchd,1974.0,Unf,2,488,3,3.0,Y,0,96,0,0,0,0,,,,0,11,2007,WD
875,1234,535126140,60,RL,90.0,14670,1,,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,2Story,6,7,1966,1999,Gable,CompShg,VinylSd,VinylSd,BrkFace,410.0,3,3,CBlock,3.0,3,No,BLQ,575,Unf,0,529,1104,GasA,4,1,SBrkr,1104,884,0,1988,0,0,2,1,4,1,3,9,Typ,1,Gd,Attchd,1966.0,RFn,2,480,3,3.0,Y,0,230,0,0,0,0,,MnPrv,,0,8,2008,WD
876,1373,904100040,20,RL,55.0,8250,1,,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Feedr,Norm,1Fam,1Story,5,5,1968,1968,Hip,CompShg,HdBoard,HdBoard,,0.0,2,2,CBlock,3.0,3,No,BLQ,250,LwQ,492,210,952,GasA,4,1,SBrkr,1211,0,0,1211,0,0,1,0,3,1,2,5,Typ,1,TA,Attchd,1968.0,Unf,1,322,3,3.0,Y,0,63,0,0,0,0,,,,0,8,2008,WD
877,1672,527425140,20,RL,60.0,9000,1,,Reg,Lvl,AllPub,FR2,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,6,1971,1971,Gable,CompShg,HdBoard,HdBoard,,0.0,2,2,PConc,3.0,3,No,ALQ,616,Unf,0,248,864,GasA,2,1,SBrkr,864,0,0,864,0,0,1,0,3,1,2,5,Typ,0,,Detchd,1974.0,Unf,2,528,3,3.0,Y,0,0,0,0,0,0,,GdWo,,0,5,2007,WD


In [640]:
feature_interactions(test)

Unnamed: 0,id,pid,ms_subclass,ms_zoning,lot_frontage,lot_area,street,alley,lot_shape,land_contour,utilities,lot_config,land_slope,neighborhood,condition_1,condition_2,bldg_type,house_style,overall_qual,overall_cond,year_built,year_remod/add,roof_style,roof_matl,exterior_1st,exterior_2nd,mas_vnr_type,mas_vnr_area,exter_qual,exter_cond,foundation,bsmt_qual,bsmt_cond,bsmt_exposure,bsmtfin_type_1,bsmtfin_sf_1,bsmtfin_type_2,bsmtfin_sf_2,bsmt_unf_sf,total_bsmt_sf,heating,heating_qc,central_air,electrical,1st_flr_sf,2nd_flr_sf,low_qual_fin_sf,gr_liv_area,bsmt_full_bath,bsmt_half_bath,full_bath,half_bath,bedroom_abvgr,kitchen_abvgr,kitchen_qual,totrms_abvgrd,functional,fireplaces,fireplace_qu,garage_type,garage_yr_blt,garage_finish,garage_cars,garage_area,garage_qual,garage_cond,paved_drive,wood_deck_sf,open_porch_sf,enclosed_porch,3ssn_porch,screen_porch,pool_area,pool_qc,fence,misc_feature,misc_val,mo_sold,yr_sold,sale_type,garage_comb,deck_*_porch,exter_cond*exter_qual,bsmt_qual*bsmt_cond,garage_qual*garage_con
0,2658,902301120,190,RM,69.0,9142,1,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,2fmCon,2Story,6,8,1910,1950,Gable,CompShg,AsbShng,AsbShng,,0.0,2,1,Stone,2.0,3,No,Unf,0,Unf,0,1020,1020,GasA,3,0,FuseP,908,1020,0,1928,0,0,2,0,4,2,1,9,Typ,0,,Detchd,1910.0,Unf,1,440,1,1.0,Y,0,60,112,0,0,0,,,,0,4,2006,WD,440,0,2,6.0,1.0
1,2718,905108090,90,RL,,9662,1,,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,Duplex,1Story,5,4,1977,1977,Gable,CompShg,Plywood,Plywood,,0.0,2,2,CBlock,4.0,3,No,Unf,0,Unf,0,1967,1967,GasA,2,1,SBrkr,1967,0,0,1967,0,0,2,0,6,2,2,10,Typ,0,,Attchd,1977.0,Fin,2,580,3,3.0,Y,170,0,0,0,0,0,,,,0,8,2006,WD,1160,0,4,12.0,9.0
2,2414,528218130,60,RL,58.0,17104,1,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,,0.0,3,2,PConc,4.0,4,Av,GLQ,554,Unf,0,100,654,GasA,4,1,SBrkr,664,832,0,1496,1,0,2,1,3,1,3,7,Typ,1,Gd,Attchd,2006.0,RFn,2,426,3,3.0,Y,100,24,0,0,0,0,,,,0,9,2006,New,852,2400,6,16.0,9.0
3,1989,902207150,30,RM,60.0,8520,1,,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,6,1923,2006,Gable,CompShg,Wd Sdng,Wd Sdng,,0.0,3,2,CBlock,3.0,3,No,Unf,0,Unf,0,968,968,GasA,2,1,SBrkr,968,0,0,968,0,0,1,0,2,1,2,5,Typ,0,,Detchd,1935.0,Unf,2,480,2,3.0,N,0,0,184,0,0,0,,,,0,7,2007,WD,960,0,6,9.0,6.0
4,625,535105100,20,RL,,9500,1,,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1963,1963,Gable,CompShg,Plywood,Plywood,BrkFace,247.0,2,2,CBlock,4.0,3,No,BLQ,609,Unf,0,785,1394,GasA,3,1,SBrkr,1394,0,0,1394,1,0,1,1,3,1,2,6,Typ,2,Gd,Attchd,1963.0,RFn,2,514,3,3.0,Y,0,76,0,0,185,0,,,,0,7,2009,WD,1028,0,4,12.0,9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
874,1662,527377110,60,RL,80.0,8000,1,,Reg,Lvl,AllPub,Inside,Gtl,NWAmes,PosN,Norm,1Fam,2Story,6,6,1974,1974,Gable,CompShg,HdBoard,HdBoard,,0.0,2,2,CBlock,3.0,3,No,ALQ,931,LwQ,153,0,1084,GasA,2,1,SBrkr,1084,793,0,1877,1,0,2,1,4,1,2,8,Typ,1,TA,Attchd,1974.0,Unf,2,488,3,3.0,Y,0,96,0,0,0,0,,,,0,11,2007,WD,976,0,4,9.0,9.0
875,1234,535126140,60,RL,90.0,14670,1,,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,2Story,6,7,1966,1999,Gable,CompShg,VinylSd,VinylSd,BrkFace,410.0,3,3,CBlock,3.0,3,No,BLQ,575,Unf,0,529,1104,GasA,4,1,SBrkr,1104,884,0,1988,0,0,2,1,4,1,3,9,Typ,1,Gd,Attchd,1966.0,RFn,2,480,3,3.0,Y,0,230,0,0,0,0,,MnPrv,,0,8,2008,WD,960,0,9,9.0,9.0
876,1373,904100040,20,RL,55.0,8250,1,,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Feedr,Norm,1Fam,1Story,5,5,1968,1968,Hip,CompShg,HdBoard,HdBoard,,0.0,2,2,CBlock,3.0,3,No,BLQ,250,LwQ,492,210,952,GasA,4,1,SBrkr,1211,0,0,1211,0,0,1,0,3,1,2,5,Typ,1,TA,Attchd,1968.0,Unf,1,322,3,3.0,Y,0,63,0,0,0,0,,,,0,8,2008,WD,322,0,4,9.0,9.0
877,1672,527425140,20,RL,60.0,9000,1,,Reg,Lvl,AllPub,FR2,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,6,1971,1971,Gable,CompShg,HdBoard,HdBoard,,0.0,2,2,PConc,3.0,3,No,ALQ,616,Unf,0,248,864,GasA,2,1,SBrkr,864,0,0,864,0,0,1,0,3,1,2,5,Typ,0,,Detchd,1974.0,Unf,2,528,3,3.0,Y,0,0,0,0,0,0,,GdWo,,0,5,2007,WD,1056,0,4,9.0,9.0


In [641]:
test.head()

Unnamed: 0,id,pid,ms_subclass,ms_zoning,lot_frontage,lot_area,street,alley,lot_shape,land_contour,utilities,lot_config,land_slope,neighborhood,condition_1,condition_2,bldg_type,house_style,overall_qual,overall_cond,year_built,year_remod/add,roof_style,roof_matl,exterior_1st,exterior_2nd,mas_vnr_type,mas_vnr_area,exter_qual,exter_cond,foundation,bsmt_qual,bsmt_cond,bsmt_exposure,bsmtfin_type_1,bsmtfin_sf_1,bsmtfin_type_2,bsmtfin_sf_2,bsmt_unf_sf,total_bsmt_sf,heating,heating_qc,central_air,electrical,1st_flr_sf,2nd_flr_sf,low_qual_fin_sf,gr_liv_area,bsmt_full_bath,bsmt_half_bath,full_bath,half_bath,bedroom_abvgr,kitchen_abvgr,kitchen_qual,totrms_abvgrd,functional,fireplaces,fireplace_qu,garage_type,garage_yr_blt,garage_finish,garage_cars,garage_area,garage_qual,garage_cond,paved_drive,wood_deck_sf,open_porch_sf,enclosed_porch,3ssn_porch,screen_porch,pool_area,pool_qc,fence,misc_feature,misc_val,mo_sold,yr_sold,sale_type,garage_comb,deck_*_porch,exter_cond*exter_qual,bsmt_qual*bsmt_cond,garage_qual*garage_con
0,2658,902301120,190,RM,69.0,9142,1,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,2fmCon,2Story,6,8,1910,1950,Gable,CompShg,AsbShng,AsbShng,,0.0,2,1,Stone,2.0,3,No,Unf,0,Unf,0,1020,1020,GasA,3,0,FuseP,908,1020,0,1928,0,0,2,0,4,2,1,9,Typ,0,,Detchd,1910.0,Unf,1,440,1,1.0,Y,0,60,112,0,0,0,,,,0,4,2006,WD,440,0,2,6.0,1.0
1,2718,905108090,90,RL,,9662,1,,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,Duplex,1Story,5,4,1977,1977,Gable,CompShg,Plywood,Plywood,,0.0,2,2,CBlock,4.0,3,No,Unf,0,Unf,0,1967,1967,GasA,2,1,SBrkr,1967,0,0,1967,0,0,2,0,6,2,2,10,Typ,0,,Attchd,1977.0,Fin,2,580,3,3.0,Y,170,0,0,0,0,0,,,,0,8,2006,WD,1160,0,4,12.0,9.0
2,2414,528218130,60,RL,58.0,17104,1,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,,0.0,3,2,PConc,4.0,4,Av,GLQ,554,Unf,0,100,654,GasA,4,1,SBrkr,664,832,0,1496,1,0,2,1,3,1,3,7,Typ,1,Gd,Attchd,2006.0,RFn,2,426,3,3.0,Y,100,24,0,0,0,0,,,,0,9,2006,New,852,2400,6,16.0,9.0
3,1989,902207150,30,RM,60.0,8520,1,,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,5,6,1923,2006,Gable,CompShg,Wd Sdng,Wd Sdng,,0.0,3,2,CBlock,3.0,3,No,Unf,0,Unf,0,968,968,GasA,2,1,SBrkr,968,0,0,968,0,0,1,0,2,1,2,5,Typ,0,,Detchd,1935.0,Unf,2,480,2,3.0,N,0,0,184,0,0,0,,,,0,7,2007,WD,960,0,6,9.0,6.0
4,625,535105100,20,RL,,9500,1,,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1963,1963,Gable,CompShg,Plywood,Plywood,BrkFace,247.0,2,2,CBlock,4.0,3,No,BLQ,609,Unf,0,785,1394,GasA,3,1,SBrkr,1394,0,0,1394,1,0,1,1,3,1,2,6,Typ,2,Gd,Attchd,1963.0,RFn,2,514,3,3.0,Y,0,76,0,0,185,0,,,,0,7,2009,WD,1028,0,4,12.0,9.0


In [642]:

test = dummy_cols_maker(data = test) 

In [643]:
test.head()

Unnamed: 0,id,pid,ms_subclass,lot_frontage,lot_area,street,alley,lot_shape,land_contour,lot_config,land_slope,condition_2,bldg_type,house_style,overall_qual,overall_cond,year_built,year_remod/add,roof_style,roof_matl,exterior_1st,exterior_2nd,mas_vnr_type,mas_vnr_area,exter_qual,exter_cond,bsmt_qual,bsmt_cond,bsmt_exposure,bsmtfin_type_1,bsmtfin_sf_1,bsmtfin_type_2,bsmtfin_sf_2,bsmt_unf_sf,total_bsmt_sf,heating_qc,central_air,1st_flr_sf,2nd_flr_sf,low_qual_fin_sf,gr_liv_area,bsmt_full_bath,bsmt_half_bath,full_bath,half_bath,bedroom_abvgr,kitchen_abvgr,kitchen_qual,totrms_abvgrd,fireplaces,fireplace_qu,garage_type,garage_yr_blt,garage_finish,garage_cars,garage_area,garage_qual,garage_cond,paved_drive,wood_deck_sf,open_porch_sf,enclosed_porch,3ssn_porch,screen_porch,pool_area,pool_qc,fence,misc_feature,misc_val,mo_sold,yr_sold,sale_type,garage_comb,deck_*_porch,exter_cond*exter_qual,bsmt_qual*bsmt_cond,garage_qual*garage_con,neighborhood_Blueste,neighborhood_BrDale,neighborhood_BrkSide,neighborhood_ClearCr,neighborhood_CollgCr,neighborhood_Crawfor,neighborhood_Edwards,neighborhood_Gilbert,neighborhood_Greens,neighborhood_IDOTRR,neighborhood_MeadowV,neighborhood_Mitchel,neighborhood_NAmes,neighborhood_NPkVill,neighborhood_NWAmes,neighborhood_NoRidge,neighborhood_NridgHt,neighborhood_OldTown,neighborhood_SWISU,neighborhood_Sawyer,neighborhood_SawyerW,neighborhood_Somerst,neighborhood_StoneBr,neighborhood_Timber,neighborhood_Veenker,electrical_FuseF,electrical_FuseP,electrical_SBrkr,heating_GasA,heating_GasW,heating_Grav,functional_Maj2,functional_Min1,functional_Min2,functional_Mod,functional_Typ,ms_zoning_FV,ms_zoning_I (all),ms_zoning_RH,ms_zoning_RL,ms_zoning_RM,utilities_NoSewr,condition_1_Feedr,condition_1_Norm,condition_1_PosA,condition_1_PosN,condition_1_RRAe,condition_1_RRAn,condition_1_RRNe,condition_1_RRNn,foundation_CBlock,foundation_PConc,foundation_Slab,foundation_Stone,foundation_Wood
0,2658,902301120,190,69.0,9142,1,Grvl,Reg,Lvl,Inside,Gtl,Norm,2fmCon,2Story,6,8,1910,1950,Gable,CompShg,AsbShng,AsbShng,,0.0,2,1,2.0,3,No,Unf,0,Unf,0,1020,1020,3,0,908,1020,0,1928,0,0,2,0,4,2,1,9,0,,Detchd,1910.0,Unf,1,440,1,1.0,Y,0,60,112,0,0,0,,,,0,4,2006,WD,440,0,2,6.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0
1,2718,905108090,90,,9662,1,,IR1,Lvl,Inside,Gtl,Norm,Duplex,1Story,5,4,1977,1977,Gable,CompShg,Plywood,Plywood,,0.0,2,2,4.0,3,No,Unf,0,Unf,0,1967,1967,2,1,1967,0,0,1967,0,0,2,0,6,2,2,10,0,,Attchd,1977.0,Fin,2,580,3,3.0,Y,170,0,0,0,0,0,,,,0,8,2006,WD,1160,0,4,12.0,9.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
2,2414,528218130,60,58.0,17104,1,,IR1,Lvl,Inside,Gtl,Norm,1Fam,2Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,,0.0,3,2,4.0,4,Av,GLQ,554,Unf,0,100,654,4,1,664,832,0,1496,1,0,2,1,3,1,3,7,1,Gd,Attchd,2006.0,RFn,2,426,3,3.0,Y,100,24,0,0,0,0,,,,0,9,2006,New,852,2400,6,16.0,9.0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0
3,1989,902207150,30,60.0,8520,1,,Reg,Lvl,Inside,Gtl,Norm,1Fam,1Story,5,6,1923,2006,Gable,CompShg,Wd Sdng,Wd Sdng,,0.0,3,2,3.0,3,No,Unf,0,Unf,0,968,968,2,1,968,0,0,968,0,0,1,0,2,1,2,5,0,,Detchd,1935.0,Unf,2,480,2,3.0,N,0,0,184,0,0,0,,,,0,7,2007,WD,960,0,6,9.0,6.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0
4,625,535105100,20,,9500,1,,IR1,Lvl,Inside,Gtl,Norm,1Fam,1Story,6,5,1963,1963,Gable,CompShg,Plywood,Plywood,BrkFace,247.0,2,2,4.0,3,No,BLQ,609,Unf,0,785,1394,3,1,1394,0,0,1394,1,0,1,1,3,1,2,6,2,Gd,Attchd,1963.0,RFn,2,514,3,3.0,Y,0,76,0,0,185,0,,,,0,7,2009,WD,1028,0,4,12.0,9.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0


In [644]:
test['neighborhood_GrnHill'] = 0 
test['electrical_Mix'] = 0 
test['neighborhood_Landmrk'] = 0
test['heating_Wall'] = 0 
test['heating_OthW'] = 0 
test['functional_Sal'] = 0
test['functional_Sev'] =0 
test['utilities_NoSeWa'] =0 
test['ms_zoning_C (all)'] = 0 

In [645]:
test[features]

Unnamed: 0,overall_qual,exter_qual,gr_liv_area,garage_comb,kitchen_qual,total_bsmt_sf,deck_*_porch,exter_cond*exter_qual,wood_deck_sf,fireplaces,full_bath,bsmt_qual*bsmt_cond,mas_vnr_area,open_porch_sf,year_built,year_remod/add,1st_flr_sf,totrms_abvgrd,heating_qc,neighborhood_NridgHt,garage_qual*garage_con,central_air,neighborhood_OldTown,neighborhood_SWISU,neighborhood_Sawyer,neighborhood_Blueste,neighborhood_BrDale,neighborhood_BrkSide,neighborhood_ClearCr,neighborhood_CollgCr,neighborhood_Crawfor,neighborhood_Edwards,neighborhood_Gilbert,neighborhood_Greens,neighborhood_GrnHill,neighborhood_IDOTRR,neighborhood_Landmrk,neighborhood_MeadowV,neighborhood_Mitchel,neighborhood_NAmes,neighborhood_NPkVill,neighborhood_NWAmes,neighborhood_NoRidge,neighborhood_NridgHt.1,neighborhood_OldTown.1,neighborhood_SWISU.1,neighborhood_Sawyer.1,neighborhood_SawyerW,neighborhood_Somerst,neighborhood_StoneBr,neighborhood_Timber,neighborhood_Veenker,electrical_FuseF,electrical_FuseP,electrical_Mix,electrical_SBrkr,heating_GasW,heating_Grav,heating_OthW,heating_Wall,functional_Maj2,functional_Min1,functional_Min2,functional_Mod,functional_Sal,functional_Sev,functional_Typ,ms_zoning_C (all),ms_zoning_FV,ms_zoning_I (all),ms_zoning_RH,ms_zoning_RL,ms_zoning_RM,utilities_NoSeWa,utilities_NoSewr,condition_1_Feedr,condition_1_Norm,condition_1_PosA,condition_1_PosN,condition_1_RRAe,condition_1_RRAn,condition_1_RRNe,condition_1_RRNn,foundation_CBlock,foundation_PConc,foundation_Slab,foundation_Stone,foundation_Wood
0,6,2,1928,440,1,1020,0,2,0,0,2,6.0,0.0,60,1910,1950,908,9,3,0,1.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0
1,5,2,1967,1160,2,1967,0,4,170,0,2,12.0,0.0,0,1977,1977,1967,10,2,0,9.0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
2,7,3,1496,852,3,654,2400,6,100,1,2,16.0,0.0,24,2006,2006,664,7,4,0,9.0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0
3,5,3,968,960,2,968,0,6,0,0,1,9.0,0.0,0,1923,2006,968,5,2,0,6.0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
4,6,2,1394,1028,2,1394,0,4,0,2,1,12.0,247.0,76,1963,1963,1394,6,3,0,9.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
874,6,2,1877,976,2,1084,0,4,0,1,2,9.0,0.0,96,1974,1974,1084,8,2,0,9.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0
875,6,3,1988,960,3,1104,0,9,0,1,2,9.0,410.0,230,1966,1999,1104,9,4,0,9.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
876,5,2,1211,322,2,952,0,4,0,1,1,9.0,0.0,63,1968,1968,1211,5,4,0,9.0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0
877,4,2,864,1056,2,864,0,4,0,0,1,9.0,0.0,0,1971,1971,864,5,2,0,9.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0


In [769]:
#def prediction_maker(data): 
test_preds = model.predict(test[features])
pred_reversed = pt_y.inverse_transform(test_preds.reshape(-1,1))
test['preds_1'] = pred_reversed

submission_13 = test.loc[:, ['id', 'preds_1']]
submission_13.rename(columns = {'id': 'Id', 'preds_1': 'SalePrice'}, inplace=True)
    

In [770]:
submission_13.head()
   

Unnamed: 0,Id,SalePrice
0,2658,119537.86128
1,2718,157688.651308
2,2414,186619.118075
3,1989,124891.648829
4,625,183645.615315


In [765]:
submission_13.to_csv('./Submissions/submissions_13', index=False )