# Exploring feature combination and polynomial features

In [130]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler, PolynomialFeatures

from IPython.display import display, HTML

%matplotlib inline

In [131]:
# Load the pre-encoded train/test splits. keep_default_na=False stops pandas from
# converting its default missing-value markers (e.g. the string "NA") into NaN.
read_opts = {'keep_default_na': False}
train = pd.read_csv('datasets/clean_train_encoded.csv', **read_opts)
test = pd.read_csv('datasets/clean_test_encoded.csv', **read_opts)

_Garage Features_

In [132]:
# Garage Area scaled by the sum of the Garage Qual and Garage Cond columns.
# Built in a single loop so train and test always receive the identical formula
# (the previous copy-pasted cells could silently drift apart).
for frame in (train, test):
    frame['New_Gar_Feat'] = frame['Garage Area'] * (frame['Garage Qual'] + frame['Garage Cond'])

_Basement Features_

In [134]:
# Total Bsmt SF scaled by the sum of the Bsmt Cond and Bsmt Qual columns.
# Built in a single loop so train and test always receive the identical formula.
for frame in (train, test):
    frame['New_Bsmt_Feat'] = frame['Total Bsmt SF'] * (frame['Bsmt Cond'] + frame['Bsmt Qual'])

_Overall + Kitchen + Living Area Features_

In [136]:
# Gr Liv Area scaled by the sum of Overall Qual, Overall Cond and Kitchen Qual.
# Built in a single loop so train and test always receive the identical formula.
for frame in (train, test):
    frame['New_Ovr_Kit_Feat'] = frame['Gr Liv Area'] * (
        frame['Overall Qual'] + frame['Overall Cond'] + frame['Kitchen Qual']
    )

In [138]:
# Preview train: the three engineered New_* columns are appended at the far right, after SalePrice.
train.head()

Unnamed: 0,Id,PID,MS SubClass,Lot Frontage,Lot Area,Lot Shape,Utilities,Land Slope,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,Exter Qual,Exter Cond,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,Heating QC,Electrical,1st Flr SF,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Functional,Fireplaces,Fireplace Qu,Garage Yr Blt,Garage Finish,Garage Cars,Garage Area,Garage Qual,Garage Cond,Paved Drive,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Misc Val,Mo Sold,Yr Sold,zoning_C,zoning_FV,zoning_I,zoning_RH,zoning_RL,zoning_RM,street_Pave,land_countour_HLS,land_countour_Low,land_countour_Lvl,lot_conf_CulDSac,lot_conf_FR2,lot_conf_FR3,lot_conf_Inside,neigh_Blueste,neigh_BrDale,neigh_BrkSide,neigh_ClearCr,neigh_CollgCr,neigh_Crawfor,neigh_Edwards,neigh_Gilbert,neigh_Greens,neigh_GrnHill,neigh_IDOTRR,neigh_Landmrk,neigh_MeadowV,neigh_Mitchel,neigh_NAmes,neigh_NPkVill,neigh_NWAmes,neigh_NoRidge,neigh_NridgHt,neigh_OldTown,neigh_SWISU,neigh_Sawyer,neigh_SawyerW,neigh_Somerst,neigh_StoneBr,neigh_Timber,neigh_Veenker,cond1_Feedr,cond1_Norm,cond1_PosA,cond1_PosN,cond1_RRAe,cond1_RRAn,cond1_RRNe,cond1_RRNn,cond2_Feedr,cond2_Norm,cond2_PosA,cond2_PosN,cond2_RRAe,cond2_RRAn,cond2_RRNn,bldg_type_2fmCon,bldg_type_Duplex,bldg_type_Twnhs,bldg_type_TwnhsE,hse_style_1.5Unf,hse_style_1Story,hse_style_2.5Fin,hse_style_2.5Unf,hse_style_2Story,hse_style_SFoyer,hse_style_SLvl,rf_style_Gable,rf_style_Gambrel,rf_style_Hip,rf_style_Mansard,rf_style_Shed,rf_mat_Membran,rf_mat_Metal,rf_mat_Roll,rf_mat_Tar&Grv,rf_mat_WdShake,rf_mat_WdShngl,ext1st_AsphShn,ext1st_BrkComm,ext1st_BrkFace,ext1st_CBlock,ext1st_CemntBd,ext1st_HdBoard,ext1st_ImStucc,ext1st_MetalSd,ext1st_Plywood,ext1st_PreCast,ext1st_Stone,ext1st_Stucco,ext1st_VinylSd,ext1st_Wd Sdng,ext1st_WdShing,ext2nd_AsphShn,ext2nd_Brk 
Cmn,ext2nd_BrkFace,ext2nd_CBlock,ext2nd_CmentBd,ext2nd_HdBoard,ext2nd_ImStucc,ext2nd_MetalSd,ext2nd_Other,ext2nd_Plywood,ext2nd_PreCast,ext2nd_Stone,ext2nd_Stucco,ext2nd_VinylSd,ext2nd_Wd Sdng,ext2nd_Wd Shng,mas_vnr_type_BrkFace,mas_vnr_type_CBlock,mas_vnr_type_NA,mas_vnr_type_None,mas_vnr_type_Stone,found_CBlock,found_PConc,found_Slab,found_Stone,found_Wood,heat_GasA,heat_GasW,heat_Grav,heat_OthW,heat_Wall,ctrl_air_Y,gar_type_Attchd,gar_type_Basment,gar_type_BuiltIn,gar_type_CarPort,gar_type_Detchd,gar_type_NA,sale_type_CWD,sale_type_Con,sale_type_ConLD,sale_type_ConLI,sale_type_ConLw,sale_type_New,sale_type_Oth,sale_type_VWD,sale_type_WD,SalePrice,New_Gar_Feat,New_Bsmt_Feat,New_Ovr_Kit_Feat
0,109,533352170,60,0.0,13517,2,3,2,6,8,1976,2005,289.0,4,3,3,3,0,6,533.0,1,0.0,192.0,725.0,5,4,725,754,0,1479,0.0,0.0,2,1,3,1,4,6,7,0,0,1976.0,2,2.0,475.0,3,3,2,0,44,0,0,0,0,0,3,2010,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,130500,2850.0,4350.0,26622
1,544,531379050,60,43.0,11492,2,3,2,7,5,1996,1997,132.0,4,3,4,3,0,6,637.0,1,0.0,276.0,913.0,5,4,913,1209,0,2122,1.0,0.0,2,1,4,1,4,8,7,1,3,1997.0,2,2.0,559.0,3,3,2,0,74,0,0,0,0,0,4,2009,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,220000,3354.0,6391.0,33952
2,153,535304180,20,68.0,7922,3,3,2,5,7,1953,2007,0.0,3,4,3,3,0,6,731.0,1,0.0,326.0,1057.0,3,4,1057,0,0,1057,1.0,0.0,1,0,3,1,4,5,7,0,0,1953.0,1,1.0,246.0,3,3,2,0,52,0,0,0,0,0,1,2010,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,109000,1476.0,6342.0,16912
3,318,916386060,60,73.0,9802,3,3,2,5,5,2006,2007,0.0,3,3,4,3,0,1,0.0,1,0.0,384.0,384.0,4,4,744,700,0,1444,0.0,0.0,2,1,3,1,3,7,7,0,0,2007.0,3,2.0,400.0,3,3,2,100,0,0,0,0,0,0,4,2010,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,174000,2400.0,2688.0,18772
4,255,906425045,50,82.0,14235,2,3,2,6,8,1900,1993,0.0,3,3,2,4,0,1,0.0,1,0.0,676.0,676.0,3,4,831,614,0,1445,0.0,0.0,2,0,3,1,3,6,7,0,0,1957.0,1,2.0,484.0,3,3,0,0,59,0,0,0,0,0,3,2010,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,138500,2904.0,4056.0,24565


In [139]:
# Preview test: it carries the three engineered New_* columns but has no SalePrice column.
test.head()

Unnamed: 0,Id,PID,MS SubClass,Lot Frontage,Lot Area,Lot Shape,Utilities,Land Slope,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,Exter Qual,Exter Cond,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,Heating QC,Electrical,1st Flr SF,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Functional,Fireplaces,Fireplace Qu,Garage Yr Blt,Garage Finish,Garage Cars,Garage Area,Garage Qual,Garage Cond,Paved Drive,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Misc Val,Mo Sold,Yr Sold,zoning_C,zoning_FV,zoning_I,zoning_RH,zoning_RL,zoning_RM,street_Pave,land_countour_HLS,land_countour_Low,land_countour_Lvl,lot_conf_CulDSac,lot_conf_FR2,lot_conf_FR3,lot_conf_Inside,neigh_Blueste,neigh_BrDale,neigh_BrkSide,neigh_ClearCr,neigh_CollgCr,neigh_Crawfor,neigh_Edwards,neigh_Gilbert,neigh_Greens,neigh_GrnHill,neigh_IDOTRR,neigh_Landmrk,neigh_MeadowV,neigh_Mitchel,neigh_NAmes,neigh_NPkVill,neigh_NWAmes,neigh_NoRidge,neigh_NridgHt,neigh_OldTown,neigh_SWISU,neigh_Sawyer,neigh_SawyerW,neigh_Somerst,neigh_StoneBr,neigh_Timber,neigh_Veenker,cond1_Feedr,cond1_Norm,cond1_PosA,cond1_PosN,cond1_RRAe,cond1_RRAn,cond1_RRNe,cond1_RRNn,cond2_Feedr,cond2_Norm,cond2_PosA,cond2_PosN,cond2_RRAe,cond2_RRAn,cond2_RRNn,bldg_type_2fmCon,bldg_type_Duplex,bldg_type_Twnhs,bldg_type_TwnhsE,hse_style_1.5Unf,hse_style_1Story,hse_style_2.5Fin,hse_style_2.5Unf,hse_style_2Story,hse_style_SFoyer,hse_style_SLvl,rf_style_Gable,rf_style_Gambrel,rf_style_Hip,rf_style_Mansard,rf_style_Shed,rf_mat_Membran,rf_mat_Metal,rf_mat_Roll,rf_mat_Tar&Grv,rf_mat_WdShake,rf_mat_WdShngl,ext1st_AsphShn,ext1st_BrkComm,ext1st_BrkFace,ext1st_CBlock,ext1st_CemntBd,ext1st_HdBoard,ext1st_ImStucc,ext1st_MetalSd,ext1st_Plywood,ext1st_PreCast,ext1st_Stone,ext1st_Stucco,ext1st_VinylSd,ext1st_Wd Sdng,ext1st_WdShing,ext2nd_AsphShn,ext2nd_Brk 
Cmn,ext2nd_BrkFace,ext2nd_CBlock,ext2nd_CmentBd,ext2nd_HdBoard,ext2nd_ImStucc,ext2nd_MetalSd,ext2nd_Other,ext2nd_Plywood,ext2nd_PreCast,ext2nd_Stone,ext2nd_Stucco,ext2nd_VinylSd,ext2nd_Wd Sdng,ext2nd_Wd Shng,mas_vnr_type_BrkFace,mas_vnr_type_CBlock,mas_vnr_type_NA,mas_vnr_type_None,mas_vnr_type_Stone,found_CBlock,found_PConc,found_Slab,found_Stone,found_Wood,heat_GasA,heat_GasW,heat_Grav,heat_OthW,heat_Wall,ctrl_air_Y,gar_type_Attchd,gar_type_Basment,gar_type_BuiltIn,gar_type_CarPort,gar_type_Detchd,gar_type_NA,sale_type_CWD,sale_type_Con,sale_type_ConLD,sale_type_ConLI,sale_type_ConLw,sale_type_New,sale_type_Oth,sale_type_VWD,sale_type_WD,New_Gar_Feat,New_Bsmt_Feat,New_Ovr_Kit_Feat
0,2658,902301120,190,69.0,9142,3,3,2,6,8,1910,1950,0.0,3,2,2,3,0,1,0.0,1,0.0,1020.0,1020.0,4,1,908,1020,0,1928,0.0,0.0,2,0,4,2,2,9,7,0,0,1910.0,1,1.0,440.0,1,1,2,0,60,112,0,0,0,0,4,2006,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,880.0,5100.0,30848
1,2718,905108090,90,0.0,9662,2,3,2,5,4,1977,1977,0.0,3,3,4,3,0,1,0.0,1,0.0,1967.0,1967.0,3,4,1967,0,0,1967,0.0,0.0,2,0,6,2,3,10,7,0,0,1977.0,3,2.0,580.0,3,3,2,170,0,0,0,0,0,0,8,2006,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,3480.0,13769.0,23604
2,2414,528218130,60,58.0,17104,2,3,2,7,5,2006,2006,0.0,4,3,4,4,2,6,554.0,1,0.0,100.0,654.0,5,4,664,832,0,1496,1.0,0.0,2,1,3,1,4,7,7,1,4,2006.0,2,2.0,426.0,3,3,2,100,24,0,0,0,0,0,9,2006,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,2556.0,5232.0,23936
3,1989,902207150,30,60.0,8520,3,3,2,5,6,1923,2006,0.0,4,3,3,3,0,1,0.0,1,0.0,968.0,968.0,3,4,968,0,0,968,0.0,0.0,1,0,2,1,3,5,7,0,0,1935.0,1,2.0,480.0,2,3,0,0,0,184,0,0,0,0,7,2007,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,2400.0,5808.0,13552
4,625,535105100,20,0.0,9500,2,3,2,6,5,1963,1963,247.0,3,3,4,3,0,4,609.0,1,0.0,785.0,1394.0,4,4,1394,0,0,1394,1.0,0.0,1,1,3,1,3,6,7,2,4,1963.0,2,2.0,514.0,3,3,2,0,76,0,0,185,0,0,7,2009,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,3084.0,9758.0,19516


In [140]:
# Keep the target as a one-column DataFrame (not a Series); later cells
# re-attach it to train after the column layout changes.
train_sale_price = train.loc[:, ['SalePrice']]
train_sale_price.head()

Unnamed: 0,SalePrice
0,130500
1,220000
2,109000
3,174000
4,138500


In [141]:
# Left disabled: the test frame has no SalePrice column, so there is nothing to extract here.
# test_sale_price = test[['SalePrice']]
# test_sale_price.head()

In [142]:
# Move SalePrice to the last column in one step. pop() removes the column and
# returns it as a Series; assigning it back appends it at the end. Unlike a
# separate in-place drop followed by a re-add, this cell can be re-run safely
# and never leaves train without its target.
train['SalePrice'] = train.pop('SalePrice')

In [144]:
# Confirm the column order: SalePrice should now be the last column of train.
train.head()

Unnamed: 0,Id,PID,MS SubClass,Lot Frontage,Lot Area,Lot Shape,Utilities,Land Slope,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,Exter Qual,Exter Cond,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,Heating QC,Electrical,1st Flr SF,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Functional,Fireplaces,Fireplace Qu,Garage Yr Blt,Garage Finish,Garage Cars,Garage Area,Garage Qual,Garage Cond,Paved Drive,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Misc Val,Mo Sold,Yr Sold,zoning_C,zoning_FV,zoning_I,zoning_RH,zoning_RL,zoning_RM,street_Pave,land_countour_HLS,land_countour_Low,land_countour_Lvl,lot_conf_CulDSac,lot_conf_FR2,lot_conf_FR3,lot_conf_Inside,neigh_Blueste,neigh_BrDale,neigh_BrkSide,neigh_ClearCr,neigh_CollgCr,neigh_Crawfor,neigh_Edwards,neigh_Gilbert,neigh_Greens,neigh_GrnHill,neigh_IDOTRR,neigh_Landmrk,neigh_MeadowV,neigh_Mitchel,neigh_NAmes,neigh_NPkVill,neigh_NWAmes,neigh_NoRidge,neigh_NridgHt,neigh_OldTown,neigh_SWISU,neigh_Sawyer,neigh_SawyerW,neigh_Somerst,neigh_StoneBr,neigh_Timber,neigh_Veenker,cond1_Feedr,cond1_Norm,cond1_PosA,cond1_PosN,cond1_RRAe,cond1_RRAn,cond1_RRNe,cond1_RRNn,cond2_Feedr,cond2_Norm,cond2_PosA,cond2_PosN,cond2_RRAe,cond2_RRAn,cond2_RRNn,bldg_type_2fmCon,bldg_type_Duplex,bldg_type_Twnhs,bldg_type_TwnhsE,hse_style_1.5Unf,hse_style_1Story,hse_style_2.5Fin,hse_style_2.5Unf,hse_style_2Story,hse_style_SFoyer,hse_style_SLvl,rf_style_Gable,rf_style_Gambrel,rf_style_Hip,rf_style_Mansard,rf_style_Shed,rf_mat_Membran,rf_mat_Metal,rf_mat_Roll,rf_mat_Tar&Grv,rf_mat_WdShake,rf_mat_WdShngl,ext1st_AsphShn,ext1st_BrkComm,ext1st_BrkFace,ext1st_CBlock,ext1st_CemntBd,ext1st_HdBoard,ext1st_ImStucc,ext1st_MetalSd,ext1st_Plywood,ext1st_PreCast,ext1st_Stone,ext1st_Stucco,ext1st_VinylSd,ext1st_Wd Sdng,ext1st_WdShing,ext2nd_AsphShn,ext2nd_Brk 
Cmn,ext2nd_BrkFace,ext2nd_CBlock,ext2nd_CmentBd,ext2nd_HdBoard,ext2nd_ImStucc,ext2nd_MetalSd,ext2nd_Other,ext2nd_Plywood,ext2nd_PreCast,ext2nd_Stone,ext2nd_Stucco,ext2nd_VinylSd,ext2nd_Wd Sdng,ext2nd_Wd Shng,mas_vnr_type_BrkFace,mas_vnr_type_CBlock,mas_vnr_type_NA,mas_vnr_type_None,mas_vnr_type_Stone,found_CBlock,found_PConc,found_Slab,found_Stone,found_Wood,heat_GasA,heat_GasW,heat_Grav,heat_OthW,heat_Wall,ctrl_air_Y,gar_type_Attchd,gar_type_Basment,gar_type_BuiltIn,gar_type_CarPort,gar_type_Detchd,gar_type_NA,sale_type_CWD,sale_type_Con,sale_type_ConLD,sale_type_ConLI,sale_type_ConLw,sale_type_New,sale_type_Oth,sale_type_VWD,sale_type_WD,New_Gar_Feat,New_Bsmt_Feat,New_Ovr_Kit_Feat,SalePrice
0,109,533352170,60,0.0,13517,2,3,2,6,8,1976,2005,289.0,4,3,3,3,0,6,533.0,1,0.0,192.0,725.0,5,4,725,754,0,1479,0.0,0.0,2,1,3,1,4,6,7,0,0,1976.0,2,2.0,475.0,3,3,2,0,44,0,0,0,0,0,3,2010,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2850.0,4350.0,26622,130500
1,544,531379050,60,43.0,11492,2,3,2,7,5,1996,1997,132.0,4,3,4,3,0,6,637.0,1,0.0,276.0,913.0,5,4,913,1209,0,2122,1.0,0.0,2,1,4,1,4,8,7,1,3,1997.0,2,2.0,559.0,3,3,2,0,74,0,0,0,0,0,4,2009,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,3354.0,6391.0,33952,220000
2,153,535304180,20,68.0,7922,3,3,2,5,7,1953,2007,0.0,3,4,3,3,0,6,731.0,1,0.0,326.0,1057.0,3,4,1057,0,0,1057,1.0,0.0,1,0,3,1,4,5,7,0,0,1953.0,1,1.0,246.0,3,3,2,0,52,0,0,0,0,0,1,2010,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1476.0,6342.0,16912,109000
3,318,916386060,60,73.0,9802,3,3,2,5,5,2006,2007,0.0,3,3,4,3,0,1,0.0,1,0.0,384.0,384.0,4,4,744,700,0,1444,0.0,0.0,2,1,3,1,3,7,7,0,0,2007.0,3,2.0,400.0,3,3,2,100,0,0,0,0,0,0,4,2010,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,2400.0,2688.0,18772,174000
4,255,906425045,50,82.0,14235,2,3,2,6,8,1900,1993,0.0,3,3,2,4,0,1,0.0,1,0.0,676.0,676.0,3,4,831,614,0,1445,0.0,0.0,2,0,3,1,3,6,7,0,0,1957.0,1,2.0,484.0,3,3,0,0,59,0,0,0,0,0,3,2010,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,2904.0,4056.0,24565,138500


_Checking correlation with Sale Price_

In [145]:
# Raise both pandas display limits to 500 so wide frames and long Series
# are shown in full rather than truncated.
for option_name in ('display.max_columns', 'display.max_rows'):
    pd.set_option(option_name, 500)

In [146]:
# Pearson correlation of every candidate predictor with SalePrice.
# The Id column and the target itself are excluded by name rather than by
# position, so the result does not depend on SalePrice being the last column.
corr_sp = train.drop(columns=['Id', 'SalePrice']).corrwith(train['SalePrice'])

# Rank by strength of association, ignoring sign.
corr_sp.abs().sort_values(ascending=False)

New_Ovr_Kit_Feat        0.829210
Overall Qual            0.799589
New_Bsmt_Feat           0.756997
Gr Liv Area             0.725804
Exter Qual              0.712082
Kitchen Qual            0.691005
Total Bsmt SF           0.661263
New_Gar_Feat            0.656438
Garage Area             0.650425
Garage Cars             0.648155
1st Flr SF              0.643916
Bsmt Qual               0.612355
Year Built              0.560865
Year Remod/Add          0.545238
Full Bath               0.543263
Garage Yr Blt           0.541300
TotRms AbvGrd           0.530281
found_PConc             0.528814
Garage Finish           0.526959
Fireplace Qu            0.523222
Mas Vnr Area            0.505693
Fireplaces              0.454515
Heating QC              0.452204
neigh_NridgHt           0.450279
BsmtFin SF 1            0.436771
Bsmt Exposure           0.436348
gar_type_Detchd         0.421141
mas_vnr_type_None       0.410447
found_CBlock            0.369688
sale_type_New           0.360687
Open Porch

## Creating Polynomial Features

### Train data

In [147]:
# Pick features from the top of the SalePrice correlation ranking to expand with polynomial terms

In [148]:
# Input columns for the polynomial expansion: the three engineered features
# plus two of the raw columns.
features = [
    'New_Gar_Feat',
    'New_Bsmt_Feat',
    'New_Ovr_Kit_Feat',
    'Exter Qual',
    '1st Flr SF',
]
X_train = train.loc[:, features]

In [149]:
# Degree-2 expansion (squares and pairwise products of the inputs);
# include_bias=False leaves out the constant column of ones.
# PolynomialFeatures is already imported in the notebook's import cell,
# so it is not re-imported here.
poly = PolynomialFeatures(degree=2, include_bias=False)


In [150]:
# Fit the expander on the selected training columns, then generate the expanded matrix.
X_poly_train = poly.fit(X_train).transform(X_train)

# (rows, generated columns)
np.shape(X_poly_train)

(1931, 20)

In [151]:
# Label the expanded columns with the terms they represent.
# get_feature_names() was removed in scikit-learn 1.2; prefer its replacement
# get_feature_names_out() and fall back only on older installations.
if hasattr(poly, 'get_feature_names_out'):
    poly_feature_names = poly.get_feature_names_out(features)
else:
    poly_feature_names = poly.get_feature_names(features)

# Reuse X_train's index so each expanded row stays aligned with its source row
# when the frame is later merged back into train on the index.
X_poly_train_df = pd.DataFrame(
    X_poly_train,
    columns=poly_feature_names,
    index=X_train.index,
)

In [152]:
# Inspect the expanded frame: the five input columns followed by their squares and pairwise products.
X_poly_train_df.head()

Unnamed: 0,New_Gar_Feat,New_Bsmt_Feat,New_Ovr_Kit_Feat,Exter Qual,1st Flr SF,New_Gar_Feat^2,New_Gar_Feat New_Bsmt_Feat,New_Gar_Feat New_Ovr_Kit_Feat,New_Gar_Feat Exter Qual,New_Gar_Feat 1st Flr SF,New_Bsmt_Feat^2,New_Bsmt_Feat New_Ovr_Kit_Feat,New_Bsmt_Feat Exter Qual,New_Bsmt_Feat 1st Flr SF,New_Ovr_Kit_Feat^2,New_Ovr_Kit_Feat Exter Qual,New_Ovr_Kit_Feat 1st Flr SF,Exter Qual^2,Exter Qual 1st Flr SF,1st Flr SF^2
0,2850.0,4350.0,26622.0,4.0,725.0,8122500.0,12397500.0,75872700.0,11400.0,2066250.0,18922500.0,115805700.0,17400.0,3153750.0,708730900.0,106488.0,19300950.0,16.0,2900.0,525625.0
1,3354.0,6391.0,33952.0,4.0,913.0,11249316.0,21435414.0,113875008.0,13416.0,3062202.0,40844881.0,216987232.0,25564.0,5834983.0,1152738000.0,135808.0,30998176.0,16.0,3652.0,833569.0
2,1476.0,6342.0,16912.0,3.0,1057.0,2178576.0,9360792.0,24962112.0,4428.0,1560132.0,40220964.0,107255904.0,19026.0,6703494.0,286015700.0,50736.0,17875984.0,9.0,3171.0,1117249.0
3,2400.0,2688.0,18772.0,3.0,744.0,5760000.0,6451200.0,45052800.0,7200.0,1785600.0,7225344.0,50459136.0,8064.0,1999872.0,352388000.0,56316.0,13966368.0,9.0,2232.0,553536.0
4,2904.0,4056.0,24565.0,3.0,831.0,8433216.0,11778624.0,71336760.0,8712.0,2413224.0,16451136.0,99635640.0,12168.0,3370536.0,603439200.0,73695.0,20413515.0,9.0,2493.0,690561.0


In [153]:
# Remove the base columns that were fed to the polynomial expansion; they come
# back as the first-order terms of the expanded frame. Reusing `features` keeps
# this list in sync with the columns that were actually expanded.
train = train.drop(columns=features)

In [154]:
# Attach the polynomial columns to the training frame, matching rows on the index.
# An inner join keeps only index labels present in both frames.
train = train.merge(
    X_poly_train_df,
    left_index=True,
    right_index=True,
    how='inner',
)

In [155]:
# Check the merged frame: the polynomial columns are appended after SalePrice, so the target is no longer last.
train.head()

Unnamed: 0,Id,PID,MS SubClass,Lot Frontage,Lot Area,Lot Shape,Utilities,Land Slope,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,Exter Cond,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,Heating QC,Electrical,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Functional,Fireplaces,Fireplace Qu,Garage Yr Blt,Garage Finish,Garage Cars,Garage Area,Garage Qual,Garage Cond,Paved Drive,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Misc Val,Mo Sold,Yr Sold,zoning_C,zoning_FV,zoning_I,zoning_RH,zoning_RL,zoning_RM,street_Pave,land_countour_HLS,land_countour_Low,land_countour_Lvl,lot_conf_CulDSac,lot_conf_FR2,lot_conf_FR3,lot_conf_Inside,neigh_Blueste,neigh_BrDale,neigh_BrkSide,neigh_ClearCr,neigh_CollgCr,neigh_Crawfor,neigh_Edwards,neigh_Gilbert,neigh_Greens,neigh_GrnHill,neigh_IDOTRR,neigh_Landmrk,neigh_MeadowV,neigh_Mitchel,neigh_NAmes,neigh_NPkVill,neigh_NWAmes,neigh_NoRidge,neigh_NridgHt,neigh_OldTown,neigh_SWISU,neigh_Sawyer,neigh_SawyerW,neigh_Somerst,neigh_StoneBr,neigh_Timber,neigh_Veenker,cond1_Feedr,cond1_Norm,cond1_PosA,cond1_PosN,cond1_RRAe,cond1_RRAn,cond1_RRNe,cond1_RRNn,cond2_Feedr,cond2_Norm,cond2_PosA,cond2_PosN,cond2_RRAe,cond2_RRAn,cond2_RRNn,bldg_type_2fmCon,bldg_type_Duplex,bldg_type_Twnhs,bldg_type_TwnhsE,hse_style_1.5Unf,hse_style_1Story,hse_style_2.5Fin,hse_style_2.5Unf,hse_style_2Story,hse_style_SFoyer,hse_style_SLvl,rf_style_Gable,rf_style_Gambrel,rf_style_Hip,rf_style_Mansard,rf_style_Shed,rf_mat_Membran,rf_mat_Metal,rf_mat_Roll,rf_mat_Tar&Grv,rf_mat_WdShake,rf_mat_WdShngl,ext1st_AsphShn,ext1st_BrkComm,ext1st_BrkFace,ext1st_CBlock,ext1st_CemntBd,ext1st_HdBoard,ext1st_ImStucc,ext1st_MetalSd,ext1st_Plywood,ext1st_PreCast,ext1st_Stone,ext1st_Stucco,ext1st_VinylSd,ext1st_Wd Sdng,ext1st_WdShing,ext2nd_AsphShn,ext2nd_Brk 
Cmn,ext2nd_BrkFace,ext2nd_CBlock,ext2nd_CmentBd,ext2nd_HdBoard,ext2nd_ImStucc,ext2nd_MetalSd,ext2nd_Other,ext2nd_Plywood,ext2nd_PreCast,ext2nd_Stone,ext2nd_Stucco,ext2nd_VinylSd,ext2nd_Wd Sdng,ext2nd_Wd Shng,mas_vnr_type_BrkFace,mas_vnr_type_CBlock,mas_vnr_type_NA,mas_vnr_type_None,mas_vnr_type_Stone,found_CBlock,found_PConc,found_Slab,found_Stone,found_Wood,heat_GasA,heat_GasW,heat_Grav,heat_OthW,heat_Wall,ctrl_air_Y,gar_type_Attchd,gar_type_Basment,gar_type_BuiltIn,gar_type_CarPort,gar_type_Detchd,gar_type_NA,sale_type_CWD,sale_type_Con,sale_type_ConLD,sale_type_ConLI,sale_type_ConLw,sale_type_New,sale_type_Oth,sale_type_VWD,sale_type_WD,SalePrice,New_Gar_Feat,New_Bsmt_Feat,New_Ovr_Kit_Feat,Exter Qual,1st Flr SF,New_Gar_Feat^2,New_Gar_Feat New_Bsmt_Feat,New_Gar_Feat New_Ovr_Kit_Feat,New_Gar_Feat Exter Qual,New_Gar_Feat 1st Flr SF,New_Bsmt_Feat^2,New_Bsmt_Feat New_Ovr_Kit_Feat,New_Bsmt_Feat Exter Qual,New_Bsmt_Feat 1st Flr SF,New_Ovr_Kit_Feat^2,New_Ovr_Kit_Feat Exter Qual,New_Ovr_Kit_Feat 1st Flr SF,Exter Qual^2,Exter Qual 1st Flr SF,1st Flr SF^2
0,109,533352170,60,0.0,13517,2,3,2,6,8,1976,2005,289.0,3,3,3,0,6,533.0,1,0.0,192.0,725.0,5,4,754,0,1479,0.0,0.0,2,1,3,1,4,6,7,0,0,1976.0,2,2.0,475.0,3,3,2,0,44,0,0,0,0,0,3,2010,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,130500,2850.0,4350.0,26622.0,4.0,725.0,8122500.0,12397500.0,75872700.0,11400.0,2066250.0,18922500.0,115805700.0,17400.0,3153750.0,708730900.0,106488.0,19300950.0,16.0,2900.0,525625.0
1,544,531379050,60,43.0,11492,2,3,2,7,5,1996,1997,132.0,3,4,3,0,6,637.0,1,0.0,276.0,913.0,5,4,1209,0,2122,1.0,0.0,2,1,4,1,4,8,7,1,3,1997.0,2,2.0,559.0,3,3,2,0,74,0,0,0,0,0,4,2009,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,220000,3354.0,6391.0,33952.0,4.0,913.0,11249316.0,21435414.0,113875008.0,13416.0,3062202.0,40844881.0,216987232.0,25564.0,5834983.0,1152738000.0,135808.0,30998176.0,16.0,3652.0,833569.0
2,153,535304180,20,68.0,7922,3,3,2,5,7,1953,2007,0.0,4,3,3,0,6,731.0,1,0.0,326.0,1057.0,3,4,0,0,1057,1.0,0.0,1,0,3,1,4,5,7,0,0,1953.0,1,1.0,246.0,3,3,2,0,52,0,0,0,0,0,1,2010,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,109000,1476.0,6342.0,16912.0,3.0,1057.0,2178576.0,9360792.0,24962112.0,4428.0,1560132.0,40220964.0,107255904.0,19026.0,6703494.0,286015700.0,50736.0,17875984.0,9.0,3171.0,1117249.0
3,318,916386060,60,73.0,9802,3,3,2,5,5,2006,2007,0.0,3,4,3,0,1,0.0,1,0.0,384.0,384.0,4,4,700,0,1444,0.0,0.0,2,1,3,1,3,7,7,0,0,2007.0,3,2.0,400.0,3,3,2,100,0,0,0,0,0,0,4,2010,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,174000,2400.0,2688.0,18772.0,3.0,744.0,5760000.0,6451200.0,45052800.0,7200.0,1785600.0,7225344.0,50459136.0,8064.0,1999872.0,352388000.0,56316.0,13966368.0,9.0,2232.0,553536.0
4,255,906425045,50,82.0,14235,2,3,2,6,8,1900,1993,0.0,3,2,4,0,1,0.0,1,0.0,676.0,676.0,3,4,614,0,1445,0.0,0.0,2,0,3,1,3,6,7,0,0,1957.0,1,2.0,484.0,3,3,0,0,59,0,0,0,0,0,3,2010,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,138500,2904.0,4056.0,24565.0,3.0,831.0,8433216.0,11778624.0,71336760.0,8712.0,2413224.0,16451136.0,99635640.0,12168.0,3370536.0,603439200.0,73695.0,20413515.0,9.0,2493.0,690561.0


In [156]:
#drop train set sale price
train.drop(columns='SalePrice', inplace=True)

In [157]:
# Add the sale price back. Because the column was dropped in the previous cell,
# this assignment creates it anew, which places it after all existing columns.
# NOTE(review): train_sale_price is defined outside this section; if it is a
# Series, pandas aligns it to train on the index -- confirm the indexes match.
train['SalePrice'] = train_sale_price

In [158]:
# Preview the first rows of train to check that SalePrice is now the last column
train.head()

Unnamed: 0,Id,PID,MS SubClass,Lot Frontage,Lot Area,Lot Shape,Utilities,Land Slope,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,Exter Cond,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,Heating QC,Electrical,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Functional,Fireplaces,Fireplace Qu,Garage Yr Blt,Garage Finish,Garage Cars,Garage Area,Garage Qual,Garage Cond,Paved Drive,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Misc Val,Mo Sold,Yr Sold,zoning_C,zoning_FV,zoning_I,zoning_RH,zoning_RL,zoning_RM,street_Pave,land_countour_HLS,land_countour_Low,land_countour_Lvl,lot_conf_CulDSac,lot_conf_FR2,lot_conf_FR3,lot_conf_Inside,neigh_Blueste,neigh_BrDale,neigh_BrkSide,neigh_ClearCr,neigh_CollgCr,neigh_Crawfor,neigh_Edwards,neigh_Gilbert,neigh_Greens,neigh_GrnHill,neigh_IDOTRR,neigh_Landmrk,neigh_MeadowV,neigh_Mitchel,neigh_NAmes,neigh_NPkVill,neigh_NWAmes,neigh_NoRidge,neigh_NridgHt,neigh_OldTown,neigh_SWISU,neigh_Sawyer,neigh_SawyerW,neigh_Somerst,neigh_StoneBr,neigh_Timber,neigh_Veenker,cond1_Feedr,cond1_Norm,cond1_PosA,cond1_PosN,cond1_RRAe,cond1_RRAn,cond1_RRNe,cond1_RRNn,cond2_Feedr,cond2_Norm,cond2_PosA,cond2_PosN,cond2_RRAe,cond2_RRAn,cond2_RRNn,bldg_type_2fmCon,bldg_type_Duplex,bldg_type_Twnhs,bldg_type_TwnhsE,hse_style_1.5Unf,hse_style_1Story,hse_style_2.5Fin,hse_style_2.5Unf,hse_style_2Story,hse_style_SFoyer,hse_style_SLvl,rf_style_Gable,rf_style_Gambrel,rf_style_Hip,rf_style_Mansard,rf_style_Shed,rf_mat_Membran,rf_mat_Metal,rf_mat_Roll,rf_mat_Tar&Grv,rf_mat_WdShake,rf_mat_WdShngl,ext1st_AsphShn,ext1st_BrkComm,ext1st_BrkFace,ext1st_CBlock,ext1st_CemntBd,ext1st_HdBoard,ext1st_ImStucc,ext1st_MetalSd,ext1st_Plywood,ext1st_PreCast,ext1st_Stone,ext1st_Stucco,ext1st_VinylSd,ext1st_Wd Sdng,ext1st_WdShing,ext2nd_AsphShn,ext2nd_Brk 
Cmn,ext2nd_BrkFace,ext2nd_CBlock,ext2nd_CmentBd,ext2nd_HdBoard,ext2nd_ImStucc,ext2nd_MetalSd,ext2nd_Other,ext2nd_Plywood,ext2nd_PreCast,ext2nd_Stone,ext2nd_Stucco,ext2nd_VinylSd,ext2nd_Wd Sdng,ext2nd_Wd Shng,mas_vnr_type_BrkFace,mas_vnr_type_CBlock,mas_vnr_type_NA,mas_vnr_type_None,mas_vnr_type_Stone,found_CBlock,found_PConc,found_Slab,found_Stone,found_Wood,heat_GasA,heat_GasW,heat_Grav,heat_OthW,heat_Wall,ctrl_air_Y,gar_type_Attchd,gar_type_Basment,gar_type_BuiltIn,gar_type_CarPort,gar_type_Detchd,gar_type_NA,sale_type_CWD,sale_type_Con,sale_type_ConLD,sale_type_ConLI,sale_type_ConLw,sale_type_New,sale_type_Oth,sale_type_VWD,sale_type_WD,New_Gar_Feat,New_Bsmt_Feat,New_Ovr_Kit_Feat,Exter Qual,1st Flr SF,New_Gar_Feat^2,New_Gar_Feat New_Bsmt_Feat,New_Gar_Feat New_Ovr_Kit_Feat,New_Gar_Feat Exter Qual,New_Gar_Feat 1st Flr SF,New_Bsmt_Feat^2,New_Bsmt_Feat New_Ovr_Kit_Feat,New_Bsmt_Feat Exter Qual,New_Bsmt_Feat 1st Flr SF,New_Ovr_Kit_Feat^2,New_Ovr_Kit_Feat Exter Qual,New_Ovr_Kit_Feat 1st Flr SF,Exter Qual^2,Exter Qual 1st Flr SF,1st Flr SF^2,SalePrice
0,109,533352170,60,0.0,13517,2,3,2,6,8,1976,2005,289.0,3,3,3,0,6,533.0,1,0.0,192.0,725.0,5,4,754,0,1479,0.0,0.0,2,1,3,1,4,6,7,0,0,1976.0,2,2.0,475.0,3,3,2,0,44,0,0,0,0,0,3,2010,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2850.0,4350.0,26622.0,4.0,725.0,8122500.0,12397500.0,75872700.0,11400.0,2066250.0,18922500.0,115805700.0,17400.0,3153750.0,708730900.0,106488.0,19300950.0,16.0,2900.0,525625.0,130500
1,544,531379050,60,43.0,11492,2,3,2,7,5,1996,1997,132.0,3,4,3,0,6,637.0,1,0.0,276.0,913.0,5,4,1209,0,2122,1.0,0.0,2,1,4,1,4,8,7,1,3,1997.0,2,2.0,559.0,3,3,2,0,74,0,0,0,0,0,4,2009,0,0,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,3354.0,6391.0,33952.0,4.0,913.0,11249316.0,21435414.0,113875008.0,13416.0,3062202.0,40844881.0,216987232.0,25564.0,5834983.0,1152738000.0,135808.0,30998176.0,16.0,3652.0,833569.0,220000
2,153,535304180,20,68.0,7922,3,3,2,5,7,1953,2007,0.0,4,3,3,0,6,731.0,1,0.0,326.0,1057.0,3,4,0,0,1057,1.0,0.0,1,0,3,1,4,5,7,0,0,1953.0,1,1.0,246.0,3,3,2,0,52,0,0,0,0,0,1,2010,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1476.0,6342.0,16912.0,3.0,1057.0,2178576.0,9360792.0,24962112.0,4428.0,1560132.0,40220964.0,107255904.0,19026.0,6703494.0,286015700.0,50736.0,17875984.0,9.0,3171.0,1117249.0,109000
3,318,916386060,60,73.0,9802,3,3,2,5,5,2006,2007,0.0,3,4,3,0,1,0.0,1,0.0,384.0,384.0,4,4,700,0,1444,0.0,0.0,2,1,3,1,3,7,7,0,0,2007.0,3,2.0,400.0,3,3,2,100,0,0,0,0,0,0,4,2010,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,2400.0,2688.0,18772.0,3.0,744.0,5760000.0,6451200.0,45052800.0,7200.0,1785600.0,7225344.0,50459136.0,8064.0,1999872.0,352388000.0,56316.0,13966368.0,9.0,2232.0,553536.0,174000
4,255,906425045,50,82.0,14235,2,3,2,6,8,1900,1993,0.0,3,2,4,0,1,0.0,1,0.0,676.0,676.0,3,4,614,0,1445,0.0,0.0,2,0,3,1,3,6,7,0,0,1957.0,1,2.0,484.0,3,3,0,0,59,0,0,0,0,0,3,2010,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,2904.0,4056.0,24565.0,3.0,831.0,8433216.0,11778624.0,71336760.0,8712.0,2413224.0,16451136.0,99635640.0,12168.0,3370536.0,603439200.0,73695.0,20413515.0,9.0,2493.0,690561.0,138500


In [159]:
# (rows, columns) of the training frame after re-attaching SalePrice
train.shape

(1931, 216)

### Test data

In [160]:
# Pick the features with the highest correlation as inputs for the polynomial expansion

In [161]:
# Columns that feed the polynomial expansion of the test set
features = [
    'New_Gar_Feat',
    'New_Bsmt_Feat',
    'New_Ovr_Kit_Feat',
    'Exter Qual',
    '1st Flr SF',
]
# Select just those columns from the test frame
X_test = test.loc[:, features]

In [162]:
# Instantiate PolynomialFeatures (already imported in the notebook's first cell,
# so the import is not repeated here).
# degree=2 is the library default, spelled out for the reader; include_bias=False
# leaves out the constant column of ones.
poly = PolynomialFeatures(degree=2, include_bias=False)


In [163]:
# Expand the selected test-set columns into their polynomial and interaction terms
X_poly_test = poly.fit(X_test).transform(X_test)

# Dimensions of the expanded array
X_poly_test.shape

(879, 20)

In [164]:
# Wrap the polynomial array in a DataFrame with readable column names.
# Newer scikit-learn releases expose get_feature_names_out and have dropped
# get_feature_names, so prefer the former and fall back for older versions.
_poly_names = (
    poly.get_feature_names_out(features)
    if hasattr(poly, 'get_feature_names_out')
    else poly.get_feature_names(features)
)
# Reuse X_test's index so that a later index-based merge back into `test`
# matches every row even if `test` does not carry a default 0..n-1 index.
X_poly_test_df = pd.DataFrame(X_poly_test, columns=_poly_names, index=X_test.index)

In [165]:
# Preview the first rows of the polynomial feature frame and its generated column names
X_poly_test_df.head()

Unnamed: 0,New_Gar_Feat,New_Bsmt_Feat,New_Ovr_Kit_Feat,Exter Qual,1st Flr SF,New_Gar_Feat^2,New_Gar_Feat New_Bsmt_Feat,New_Gar_Feat New_Ovr_Kit_Feat,New_Gar_Feat Exter Qual,New_Gar_Feat 1st Flr SF,New_Bsmt_Feat^2,New_Bsmt_Feat New_Ovr_Kit_Feat,New_Bsmt_Feat Exter Qual,New_Bsmt_Feat 1st Flr SF,New_Ovr_Kit_Feat^2,New_Ovr_Kit_Feat Exter Qual,New_Ovr_Kit_Feat 1st Flr SF,Exter Qual^2,Exter Qual 1st Flr SF,1st Flr SF^2
0,880.0,5100.0,30848.0,3.0,908.0,774400.0,4488000.0,27146240.0,2640.0,799040.0,26010000.0,157324800.0,15300.0,4630800.0,951599104.0,92544.0,28009984.0,9.0,2724.0,824464.0
1,3480.0,13769.0,23604.0,3.0,1967.0,12110400.0,47916120.0,82141920.0,10440.0,6845160.0,189585361.0,325003476.0,41307.0,27083623.0,557148816.0,70812.0,46429068.0,9.0,5901.0,3869089.0
2,2556.0,5232.0,23936.0,4.0,664.0,6533136.0,13372992.0,61180416.0,10224.0,1697184.0,27373824.0,125233152.0,20928.0,3474048.0,572932096.0,95744.0,15893504.0,16.0,2656.0,440896.0
3,2400.0,5808.0,13552.0,4.0,968.0,5760000.0,13939200.0,32524800.0,9600.0,2323200.0,33732864.0,78710016.0,23232.0,5622144.0,183656704.0,54208.0,13118336.0,16.0,3872.0,937024.0
4,3084.0,9758.0,19516.0,3.0,1394.0,9511056.0,30093672.0,60187344.0,9252.0,4299096.0,95218564.0,190437128.0,29274.0,13602652.0,380874256.0,58548.0,27205304.0,9.0,4182.0,1943236.0


In [166]:
# Drop the original versions of the expanded columns from test; the polynomial
# frame already contains them, so keeping both would duplicate column names.
# Reusing `features` keeps this list in sync with the columns that were expanded.
test.drop(columns=features, inplace=True)

In [167]:
# Attach the polynomial features to the test set, matching rows on the index
test = test.merge(X_poly_test_df, how='inner', left_index=True, right_index=True)

In [168]:
# Preview the first rows of test with the polynomial columns appended at the end
test.head()

Unnamed: 0,Id,PID,MS SubClass,Lot Frontage,Lot Area,Lot Shape,Utilities,Land Slope,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,Exter Cond,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,Heating QC,Electrical,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Functional,Fireplaces,Fireplace Qu,Garage Yr Blt,Garage Finish,Garage Cars,Garage Area,Garage Qual,Garage Cond,Paved Drive,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Misc Val,Mo Sold,Yr Sold,zoning_C,zoning_FV,zoning_I,zoning_RH,zoning_RL,zoning_RM,street_Pave,land_countour_HLS,land_countour_Low,land_countour_Lvl,lot_conf_CulDSac,lot_conf_FR2,lot_conf_FR3,lot_conf_Inside,neigh_Blueste,neigh_BrDale,neigh_BrkSide,neigh_ClearCr,neigh_CollgCr,neigh_Crawfor,neigh_Edwards,neigh_Gilbert,neigh_Greens,neigh_GrnHill,neigh_IDOTRR,neigh_Landmrk,neigh_MeadowV,neigh_Mitchel,neigh_NAmes,neigh_NPkVill,neigh_NWAmes,neigh_NoRidge,neigh_NridgHt,neigh_OldTown,neigh_SWISU,neigh_Sawyer,neigh_SawyerW,neigh_Somerst,neigh_StoneBr,neigh_Timber,neigh_Veenker,cond1_Feedr,cond1_Norm,cond1_PosA,cond1_PosN,cond1_RRAe,cond1_RRAn,cond1_RRNe,cond1_RRNn,cond2_Feedr,cond2_Norm,cond2_PosA,cond2_PosN,cond2_RRAe,cond2_RRAn,cond2_RRNn,bldg_type_2fmCon,bldg_type_Duplex,bldg_type_Twnhs,bldg_type_TwnhsE,hse_style_1.5Unf,hse_style_1Story,hse_style_2.5Fin,hse_style_2.5Unf,hse_style_2Story,hse_style_SFoyer,hse_style_SLvl,rf_style_Gable,rf_style_Gambrel,rf_style_Hip,rf_style_Mansard,rf_style_Shed,rf_mat_Membran,rf_mat_Metal,rf_mat_Roll,rf_mat_Tar&Grv,rf_mat_WdShake,rf_mat_WdShngl,ext1st_AsphShn,ext1st_BrkComm,ext1st_BrkFace,ext1st_CBlock,ext1st_CemntBd,ext1st_HdBoard,ext1st_ImStucc,ext1st_MetalSd,ext1st_Plywood,ext1st_PreCast,ext1st_Stone,ext1st_Stucco,ext1st_VinylSd,ext1st_Wd Sdng,ext1st_WdShing,ext2nd_AsphShn,ext2nd_Brk 
Cmn,ext2nd_BrkFace,ext2nd_CBlock,ext2nd_CmentBd,ext2nd_HdBoard,ext2nd_ImStucc,ext2nd_MetalSd,ext2nd_Other,ext2nd_Plywood,ext2nd_PreCast,ext2nd_Stone,ext2nd_Stucco,ext2nd_VinylSd,ext2nd_Wd Sdng,ext2nd_Wd Shng,mas_vnr_type_BrkFace,mas_vnr_type_CBlock,mas_vnr_type_NA,mas_vnr_type_None,mas_vnr_type_Stone,found_CBlock,found_PConc,found_Slab,found_Stone,found_Wood,heat_GasA,heat_GasW,heat_Grav,heat_OthW,heat_Wall,ctrl_air_Y,gar_type_Attchd,gar_type_Basment,gar_type_BuiltIn,gar_type_CarPort,gar_type_Detchd,gar_type_NA,sale_type_CWD,sale_type_Con,sale_type_ConLD,sale_type_ConLI,sale_type_ConLw,sale_type_New,sale_type_Oth,sale_type_VWD,sale_type_WD,New_Gar_Feat,New_Bsmt_Feat,New_Ovr_Kit_Feat,Exter Qual,1st Flr SF,New_Gar_Feat^2,New_Gar_Feat New_Bsmt_Feat,New_Gar_Feat New_Ovr_Kit_Feat,New_Gar_Feat Exter Qual,New_Gar_Feat 1st Flr SF,New_Bsmt_Feat^2,New_Bsmt_Feat New_Ovr_Kit_Feat,New_Bsmt_Feat Exter Qual,New_Bsmt_Feat 1st Flr SF,New_Ovr_Kit_Feat^2,New_Ovr_Kit_Feat Exter Qual,New_Ovr_Kit_Feat 1st Flr SF,Exter Qual^2,Exter Qual 1st Flr SF,1st Flr SF^2
0,2658,902301120,190,69.0,9142,3,3,2,6,8,1910,1950,0.0,2,2,3,0,1,0.0,1,0.0,1020.0,1020.0,4,1,1020,0,1928,0.0,0.0,2,0,4,2,2,9,7,0,0,1910.0,1,1.0,440.0,1,1,2,0,60,112,0,0,0,0,4,2006,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,880.0,5100.0,30848.0,3.0,908.0,774400.0,4488000.0,27146240.0,2640.0,799040.0,26010000.0,157324800.0,15300.0,4630800.0,951599104.0,92544.0,28009984.0,9.0,2724.0,824464.0
1,2718,905108090,90,0.0,9662,2,3,2,5,4,1977,1977,0.0,3,4,3,0,1,0.0,1,0.0,1967.0,1967.0,3,4,0,0,1967,0.0,0.0,2,0,6,2,3,10,7,0,0,1977.0,3,2.0,580.0,3,3,2,170,0,0,0,0,0,0,8,2006,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,3480.0,13769.0,23604.0,3.0,1967.0,12110400.0,47916120.0,82141920.0,10440.0,6845160.0,189585361.0,325003476.0,41307.0,27083623.0,557148816.0,70812.0,46429068.0,9.0,5901.0,3869089.0
2,2414,528218130,60,58.0,17104,2,3,2,7,5,2006,2006,0.0,3,4,4,2,6,554.0,1,0.0,100.0,654.0,5,4,832,0,1496,1.0,0.0,2,1,3,1,4,7,7,1,4,2006.0,2,2.0,426.0,3,3,2,100,24,0,0,0,0,0,9,2006,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,2556.0,5232.0,23936.0,4.0,664.0,6533136.0,13372992.0,61180416.0,10224.0,1697184.0,27373824.0,125233152.0,20928.0,3474048.0,572932096.0,95744.0,15893504.0,16.0,2656.0,440896.0
3,1989,902207150,30,60.0,8520,3,3,2,5,6,1923,2006,0.0,3,3,3,0,1,0.0,1,0.0,968.0,968.0,3,4,0,0,968,0.0,0.0,1,0,2,1,3,5,7,0,0,1935.0,1,2.0,480.0,2,3,0,0,0,184,0,0,0,0,7,2007,0,0,0,0,0,1,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,2400.0,5808.0,13552.0,4.0,968.0,5760000.0,13939200.0,32524800.0,9600.0,2323200.0,33732864.0,78710016.0,23232.0,5622144.0,183656704.0,54208.0,13118336.0,16.0,3872.0,937024.0
4,625,535105100,20,0.0,9500,2,3,2,6,5,1963,1963,247.0,3,4,3,0,4,609.0,1,0.0,785.0,1394.0,4,4,0,0,1394,1.0,0.0,1,1,3,1,3,6,7,2,4,1963.0,2,2.0,514.0,3,3,2,0,76,0,0,185,0,0,7,2009,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,3084.0,9758.0,19516.0,3.0,1394.0,9511056.0,30093672.0,60187344.0,9252.0,4299096.0,95218564.0,190437128.0,29274.0,13602652.0,380874256.0,58548.0,27205304.0,9.0,4182.0,1943236.0


In [169]:
# (rows, columns) of the test frame after the merge
test.shape

(879, 215)

# Save

In [170]:
# Write both frames to CSV, leaving out the index column
for frame, path in (
    (train, './datasets/clean_train_encoded_poly.csv'),
    (test, './datasets/clean_test_encoded_poly.csv'),
):
    frame.to_csv(path, index=False)