# Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
import seaborn as sns
%matplotlib inline
# Base matplotlib style.
# NOTE(review): sns.set(style="whitegrid") below also restyles the axes, so part of
# 'ggplot' is presumably overridden - confirm which look is intended.
plt.style.use('ggplot')
# Let pandas display up to 500 columns and 500 rows before truncating.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)
# Default figure size (width, height) in inches.
plt.rcParams['figure.figsize'] = (12,8)
# Apply the seaborn theme with the "whitegrid" axes style.
sns.set(style = "whitegrid")

In [2]:
# Read the train and test CSV files (paths are relative to the notebook's working directory).
train_df, test_df = map(pd.read_csv, ('./Data/train.csv', './Data/test.csv'))

# Imputation of Missing Values

### Train Set

In [3]:
# LotFrontage: replace missing values with the column mean of the train set.
train_df['LotFrontage'] = train_df['LotFrontage'].fillna(train_df['LotFrontage'].mean())

# MasVnrArea: a missing area is filled with 0.
train_df['MasVnrArea'] = train_df['MasVnrArea'].fillna(0)

# Categorical columns whose NaNs are replaced by the explicit label 'None'
# (presumably NaN means the house does not have that feature - confirm against
# the data description).
absent_feature_cols = [
    'MasVnrType',
    'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',
    'PoolQC', 'FireplaceQu',
    'GarageFinish', 'GarageType', 'GarageCond', 'GarageQual',
]
train_df[absent_feature_cols] = train_df[absent_feature_cols].fillna('None')

# Electrical: this imputation used to be commented out, but the recorded
# train_complete output has no NaN category for Electrical, so the saved results
# relied on it having been executed. Re-enabled so "Restart & Run All" reproduces
# them and no NaN reaches the one-hot encoder.
# NOTE(review): 'SBrkr' is presumably the most frequent value - confirm.
train_df['Electrical'] = train_df['Electrical'].fillna('SBrkr')

### Test Set

In [4]:
# LotFrontage: fill with the mean LotFrontage of the same Neighborhood in the train set.
# (When cells run in order, train_df['LotFrontage'] has already been mean-imputed.)
neighbor_mean = dict(train_df.groupby('Neighborhood')["LotFrontage"].mean())
test_df["LotFrontage"] = test_df["LotFrontage"].fillna(test_df["Neighborhood"].map(neighbor_mean))
# Fallback: a Neighborhood with no usable train mean would otherwise leave NaNs behind.
test_df["LotFrontage"] = test_df["LotFrontage"].fillna(train_df["LotFrontage"].mean())

# Constant fills, identical to the train-set treatment.
# Plain assignment is used instead of `df[col].fillna(..., inplace=True)`: the
# in-place call on a column selection is chained assignment, which emits a
# FutureWarning in pandas 2.x and does not update the frame under copy-on-write.
test_df["MasVnrType"] = test_df["MasVnrType"].fillna("None")
test_df["MasVnrArea"] = test_df["MasVnrArea"].fillna(0.0)

# Same 'None' label as the train set for these categorical columns, so both
# frames expose the same categories to the encoder.
absent_feature_cols = [
    "BsmtQual", "BsmtCond", "BsmtExposure", "BsmtFinType1", "BsmtFinType2",
    "PoolQC", "FireplaceQu",
    "GarageFinish", "GarageType", "GarageCond", "GarageQual",
]
test_df[absent_feature_cols] = test_df[absent_feature_cols].fillna("None")

# Fill the remaining NaNs with the mode of the corresponding train-set column
# (statistics come from the train set only, never from the test set).
mode_imputed_cols = [
    "MSZoning", "BsmtHalfBath", "BsmtFullBath", "Functional", "Exterior2nd",
    "SaleType", "Exterior1st", "KitchenQual", "GarageCars",
]
for mode_col in mode_imputed_cols:
    test_df[mode_col] = test_df[mode_col].fillna(train_df[mode_col].mode()[0])

# Feature Engineering

In [5]:
def feature_engineering(df):
    """Derive aggregate/age features and drop the columns that are not kept.

    The input frame is NOT modified; all work happens on a copy, so the calling
    cell can be re-run safely (the previous in-place version raised KeyError on
    a second run because the source columns were already gone).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the raw columns referenced below (porch/deck areas,
        'YrSold', 'YearBuilt', 'YearRemodAdd', 'GarageYrBlt' and every column
        listed in ``discarded_cols``).

    Returns
    -------
    pandas.DataFrame
        A new frame with 'Total_OutdoorSF', 'Age', 'AgeRemodAdd' and 'AgeGarage'
        appended (in that order) and the source/discarded columns removed.

    Raises
    ------
    KeyError
        If any referenced column is missing from ``df``.
    """
    df = df.copy()

    # Combine the square footage of all outdoor areas into a single feature.
    df['Total_OutdoorSF'] = (
        df['3SsnPorch'] + df['EnclosedPorch'] + df['OpenPorchSF']
        + df['ScreenPorch'] + df['WoodDeckSF']
    )

    # Express years as ages relative to the sale year
    # (original note: 53% of houses have the same YearBuilt and YearRemodAdd).
    df['Age'] = df['YrSold'] - df['YearBuilt']
    df['AgeRemodAdd'] = df['YrSold'] - df['YearRemodAdd']
    # NOTE(review): rows with a missing GarageYrBlt yield a NaN AgeGarage - confirm
    # this is handled downstream.
    df['AgeGarage'] = df['YrSold'] - df['GarageYrBlt']

    # Columns consumed by the derived features above.
    source_cols = [
        'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'WoodDeckSF',
        'YearBuilt', 'YearRemodAdd', 'GarageYrBlt',
    ]
    # Columns removed outright.
    discarded_cols = [
        'Utilities', 'Condition2', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1',
        'BsmtFinSF1', 'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'Heating',
        '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'KitchenAbvGr', 'Functional',
        'Fireplaces', 'MiscVal', 'Street', 'Alley', 'RoofMatl', 'Fence',
        'LandSlope', 'MiscFeature',
    ]
    # Further candidates that are currently kept (original note: "correlation < ~0.30
    # and with > ~0.9, except for MSSubClass"): Electrical, PavedDrive, BedroomAbvGr,
    # PoolQC, ExterCond, OverallCond, AgeRemodAdd, AgeGarage, Age.

    return df.drop(columns=source_cols + discarded_cols)

In [6]:
# Report the train-set column count before and after feature engineering.
print("Number of features in train set before feature engineering: ", train_df.shape[1], sep="")
print("-" * 60)
new_train_df = feature_engineering(train_df)
print("Number of features in train set after feature engineering: ", new_train_df.shape[1], sep="")

Number of features in train set before feature engineering: 81
------------------------------------------------------------
Number of features in train set after feature engineering: 54


In [7]:
# Report the test-set column count before and after feature engineering.
print("Number of features in test set before feature engineering: ", test_df.shape[1], sep="")
print("-" * 60)
new_test_df = feature_engineering(test_df)
print("Number of features in test set after feature engineering: ", new_test_df.shape[1], sep="")

Number of features in test set before feature engineering: 80
------------------------------------------------------------
Number of features in test set after feature engineering: 53


# Dummify 

In [8]:
# Split the engineered train frame by dtype: the categorical part (train1) is
# dummified, the numerical part (train2) is concatenated back after dummification.
train1 = new_train_df.select_dtypes(include=["object", "category"])
train2 = new_train_df.select_dtypes(include=["float64", "int64"])

In [9]:
# One-hot encode (dummify) the categorical columns, dropping the first level of
# each feature and returning a dense array.
# The dense-output keyword was renamed from `sparse` to `sparse_output` in
# scikit-learn 1.2 and the old name was later removed, so try the new name first
# and fall back to the old one on older installations.
try:
    encoder = OneHotEncoder(categories = "auto", drop = 'first', sparse_output = False)
except TypeError:
    encoder = OneHotEncoder(categories = "auto", drop = 'first', sparse = False)
train1_enc = encoder.fit_transform(train1)

# `get_feature_names` was superseded by `get_feature_names_out` (and removed in
# scikit-learn 1.2); use whichever this installation provides.
feature_names_fn = getattr(encoder, "get_feature_names_out", None) or encoder.get_feature_names
column = feature_names_fn(train1.columns.tolist())

In [11]:
# Rebuild a DataFrame from the encoded array, aligned on the numeric frame's index,
# then put the encoded and numeric features side by side.
# NOTE(review): this rebinds `train_df`, which earlier cells use for the raw data;
# re-running those cells afterwards will not see the original frame.
train_df = pd.DataFrame(train1_enc, columns=column, index=train2.index)
train_complete = pd.concat([train_df, train2], axis=1)

In [12]:
# TODO: also do this for the test set

## final check

In [13]:
# Display the combined train frame (encoded categorical columns followed by the numeric columns).
train_complete

Unnamed: 0,MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,LotShape_IR2,LotShape_IR3,LotShape_Reg,LandContour_HLS,LandContour_Low,LandContour_Lvl,LotConfig_CulDSac,LotConfig_FR2,LotConfig_FR3,LotConfig_Inside,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Condition1_Feedr,Condition1_Norm,Condition1_PosA,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition1_RRNe,Condition1_RRNn,BldgType_2fmCon,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,HouseStyle_1.5Unf,HouseStyle_1Story,HouseStyle_2.5Fin,HouseStyle_2.5Unf,HouseStyle_2Story,HouseStyle_SFoyer,HouseStyle_SLvl,RoofStyle_Gable,RoofStyle_Gambrel,RoofStyle_Hip,RoofStyle_Mansard,RoofStyle_Shed,Exterior1st_AsphShn,Exterior1st_BrkComm,Exterior1st_BrkFace,Exterior1st_CBlock,Exterior1st_CemntBd,Exterior1st_HdBoard,Exterior1st_ImStucc,Exterior1st_MetalSd,Exterior1st_Plywood,Exterior1st_Stone,Exterior1st_Stucco,Exterior1st_VinylSd,Exterior1st_Wd Sdng,Exterior1st_WdShing,Exterior2nd_AsphShn,Exterior2nd_Brk Cmn,Exterior2nd_BrkFace,Exterior2nd_CBlock,Exterior2nd_CmentBd,Exterior2nd_HdBoard,Exterior2nd_ImStucc,Exterior2nd_MetalSd,Exterior2nd_Other,Exterior2nd_Plywood,Exterior2nd_Stone,Exterior2nd_Stucco,Exterior2nd_VinylSd,Exterior2nd_Wd Sdng,Exterior2nd_Wd 
Shng,MasVnrType_BrkFace,MasVnrType_None,MasVnrType_Stone,ExterQual_Fa,ExterQual_Gd,ExterQual_TA,ExterCond_Fa,ExterCond_Gd,ExterCond_Po,ExterCond_TA,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_Stone,Foundation_Wood,BsmtQual_Fa,BsmtQual_Gd,BsmtQual_None,BsmtQual_TA,HeatingQC_Fa,HeatingQC_Gd,HeatingQC_Po,HeatingQC_TA,CentralAir_Y,Electrical_FuseF,Electrical_FuseP,Electrical_Mix,Electrical_SBrkr,KitchenQual_Fa,KitchenQual_Gd,KitchenQual_TA,FireplaceQu_Fa,FireplaceQu_Gd,FireplaceQu_None,FireplaceQu_Po,FireplaceQu_TA,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_CarPort,GarageType_Detchd,GarageType_None,GarageFinish_None,GarageFinish_RFn,GarageFinish_Unf,GarageQual_Fa,GarageQual_Gd,GarageQual_None,GarageQual_Po,GarageQual_TA,GarageCond_Fa,GarageCond_Gd,GarageCond_None,GarageCond_Po,GarageCond_TA,PavedDrive_P,PavedDrive_Y,PoolQC_Fa,PoolQC_Gd,PoolQC_None,SaleType_CWD,SaleType_Con,SaleType_ConLD,SaleType_ConLI,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,MasVnrArea,TotalBsmtSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,TotRmsAbvGrd,GarageCars,GarageArea,PoolArea,MoSold,YrSold,SalePrice,Total_OutdoorSF,Age,AgeRemodAdd,AgeGarage
0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1,60,65.0,8450,7,5,196.0,856,1710,1,0,2,1,3,8,2,548,0,2,2008,208500,61,5,5,5.0
1,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,2,20,80.0,9600,6,8,0.0,1262,1262,0,1,2,0,3,6,2,460,0,5,2007,181500,298,31,31,31.0
2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,3,60,68.0,11250,7,5,162.0,920,1786,1,0,2,1,3,6,2,608,0,9,2008,223500,42,7,6,7.0
3,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,4,70,60.0,9550,7,5,0.0,756,1717,1,0,1,0,3,7,3,642,0,2,2006,140000,307,91,36,8.0
4,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,5,60,84.0,14260,8,5,350.0,1145,2198,1,0,2,1,4,9,3,836,0,12,2008,250000,276,8,8,8.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1456,60,62.0,7917,6,5,0.0,953,1647,0,0,2,1,3,7,2,460,0,8,2007,175000,40,8,7,8.0
1456,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1457,20,85.0,13175,6,6,119.0,1542,2073,1,0,2,0,3,7,2,500,0,2,2010,210000,349,32,22,32.0
1457,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1458,70,66.0,9042,7,9,0.0,1152,2340,0,0,2,0,4,9,1,252,0,5,2010,266500,60,69,4,69.0
1458,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1459,20,68.0,9717,5,6,0.0,1078,1078,1,0,1,0,2,5,1,240,0,4,2010,142125,478,60,14,60.0


In [14]:
# Count the remaining missing values in each column of the combined train frame.
train_complete.isna().sum()

MSZoning_FV               0
MSZoning_RH               0
MSZoning_RL               0
MSZoning_RM               0
LotShape_IR2              0
LotShape_IR3              0
LotShape_Reg              0
LandContour_HLS           0
LandContour_Low           0
LandContour_Lvl           0
LotConfig_CulDSac         0
LotConfig_FR2             0
LotConfig_FR3             0
LotConfig_Inside          0
Neighborhood_Blueste      0
Neighborhood_BrDale       0
Neighborhood_BrkSide      0
Neighborhood_ClearCr      0
Neighborhood_CollgCr      0
Neighborhood_Crawfor      0
Neighborhood_Edwards      0
Neighborhood_Gilbert      0
Neighborhood_IDOTRR       0
Neighborhood_MeadowV      0
Neighborhood_Mitchel      0
Neighborhood_NAmes        0
Neighborhood_NPkVill      0
Neighborhood_NWAmes       0
Neighborhood_NoRidge      0
Neighborhood_NridgHt      0
Neighborhood_OldTown      0
Neighborhood_SWISU        0
Neighborhood_Sawyer       0
Neighborhood_SawyerW      0
Neighborhood_Somerst      0
Neighborhood_StoneBr