# <center> Feature Engineering 4

**Summary of Action**
* Dummy Value Creation
* Apply Stanard Scaler on Numeric Values
* Cutting and Binning Feature with Large Amounts of Categorical Values

### Import Preliminaries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_selection import RFECV
import warnings


train_df = pd.read_csv('https://www.dropbox.com/s/nqftja8aa01z4mn/cleaned_train_df.csv?dl=1', index_col='Id')
test_df = pd.read_csv('https://www.dropbox.com/s/gw7fi8o71kczq6q/cleaned_test_df.csv?dl=1', index_col='Id')
dfs = [train_df, test_df]

#Set pandas optio
pd.set_option('max_columns',1000)
pd.set_option('max_rows',1000)
warnings.filterwarnings("ignore")

dfs = [train_df, test_df]
train_object_columns = train_df.select_dtypes('object').columns
test_object_columns = test_df.select_dtypes('object').columns
numeric_columns = train_df.select_dtypes(['int64','float64']).columns

In [2]:
print('Training Dataframe:', train_df.shape)
print('Test Dataframe:', test_df.shape)

Training Dataframe: (1460, 80)
Test Dataframe: (1459, 79)


### Label Encode Qantative Values

In [3]:
# concat dataframes for labeling
all_data = pd.concat([train_df, test_df], axis=0)

object_columns = list(all_data.select_dtypes('object').columns)
le = LabelEncoder() 

for col in all_data[object_columns]:
    all_data[col] = le.fit_transform(all_data[col])
    
train_df = all_data[:1460]
test_df = all_data[1460:].drop('SalePrice', axis=1)

### Cut Categorical Featurers

In [4]:
for col in train_df[train_object_columns]:
  train_df[col] = pd.cut(train_df[col].copy(), 5, 
                         labels=["good", "gm" ,"medium", "mb", "bad"])
  
for col in test_df[test_object_columns]:
  test_df[col] = pd.cut(test_df[col].copy(), 5, 
                         labels=["good", "gm" ,"medium", "mb", "bad"])

### Replace Outliers with the Median Value

In [5]:
for df in dfs:
  for col in df:
    if df[col].dtype == 'int64' or df[col].dtype == 'float64':
      for value in df[col]:
        if value > df[col].quantile(.80): value = df[col].median()
        if value < df[col].quantile(.20): value = df[col].median()

### Create Dummy Variables

In [6]:
# concat dataframes for labeling
all_data = pd.concat([train_df, test_df], axis=0)

# create dummy variables for the application
all_data = pd.get_dummies(all_data)

train_df = all_data[:1460]
test_df = all_data[1460:].drop('SalePrice', axis=1)

In [7]:
train_df.head()

Unnamed: 0_level_0,1stFlrSF,2ndFlrSF,3SsnPorch,BedroomAbvGr,BsmtFinSF1,BsmtFinSF2,BsmtFullBath,BsmtHalfBath,BsmtUnfSF,EnclosedPorch,Fireplaces,FullBath,GarageArea,GarageCars,GarageYrBlt,GrLivArea,HalfBath,KitchenAbvGr,LotArea,LotFrontage,LowQualFinSF,MSSubClass,MasVnrArea,MiscVal,MoSold,OpenPorchSF,OverallCond,OverallQual,PoolArea,SalePrice,ScreenPorch,TotRmsAbvGrd,TotalBsmtSF,WoodDeckSF,YearBuilt,YearRemodAdd,YrSold,Alley_good,Alley_gm,Alley_medium,Alley_mb,Alley_bad,BldgType_good,BldgType_gm,BldgType_medium,BldgType_mb,BldgType_bad,BsmtCond_good,BsmtCond_gm,BsmtCond_medium,BsmtCond_mb,BsmtCond_bad,BsmtExposure_good,BsmtExposure_gm,BsmtExposure_medium,BsmtExposure_mb,BsmtExposure_bad,BsmtFinType1_good,BsmtFinType1_gm,BsmtFinType1_medium,BsmtFinType1_mb,BsmtFinType1_bad,BsmtFinType2_good,BsmtFinType2_gm,BsmtFinType2_medium,BsmtFinType2_mb,BsmtFinType2_bad,BsmtQual_good,BsmtQual_gm,BsmtQual_medium,BsmtQual_mb,BsmtQual_bad,CentralAir_good,CentralAir_gm,CentralAir_medium,CentralAir_mb,CentralAir_bad,Condition1_good,Condition1_gm,Condition1_medium,Condition1_mb,Condition1_bad,Condition2_good,Condition2_gm,Condition2_medium,Condition2_mb,Condition2_bad,Electrical_good,Electrical_gm,Electrical_medium,Electrical_mb,Electrical_bad,ExterCond_good,ExterCond_gm,ExterCond_medium,ExterCond_mb,ExterCond_bad,ExterQual_good,ExterQual_gm,ExterQual_medium,ExterQual_mb,ExterQual_bad,Exterior1st_good,Exterior1st_gm,Exterior1st_medium,Exterior1st_mb,Exterior1st_bad,Exterior2nd_good,Exterior2nd_gm,Exterior2nd_medium,Exterior2nd_mb,Exterior2nd_bad,Fence_good,Fence_gm,Fence_medium,Fence_mb,Fence_bad,FireplaceQu_good,FireplaceQu_gm,FireplaceQu_medium,FireplaceQu_mb,FireplaceQu_bad,Foundation_good,Foundation_gm,Foundation_medium,Foundation_mb,Foundation_bad,Functional_good,Functional_gm,Functional_medium,Functional_mb,Functional_bad,GarageCond_good,GarageCond_gm,GarageCond_medium,GarageCond_mb,GarageCond_bad,GarageFinish_good,GarageFinish_gm,GarageFinish_medium,GarageFinish_mb,GarageFinish_bad,GarageQual_good,GarageQual_gm,GarageQual_medium,GarageQual_mb,GarageQual_bad,GarageType_good,GarageType_gm,GarageType_medium,GarageType_mb,GarageType_bad,Heating_good,Heating_gm,Heating_medium,Heating_mb,Heating_bad,HeatingQC_good,HeatingQC_gm,HeatingQC_medium,HeatingQC_mb,HeatingQC_bad,HouseStyle_good,HouseStyle_gm,HouseStyle_medium,HouseStyle_mb,HouseStyle_bad,KitchenQual_good,KitchenQual_gm,KitchenQual_medium,KitchenQual_mb,KitchenQual_bad,LandContour_good,LandContour_gm,LandContour_medium,LandContour_mb,LandContour_bad,LandSlope_good,LandSlope_gm,LandSlope_medium,LandSlope_mb,LandSlope_bad,LotConfig_good,LotConfig_gm,LotConfig_medium,LotConfig_mb,LotConfig_bad,LotShape_good,LotShape_gm,LotShape_medium,LotShape_mb,LotShape_bad,MSZoning_good,MSZoning_gm,MSZoning_medium,MSZoning_mb,MSZoning_bad,MasVnrType_good,MasVnrType_gm,MasVnrType_medium,MasVnrType_mb,MasVnrType_bad,MiscFeature_good,MiscFeature_gm,MiscFeature_medium,MiscFeature_mb,MiscFeature_bad,Neighborhood_good,Neighborhood_gm,Neighborhood_medium,Neighborhood_mb,Neighborhood_bad,PavedDrive_good,PavedDrive_gm,PavedDrive_medium,PavedDrive_mb,PavedDrive_bad,PoolQC_good,PoolQC_gm,PoolQC_medium,PoolQC_mb,PoolQC_bad,RoofMatl_good,RoofMatl_gm,RoofMatl_medium,RoofMatl_mb,RoofMatl_bad,RoofStyle_good,RoofStyle_gm,RoofStyle_medium,RoofStyle_mb,RoofStyle_bad,SaleCondition_good,SaleCondition_gm,SaleCondition_medium,SaleCondition_mb,SaleCondition_bad,SaleType_good,SaleType_gm,SaleType_medium,SaleType_mb,SaleType_bad,Street_good,Street_gm,Street_medium,Street_mb,Street_bad,Utilities_good,Utilities_gm,Utilities_medium,Utilities_mb,Utilities_bad
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1
1,856,854,0,3,706.0,0.0,1.0,0.0,150.0,0,0,2,548.0,2.0,2003.0,1710,1,1,8450,65.0,0,60,196.0,0,2,61,5,7,0,208500.0,0,8,856.0,0,2003,2003,2008,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0
2,1262,0,0,3,978.0,0.0,0.0,1.0,284.0,0,1,2,460.0,2.0,1976.0,1262,0,1,9600,80.0,0,20,0.0,0,5,0,8,6,0,181500.0,0,6,1262.0,298,1976,1976,2007,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0
3,920,866,0,3,486.0,0.0,1.0,0.0,434.0,0,1,2,608.0,2.0,2001.0,1786,1,1,11250,68.0,0,60,162.0,0,9,42,5,7,0,223500.0,0,6,920.0,0,2001,2002,2008,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0
4,961,756,0,3,216.0,0.0,1.0,0.0,540.0,272,1,1,642.0,3.0,1998.0,1717,0,1,9550,60.0,0,70,0.0,0,2,35,5,7,0,140000.0,0,7,756.0,0,1915,1970,2006,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0
5,1145,1053,0,4,655.0,0.0,1.0,0.0,490.0,0,1,2,836.0,3.0,2000.0,2198,1,1,14260,84.0,0,60,350.0,0,12,84,5,8,0,250000.0,0,9,1145.0,192,2000,2000,2008,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0


### Export DataFrames

In [8]:
train_df.to_csv('Data/featured_train_df.csv', index=True)
test_df.to_csv('Data/featured_test_df.csv', index=True)