In [364]:
import warnings
warnings.filterwarnings('ignore')

# 1. Getting all the libraries

In [365]:
import numpy as np
import pandas as pd

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline

# numerical feature engineering
from scipy.stats import skew
#categorical feature engineering
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
# FeatureUnion
from sklearn.pipeline import FeatureUnion

# Model Selection
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

In [366]:
# Read the training CSV and discard its Id column.
train = pd.read_csv('data/train.csv').drop(columns=['Id'])
print("Shape of training data:", train.shape)

Shape of training data: (1460, 80)


# 2. Data imputation/manual engineering

## 2.1 Preprocessing

In [367]:
class Preprocessor(BaseEstimator, TransformerMixin):
    """Manual cleaning and feature construction for the housing DataFrame.

    The transformer is stateless (``fit`` learns nothing). ``transform``
    works on a copy of its input and

    * optionally drops rows with ``GrLivArea >= 4000``,
    * replaces ``SalePrice`` by its natural logarithm,
    * fills missing values column by column with fixed defaults,
    * recodes ``MSSubClass`` and ``MoSold`` from numbers to string labels,
    * maps ordered categories to integer ranks,
    * appends ``TotalBath``, ``AllSF``, ``AllFlrsSF``, ``AllPorchSF``,
      ``HasMasVnr`` and ``BoughtOffPlan``.
    """

    # Default used for a missing value, per column. Entries marked "(doc)"
    # follow the data description; the others are "most likely" guesses.
    _NA_DEFAULTS = {
        "Alley": "None",            # (doc) no alley access
        "BedroomAbvGr": 0,
        # (doc) NA for basement features means "no basement"
        "BsmtQual": "No",
        "BsmtCond": "No",
        "BsmtExposure": "No",
        "BsmtFinType1": "No",
        "BsmtFinType2": "No",
        "BsmtFullBath": 0,
        "BsmtHalfBath": 0,
        "BsmtUnfSF": 0,
        "CentralAir": "N",
        "Condition1": "Norm",
        "Condition2": "Norm",
        "EnclosedPorch": 0,
        "ExterCond": "TA",          # average
        "ExterQual": "TA",
        "Fence": "No",              # (doc) no fence
        "FireplaceQu": "No",        # (doc) no fireplace
        "Fireplaces": 0,
        "Functional": "Typ",        # (doc) typical
        # (doc) NA for garage features means "no garage"
        "GarageType": "No",
        "GarageFinish": "No",
        "GarageQual": "No",
        "GarageCond": "No",
        "GarageArea": 0,
        "GarageCars": 0,
        "HalfBath": 0,
        "HeatingQC": "TA",
        "KitchenAbvGr": 0,
        "KitchenQual": "TA",
        "LotFrontage": 0,
        "LotShape": "Reg",
        "MasVnrType": "None",
        "MasVnrArea": 0,
        "MiscFeature": "No",        # (doc) no misc feature
        "MiscVal": 0,
        "OpenPorchSF": 0,
        "PavedDrive": "N",
        "PoolQC": "No",             # (doc) no pool
        "PoolArea": 0,
        "SaleCondition": "Normal",
        "ScreenPorch": 0,
        "TotRmsAbvGrd": 0,
        "Utilities": "AllPub",
        "WoodDeckSF": 0,
    }

    # Numerical columns that are really categories. MSSubClass 150 is mapped
    # to the string "nan", which the categorical imputer used later in this
    # notebook (missing_values='nan') replaces by the most frequent label.
    _CODE_LABELS = {
        "MSSubClass": {20: "SC20", 30: "SC30", 40: "SC40", 45: "SC45",
                       50: "SC50", 60: "SC60", 70: "SC70", 75: "SC75",
                       80: "SC80", 85: "SC85", 90: "SC90", 120: "SC120",
                       150: "nan", 160: "SC160", 180: "SC180", 190: "SC190"},
        "MoSold": {1: "Jan", 2: "Feb", 3: "Mar", 4: "Apr", 5: "May", 6: "Jun",
                   7: "Jul", 8: "Aug", 9: "Sep", 10: "Oct", 11: "Nov", 12: "Dec"},
    }

    # Shared quality scales for the ordinal encoding below.
    _QUALITY = {"Po": 1, "Fa": 2, "TA": 3, "Gd": 4, "Ex": 5}
    _QUALITY_OR_ABSENT = {"No": 0, "Po": 1, "Fa": 2, "TA": 3, "Gd": 4, "Ex": 5}
    _BSMT_FINISH = {"No": 0, "Unf": 1, "LwQ": 2, "Rec": 3, "BLQ": 4,
                    "ALQ": 5, "GLQ": 6}

    # Categorical columns whose order carries information -> integer rank.
    # Values that are not listed (e.g. Alley == "None") are left untouched.
    _ORDINAL_RANKS = {
        "Alley": {"Grvl": 1, "Pave": 2},
        "BsmtCond": _QUALITY_OR_ABSENT,
        "BsmtExposure": {"No": 0, "Mn": 1, "Av": 2, "Gd": 3},
        "BsmtFinType1": _BSMT_FINISH,
        "BsmtFinType2": _BSMT_FINISH,
        "BsmtQual": _QUALITY_OR_ABSENT,
        "ExterCond": _QUALITY,
        "ExterQual": _QUALITY,
        "FireplaceQu": _QUALITY_OR_ABSENT,
        "Functional": {"Sal": 1, "Sev": 2, "Maj2": 3, "Maj1": 4, "Mod": 5,
                       "Min2": 6, "Min1": 7, "Typ": 8},
        "GarageCond": _QUALITY_OR_ABSENT,
        "GarageQual": _QUALITY_OR_ABSENT,
        "HeatingQC": _QUALITY,
        "KitchenQual": _QUALITY,
        "LandSlope": {"Sev": 1, "Mod": 2, "Gtl": 3},
        "LotShape": {"IR3": 1, "IR2": 2, "IR1": 3, "Reg": 4},
        "PavedDrive": {"N": 0, "P": 1, "Y": 2},
        "PoolQC": {"No": 0, "Fa": 1, "TA": 2, "Gd": 3, "Ex": 4},
        "Street": {"Grvl": 1, "Pave": 2},
        "Utilities": {"ELO": 1, "NoSeWa": 2, "NoSewr": 3, "AllPub": 4},
    }

    def __init__(self, *args, **kwargs):
        # No parameters are used; the catch-all signature is kept so existing
        # calls keep working.
        # NOTE(review): BaseEstimator.get_params()/clone() reject *args in
        # __init__ -- confirm before cloning this transformer.
        pass

    def fit(self, X, y=None):
        """Nothing to learn; returns ``self``."""
        return self

    def transform(self, X, y=None, outlier=False):
        """Return a cleaned, feature-enriched copy of ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Raw housing data. Must contain ``SalePrice`` and every column
            listed in ``_NA_DEFAULTS`` (a missing column raises ``KeyError``).
        y : ignored
        outlier : bool, default=False
            When true, keep only rows with ``GrLivArea < 4000``.

        Returns
        -------
        pandas.DataFrame
            A new frame; ``X`` itself is left unmodified.
        """
        # Work on a copy: the caller's frame must not be log-transformed or
        # filled in place (a second call would otherwise take the log twice).
        if outlier:
            X = X[X.GrLivArea < 4000].copy()
        else:
            X = X.copy()

        # take log of the SalePrice
        X['SalePrice'] = np.log(X['SalePrice'])

        for column, default in self._NA_DEFAULTS.items():
            X[column] = X[column].fillna(default)

        X = X.replace(self._CODE_LABELS)
        X = X.replace(self._ORDINAL_RANKS)

        # Total number of bathrooms (half baths count 0.5)
        X["TotalBath"] = (X["BsmtFullBath"] + (0.5 * X["BsmtHalfBath"])
                          + X["FullBath"] + (0.5 * X["HalfBath"]))
        # Total SF for house (incl. basement)
        X["AllSF"] = X["GrLivArea"] + X["TotalBsmtSF"]
        # Total SF for 1st + 2nd floors
        X["AllFlrsSF"] = X["1stFlrSF"] + X["2ndFlrSF"]
        # Total SF for porch
        X["AllPorchSF"] = (X["OpenPorchSF"] + X["EnclosedPorch"]
                           + X["3SsnPorch"] + X["ScreenPorch"])
        # Has masonry veneer or not
        X["HasMasVnr"] = X.MasVnrType.replace({"BrkCmn": 1, "BrkFace": 1, "CBlock": 1,
                                               "Stone": 1, "None": 0})
        # House completed before sale or not
        X["BoughtOffPlan"] = X.SaleCondition.replace({"Abnorml": 0, "Alloca": 0, "AdjLand": 0,
                                                      "Family": 0, "Normal": 0, "Partial": 1})

        return X

## 2.2 Separating Numerical and Categorical Features

In [368]:
# Re-read the raw training data and run the manual preprocessing on it.
train = pd.read_csv('data/train.csv').drop(columns=['Id'])
print("Shape of training data:", train.shape)

P = Preprocessor()
train = P.transform(train, outlier=False)

# Split the columns by dtype: object columns are treated as categorical,
# everything else as numerical. The SalePrice target is still part of
# numerical_features at this point.
numerical_features = train.select_dtypes(exclude=["object"]).columns
categorical_features = train.select_dtypes(include=["object"]).columns

print(f"Numerical features : {len(numerical_features)}")
print(f"Categorical features : {len(categorical_features)}")
print("Shape of training data post processing:", train.shape)

Shape of training data: (1460, 80)
Numerical features : 60
Categorical features : 26
Shape of training data post processing: (1460, 86)


In [369]:
# go from pd.dataFrame to np.array
class DataFrameSelector(BaseEstimator, TransformerMixin):
    """Pick a set of columns from a DataFrame inside a scikit-learn pipeline.

    Parameters
    ----------
    attribute_names : list-like of column labels (or a DataFrame)
        Columns to select when ``method`` is ``None`` or ``'drop'``.
    method : {None, 'all', 'all-drop', 'drop'}, default=None
        * ``None``       -- select ``attribute_names`` as given.
        * ``'all'``      -- select every column of the incoming frame.
        * ``'all-drop'`` -- every incoming column except ``SalePrice``.
        * ``'drop'``     -- ``attribute_names`` except ``SalePrice``.
    dtype : str, default='array'
        ``'array'`` returns ``.values`` (NumPy array); any other value
        returns the selected DataFrame.
    """

    def __init__(self, attribute_names, method=None, dtype='array'):
        self.method = method
        self.attribute_names = attribute_names
        self.dtype = dtype

    def fit(self, X, y=None):
        """Nothing to learn; returns ``self``."""
        return self

    def _resolve_columns(self, X):
        """Return the column labels to select from ``X`` for the configured method."""
        if self.method == 'all':
            return X.columns
        if self.method == 'all-drop':
            return X.columns.drop(['SalePrice'])
        if self.method == 'drop':
            # attribute_names is normally an Index/list of labels, which has no
            # ``.columns``; only fall back to ``.columns`` when a DataFrame was
            # passed. Index.drop raises KeyError if 'SalePrice' is absent.
            names = getattr(self.attribute_names, 'columns', self.attribute_names)
            return pd.Index(names).drop(['SalePrice'])
        return self.attribute_names

    def transform(self, X, y=None):
        """Return the selected columns of ``X`` as an array or a DataFrame."""
        selected = X[self._resolve_columns(X)]
        if self.dtype == 'array':
            return selected.values
        return selected

## 2.3 Numerical Features Engineering (Everything is in np.array)
### 2.3.1 Fill NA's in numerical features

In [370]:
class FillNAN(BaseEstimator, TransformerMixin):
    """Impute missing values with the column medians computed in ``fit``."""

    def __init__(self, *args, **kwargs):
        # No parameters are used.
        pass

    def fit(self, X, y=None):
        """Store the median of every column of ``X`` in ``self.median``."""
        column_medians = X.median()
        self.median = column_medians
        return self

    def transform(self, X, y=None):
        """Return ``X`` with missing entries replaced by the stored medians."""
        filled = X.fillna(self.median)
        return filled

In [371]:
# Replace whatever is still missing in the numerical columns by each column's median.
F = FillNAN()
X_train_num = F.fit_transform(train[numerical_features])

remaining_nas = X_train_num.isnull().values.sum()
print(f"Remaining NAs for numerical features in train : {remaining_nas}")

Remaining NAs for numerical features in train : 0


### 2.3.2 Add top k quadratic + top k cubic + top k square roots + top k inverses (k=10 by default; the pipeline below uses k=20)

In [372]:
class AddNonlin(BaseEstimator, TransformerMixin):
    """Append the non-linear terms that correlate best with ``SalePrice``.

    ``fit`` builds four families of candidate terms from the numerical frame:
    pairwise products of all columns, triple products of the ``k`` best
    original columns, ``sqrt(|x|)`` and ``1 / (1 + x)``. Every candidate is
    ranked by the absolute Pearson correlation with ``SalePrice`` and the
    names of the ``k`` best per family are remembered. ``transform``
    recomputes only the remembered terms and appends them to the input.

    Product terms are named ``"a-b"`` / ``"a-b-c"`` and split on ``'-'``
    again in ``transform``, so input column names must not contain ``'-'``.

    Parameters
    ----------
    k : int, default=10
        Number of terms kept per family.

    Attributes
    ----------
    top_indices : list of pandas.Index
        Filled by ``fit`` in the order ``[original, quad, cube, sqrt, invr]``.
        ``original`` holds all input columns (including ``SalePrice``) sorted
        by absolute correlation; the other four hold the ``k`` selected names.
    """

    def __init__(self, k=10, *args, **kwargs):
        self.top_indices = []  # order is [original, quad, cube, sqrt, invr]
        self.k = k

    @staticmethod
    def _rank_by_target(candidates, target):
        """Return the column names of ``candidates`` sorted by |corr| with ``target``.

        Only the correlation of each column with the target is computed, not
        the full candidate-by-candidate matrix. NaN correlations sort last.
        """
        if candidates.shape[1] == 0:
            return candidates.columns[:0]
        strength = candidates.corrwith(target).abs()
        return strength.sort_values(ascending=False).index

    def fit(self, X, y=None):
        """Select the best terms of every family.

        ``X`` must be a DataFrame that still contains the ``SalePrice`` column;
        the ``y`` argument is ignored.
        """
        # Start from a clean slate: fitting again must not append to (and then
        # keep reading) the results of a previous fit.
        self.top_indices = []

        target = X['SalePrice']
        ranked = np.abs(X.corr())['SalePrice'].sort_values(ascending=False).index
        self.top_indices.append(ranked)

        features = X.drop(['SalePrice'], axis=1)
        names = list(features.columns)
        n = len(names)

        # sqrt and inverse
        sqrt = np.sqrt(np.abs(features))
        invr = 1 / (1 + features)

        # quadratic terms from every pair of columns (squares included);
        # collected in a dict so the frame is built once instead of being
        # grown column by column
        quad = pd.DataFrame({
            names[i] + '-' + names[j]: features[names[i]] * features[names[j]]
            for i in range(n)
            for j in range(i, n)
        })

        # cubic terms only from the k original columns most correlated with
        # the target
        best = list(ranked.drop('SalePrice')[:self.k])
        m = len(best)
        cube = pd.DataFrame({
            best[a] + '-' + best[b] + '-' + best[c]:
                features[best[a]] * features[best[b]] * features[best[c]]
            for a in range(m)
            for b in range(a, m)
            for c in range(b, m)
        })

        for family in (quad, cube, sqrt, invr):
            self.top_indices.append(self._rank_by_target(family, target)[:self.k])

        return self

    def transform(self, X, y=None):
        """Return ``X`` with the selected quad, cube, sqrt and inverse columns appended."""
        # abs() mirrors what fit ranked (sqrt of the absolute value); without
        # it negative inputs would turn into NaN here.
        sqrt = np.sqrt(np.abs(X[self.top_indices[3]]))
        sqrt.columns = sqrt.columns + '-sqrt'
        invr = 1 / (1 + X[self.top_indices[4]])
        invr.columns = invr.columns + '-invr'

        quad_terms = {}
        for name in self.top_indices[1]:
            a, b = name.split('-')
            quad_terms[name] = X[a] * X[b]

        cube_terms = {}
        for name in self.top_indices[2]:
            a, b, c = name.split('-')
            cube_terms[name] = X[a] * X[b] * X[c]

        quad = pd.DataFrame(quad_terms)
        cube = pd.DataFrame(cube_terms)
        return pd.concat([X, quad, cube, sqrt, invr], axis=1)

In [373]:
# Try the transformer on its own with the default k=10. numerical_features
# still contains SalePrice, which AddNonlin.fit reads as the target.
N = AddNonlin()
t = N.fit_transform(train[numerical_features])

### 2.3.3 Skewness Correction

In [374]:
class Unskew(BaseEstimator, TransformerMixin):
    """Apply ``log1p`` to the columns that were skewed in the fitting data.

    Attributes
    ----------
    skewness : pandas.Series
        Skewness of the columns whose absolute skewness exceeds 0.5.
    skewed_features : pandas.Index
        Names of those columns.
    """

    def __init__(self, *args, **kwargs):
        # No parameters are used.
        pass

    def fit(self, X, y=None):
        """Record which columns of ``X`` have ``|skewness| > 0.5``."""
        skewness = X.apply(skew)
        self.skewness = skewness[skewness.abs() > 0.5]
        self.skewed_features = self.skewness.index

        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with ``log1p`` applied to the skewed columns."""
        # Copy first: assigning into X directly would overwrite the caller's
        # frame, and a second call would then apply log1p twice.
        X = X.copy()
        X[self.skewed_features] = np.log1p(X[self.skewed_features])
        return X

### 2.3.X Numerical Feature Engineering Pipeline

In [375]:
# Numerical branch. SalePrice stays in the frame until AddNonlin has been
# fitted (it ranks candidate terms by their correlation with SalePrice) and is
# dropped right before the skewness correction.
num_feature_eng_pipeline = Pipeline([
    ('selec_num', DataFrameSelector(numerical_features, dtype='df')), # input and output types = DataFrame
    ('nonlinearity', AddNonlin(k=20)), # needs the target to fit, so SalePrice cannot be dropped yet
    ('fillnan', FillNAN()), # input and output types = DataFrame
    ('drop_SalePrice', DataFrameSelector(numerical_features, method='all-drop', dtype='df')), 
    # input and output types = DataFrame
    # SalePrice is dropped, every other column is kept
    ('unskew', Unskew()),
    #('selector_array', DataFrameSelector(numerical_features, method='all', dtype='array')),
])

In [376]:
# Fit and apply the numerical branch on its own. The FeatureUnion cell further
# down calls fit_transform on the same pipeline object again.
X = num_feature_eng_pipeline.fit_transform(train)

## 2.4 Categorical Features Pipeline

### 2.4.1 Fill missing values by most frequent values

In [377]:
class FillCatNan(BaseEstimator, TransformerMixin):
    """Impute categorical columns with their most frequent value.

    The input is cast to ``str`` before it reaches the imputer, and the
    imputer is configured to treat the literal string ``'nan'`` as missing.
    """

    def __init__(self, *args, **kwargs):
        self.imputer = SimpleImputer(strategy='most_frequent', missing_values='nan')

    @staticmethod
    def _as_text(X):
        """Cast every entry of ``X`` to ``str``."""
        return X.astype(str)

    def fit(self, X, y=None):
        """Learn the most frequent value of every column."""
        text = self._as_text(X)
        self.imputer.fit(text)
        return self

    def transform(self, X, y=None):
        """Return the imputer's output for the string-cast ``X``."""
        text = self._as_text(X)
        return self.imputer.transform(text)

### 2.4.2 Encoding

In [378]:
class MyLabelEncoder(BaseEstimator, TransformerMixin):
    """Label-encode every column of a 2-D array with its own ``LabelEncoder``.

    Attributes
    ----------
    encoders : list of LabelEncoder
        One fitted encoder per input column, in column order.
    """

    def __init__(self, *args, **kwargs):
        self.encoders = []

    def fit(self, x, y=0):
        """Fit one ``LabelEncoder`` per column of ``x`` (``y`` is ignored)."""
        columns = x.transpose()
        self.encoders = [LabelEncoder().fit(columns[i])
                         for i in range(columns.shape[0])]
        return self

    def transform(self, x, y=0):
        """Return a copy of ``x`` with every column replaced by its integer codes.

        ``LabelEncoder.transform`` raises ``ValueError`` for labels that were
        not seen during ``fit``.
        """
        # transpose() of an array is a view, so encode a copy; writing into
        # the view would overwrite the caller's array with the codes.
        encoded = x.copy().transpose()
        for i, encoder in enumerate(self.encoders):
            encoded[i] = encoder.transform(encoded[i])

        return encoded.transpose()

### 2.4.3 One-hot encoding

In [379]:
class MyOneHotEncoder(BaseEstimator, TransformerMixin):
    """Wrap ``OneHotEncoder`` so that ``transform`` returns a dense array."""

    def __init__(self, *args, **kwargs):
        self.onehot = OneHotEncoder()

    def fit(self, X, y=None):
        """Fit the wrapped encoder on ``X``."""
        self.onehot.fit(X)
        return self

    def transform(self, X, y=None):
        """One-hot encode ``X`` and convert the result with ``toarray()``."""
        encoded = self.onehot.transform(X)
        return encoded.toarray()

### 2.4.X Categorical Pipeline

In [380]:
# Categorical branch: select the object-dtype columns, impute them with the
# most frequent value, label-encode each column, then one-hot encode.
cat_feature_eng_pipeline = Pipeline([
    ('selec_cat', DataFrameSelector(categorical_features, dtype='df')), # input and output types = DataFrame
    ('fill_by_mode', FillCatNan()), # output is an array
    ('encoder', MyLabelEncoder()),
    ('onehot', MyOneHotEncoder()),
])

In [381]:
#c = cat_feature_eng_pipeline.fit_transform(train)
#c.shape

## 2.5 FeatureUnion

In [382]:
# Run both branches on the same input frame and concatenate their outputs
# column-wise (numerical columns first, then the one-hot columns).
full_pipeline = FeatureUnion(transformer_list=[
    ("num_feature_eng_pipeline", num_feature_eng_pipeline),
    ("cat_feature_eng_pipeline", cat_feature_eng_pipeline),
])

# Fits every step of both branches on the preprocessed training frame.
housing_prepared = full_pipeline.fit_transform(train)

In [383]:
# (rows, total number of engineered feature columns)
housing_prepared.shape

(1460, 342)

## 2.6 Test Set Transform

In [384]:
# Read the test CSV and discard its Id column.
test = pd.read_csv('data/test.csv').drop(['Id'], axis=1)
# The preprocessing and pipeline code expect a SalePrice column, which the
# test file does not have. Add a placeholder of 1 (its log is 0). Assigning
# the column directly keeps the integer columns as integers; appending a
# dummy row and removing it again would turn them into floats.
test['SalePrice'] = 1
print(test.shape)

(1459, 80)


In [385]:
# Same manual preprocessing as for the training data (P is the Preprocessor
# instance created above); no rows are dropped.
test = P.transform(test, outlier=False)
print(test.shape)

(1459, 86)


In [386]:
# Apply the pipeline fitted on the training data. NOTE: `test` is rebound from
# the preprocessed DataFrame to the pipeline output, so re-running this cell
# requires re-running the two test cells above first.
test = full_pipeline.transform(test)

# 3 Model Selection

In [387]:
# Target: SalePrice as stored in the preprocessed frame (already log-transformed).
y = train['SalePrice'].values

# Shuffle rows and targets with the same permutation. The seed is fixed so the
# cross-validation folds, and therefore the scores below, are reproducible.
# NOTE: run this cell once per fit of full_pipeline -- running it again would
# permute the already shuffled housing_prepared against a freshly read y.
shuffle_index = np.random.RandomState(42).permutation(y.shape[0])
housing_prepared, y = housing_prepared[shuffle_index], y[shuffle_index]

In [388]:
def score_model(model, cv=10):
    """Cross-validate ``model`` on the prepared training data.

    Uses the notebook globals ``housing_prepared`` and ``y``. Prints and
    returns the per-fold root mean squared error (square root of the negated
    ``neg_mean_squared_error`` scores).

    Parameters
    ----------
    model : estimator passed to ``cross_val_score``
    cv : int, default=10
        Forwarded to ``cross_val_score``.

    Returns
    -------
    numpy.ndarray
        One RMSE value per fold.
    """
    neg_mse = cross_val_score(model, housing_prepared, y,
                              cv=cv, scoring='neg_mean_squared_error')
    rmse_per_fold = np.sqrt(-neg_mse)
    print('scores:', rmse_per_fold)
    print('mean score:', rmse_per_fold.mean())
    return rmse_per_fold

In [389]:
def model_predict(model, path='submit.csv', first_id=1461):
    """Fit ``model`` on the training data and write test predictions to a CSV.

    Uses the notebook globals ``housing_prepared``, ``y`` and ``test``.
    The model is trained on log prices, so predictions are passed through
    ``exp`` before being written.

    Parameters
    ----------
    model : estimator with ``fit`` and ``predict``
    path : str, default='submit.csv'
        Output file; an existing file is overwritten.
    first_id : int, default=1461
        ``Id`` of the first test row; the following rows are numbered
        consecutively.
    """
    model.fit(housing_prepared, y)
    ypred = np.exp(model.predict(test))
    # Derive the Id range from the number of predictions instead of
    # hard-coding its end.
    ids = np.arange(first_id, first_id + len(ypred))
    output = pd.DataFrame({'Id': ids, 'SalePrice': ypred})
    # Let pandas open the file itself so line endings are handled once.
    output.to_csv(path, index=False)

## 3.1 Linear Models

### 3.1.1 Linear Regression

In [390]:
# Unregularised least-squares baseline, scored with 10-fold cross-validation.
from sklearn.linear_model import LinearRegression
lin_reg_scores = score_model(LinearRegression())

scores: [0.13007893 9.70177899 0.14619136 0.15517907 0.1770308  0.12441692
 0.14942105 0.21484887 1.24738044 0.106041  ]
mean score: 1.2152367439796685


In [391]:
# Refit on all training rows and write submit.csv (overwritten by later calls).
model_predict(LinearRegression())

### 3.1.2 l1 Regularized

In [392]:
from sklearn.linear_model import LassoCV

In [393]:
# LassoCV picks alpha from this grid with its own 10-fold CV; score_model then
# wraps that in an outer 10-fold CV.
lasso = LassoCV(alphas=[0.0001, 0.0002, 0.0004, 0.0006, 0.0008, 0.001, 0.002, 
                          0.004, 0.006, 0.008, 0.01, 0.02, 0.04, 0.06, 0.08, 0.1, 0.5, 1, 2,5,10], cv=10)
lasso_reg_sores = score_model(lasso)

scores: [0.11371078 0.12116999 0.10540151 0.12282519 0.12484305 0.11190368
 0.13169205 0.16582415 0.15029146 0.08782647]
mean score: 0.123548832607889


In [394]:
# Refit the lasso on all training rows and overwrite submit.csv.
model_predict(lasso)

### 3.1.3 l2 Regularized

In [395]:
# RidgeCV picks alpha from this grid with its own 10-fold CV; score_model then
# wraps that in an outer 10-fold CV.
from sklearn.linear_model import RidgeCV
ridge = RidgeCV(alphas=[0.01, 0.02, 0.04, 0.06, 0.1, 0.5, 1, 2,5,10, 20, 22, 24, 26, 28, 30, 32], cv=10)
ridge_reg_sores = score_model(ridge)

scores: [0.12193188 0.12400835 0.11106818 0.13081762 0.12067149 0.11464279
 0.13563227 0.16978367 0.15303497 0.08963942]
mean score: 0.12712306285546782


### 3.1.4 l1 and l2 Regularized

In [396]:
from sklearn.linear_model import ElasticNetCV

In [397]:
# Grid for the elastic net: every l1_ratio is combined with every alpha.
# The inner CV uses only 2 folds.
alphas = [0.0001, 0.0002, 0.0004, 0.0006, 0.0008, 0.001, 0.002, 
                          0.004, 0.006, 0.008, 0.01, 0.02, 0.04, 0.06, 0.08, 0.1, 0.5, 0.8, 0.9, 0.95, 1, 1.1, 2,5,10] 
elasticNet = ElasticNetCV(l1_ratio = [0.1, 0.3, 0.5, 0.6, 0.7, 0.8, 0.85, 0.9, 0.95, 1],
                          alphas = alphas, 
                          max_iter = 50000, cv = 2)

In [398]:
# Outer CV with 2 folds only, unlike the 10 folds used for the other models,
# so the mean below is not computed on the same splits.
elasticNet_score = score_model(elasticNet, cv=2)

scores: [0.12364123 0.13417067]
mean score: 0.12890594926064158


## 3.2 Random Forest

In [399]:
from sklearn.ensemble import RandomForestRegressor

# Fixed random_state so the forest, and therefore the reported scores, are
# reproducible across runs.
rf_reg_scores = score_model(RandomForestRegressor(random_state=42))

scores: [0.14049716 0.14224561 0.13679382 0.14391492 0.14234535 0.12344555
 0.14687628 0.14292183 0.16881085 0.12549851]
mean score: 0.14133498719412493
