# Modeling & Evaluation

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
plt.style.use('ggplot')
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin, clone
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import RobustScaler, StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline, make_pipeline
from scipy.stats import skew
from sklearn.decomposition import PCA, KernelPCA
from sklearn.preprocessing import Imputer
from sklearn.model_selection import cross_val_score, GridSearchCV, KFold
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.svm import SVR, LinearSVR
from sklearn.linear_model import ElasticNet, SGDRegressor, BayesianRidge
from sklearn.kernel_ridge import KernelRidge
from xgboost import XGBRegressor

In [2]:
# Read the feature matrices and the target; header=None treats the first CSV
# row as data rather than column names.
X_train = pd.read_csv("dataset/X_train.csv", header=None)
X_test = pd.read_csv("dataset/X_test.csv", header=None)
y_train = pd.read_csv("dataset/y_train.csv", header=None)

# Every model below is trained against the natural log of the target.
y_train_log = np.log(y_train)

(X_train.shape, y_train_log.shape)

((1458, 263), (1458, 1))

In [3]:
def rmse_cv(model, X, y, cv=5):
    """Return the per-fold cross-validated RMSE of ``model`` on ``(X, y)``.

    Parameters
    ----------
    model : estimator
        Estimator handed straight to ``cross_val_score``.
    X, y : array-like
        Features and target, forwarded unchanged.
    cv : int or cross-validation splitter, default=5
        Forwarded to ``cross_val_score``. The default reproduces the
        original fixed 5-fold behaviour.

    Returns
    -------
    numpy.ndarray
        One RMSE value per fold.
    """
    # The "neg_mean_squared_error" scorer reports MSE with its sign flipped,
    # so negate it back before taking the square root.
    neg_mse = cross_val_score(model,
                              X, y,
                              scoring="neg_mean_squared_error",
                              cv=cv)
    return np.sqrt(-neg_mse)


In [4]:
# Baseline sweep: score a broad set of regressors with rmse_cv on the log
# target. `models` and `names` are parallel lists and must stay in step.
models = [
    LinearRegression(),
    Ridge(),
    Lasso(alpha=0.01, max_iter=10000),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
    SVR(),
    LinearSVR(),
    ElasticNet(alpha=0.001, max_iter=10000),
    SGDRegressor(max_iter=1000, tol=1e-3),
    BayesianRidge(),
    KernelRidge(alpha=0.6, kernel='polynomial', degree=2, coef0=2.5),
    ExtraTreesRegressor(),
    XGBRegressor(),
]

names = [
    "LR", "Ridge", "Lasso", "RF", "GBR", "SVR", "LinSVR",
    "Ela", "SGD", "Bay", "Ker", "Extra", "Xgb",
]

# Print "<name>: <mean RMSE>, <std of fold RMSEs>" for each estimator.
# NOTE(review): the recorded SGD score is many orders of magnitude above the
# others, which suggests that fit diverged - confirm before relying on it.
for name, model in zip(names, models):
    score = rmse_cv(model, X_train, y_train_log)
    print("{}: {:.6f}, {:.4f}".format(name, score.mean(), score.std()))

LR: 0.128964, 0.0125
Ridge: 0.117950, 0.0087
Lasso: 0.120817, 0.0044
RF: 0.156658, 0.0033
GBR: 0.134734, 0.0056
SVR: 0.120410, 0.0081
LinSVR: 0.134249, 0.0184
Ela: 0.109980, 0.0058
SGD: 173548898142.171478, 124405681354.2683
Bay: 0.110258, 0.0057
Ker: 0.109920, 0.0060
Extra: 0.146473, 0.0043
Xgb: 0.136239, 0.0046


In [5]:
class grid():
    """Run a 5-fold grid search for one estimator and print the outcome."""

    def __init__(self, model):
        # Estimator whose hyper-parameters are searched by grid_get.
        self.model = model

    def grid_get(self, X, y, param_grid):
        """Search ``param_grid`` and print the best setting and a score table.

        The search uses ``GridSearchCV`` with ``cv=5`` and the
        ``neg_mean_squared_error`` scorer. Two things are printed: the best
        parameter dict with its RMSE, then one row per candidate with
        ``params``, ``mean_test_score`` and ``std_test_score``.

        Note that only ``mean_test_score`` is converted to RMSE;
        ``std_test_score`` is shown exactly as GridSearchCV reports it, i.e.
        still on the negated-MSE scale. Nothing is returned.
        """
        search = GridSearchCV(self.model, param_grid, cv=5,
                              scoring="neg_mean_squared_error")
        search.fit(X, y)

        best_rmse = np.sqrt(-search.best_score_)
        print(search.best_params_, best_rmse)

        # Convert the mean column on a DataFrame copy of the results.
        report = pd.DataFrame(search.cv_results_)
        report['mean_test_score'] = np.sqrt(-report['mean_test_score'])
        print(report[['params', 'mean_test_score', 'std_test_score']])

In [6]:
# Fine-grained sweep of the Lasso penalty strength.
grid(Lasso()).grid_get(
    X_train,
    y_train_log,
    param_grid={
        'alpha': [0.0002, 0.0004, 0.0006, 0.0008, 0.0005, 0.0007],
        'max_iter': [10000],
    },
)

{'alpha': 0.0005, 'max_iter': 10000} 0.11016370036657303
                                 params  mean_test_score  std_test_score
0  {'alpha': 0.0002, 'max_iter': 10000}         0.111465        0.001671
1  {'alpha': 0.0004, 'max_iter': 10000}         0.110241        0.001391
2  {'alpha': 0.0006, 'max_iter': 10000}         0.110232        0.001243
3  {'alpha': 0.0008, 'max_iter': 10000}         0.110341        0.001128
4  {'alpha': 0.0005, 'max_iter': 10000}         0.110164        0.001312
5  {'alpha': 0.0007, 'max_iter': 10000}         0.110305        0.001180


In [7]:
# Sweep integer Ridge penalties 62..68 (range's upper bound is exclusive).
grid(Ridge()).grid_get(
    X_train,
    y_train_log,
    param_grid={'alpha': range(62, 69)},
)

{'alpha': 67} 0.10994676999065356
          params  mean_test_score  std_test_score
0  {'alpha': 62}         0.109950        0.001128
1  {'alpha': 63}         0.109949        0.001127
2  {'alpha': 64}         0.109948        0.001125
3  {'alpha': 65}         0.109947        0.001123
4  {'alpha': 66}         0.109947        0.001121
5  {'alpha': 67}         0.109947        0.001119
6  {'alpha': 68}         0.109947        0.001118


In [8]:
# RBF-kernel SVR: search C, gamma and epsilon over a narrow neighbourhood.
grid(SVR()).grid_get(
    X_train,
    y_train_log,
    param_grid={
        'C': [8, 9, 10],
        'kernel': ["rbf"],
        "gamma": [0.0004, 0.0005, 0.0006],
        "epsilon": [0.009, 0.01],
    },
)

{'C': 9, 'epsilon': 0.01, 'gamma': 0.0004, 'kernel': 'rbf'} 0.10781024834469576
                                               params  mean_test_score  \
0   {'C': 8, 'epsilon': 0.009, 'gamma': 0.0004, 'k...         0.107904   
1   {'C': 8, 'epsilon': 0.009, 'gamma': 0.0005, 'k...         0.108056   
2   {'C': 8, 'epsilon': 0.009, 'gamma': 0.0006, 'k...         0.108493   
3   {'C': 8, 'epsilon': 0.01, 'gamma': 0.0004, 'ke...         0.107866   
4   {'C': 8, 'epsilon': 0.01, 'gamma': 0.0005, 'ke...         0.108039   
5   {'C': 8, 'epsilon': 0.01, 'gamma': 0.0006, 'ke...         0.108476   
6   {'C': 9, 'epsilon': 0.009, 'gamma': 0.0004, 'k...         0.107823   
7   {'C': 9, 'epsilon': 0.009, 'gamma': 0.0005, 'k...         0.108267   
8   {'C': 9, 'epsilon': 0.009, 'gamma': 0.0006, 'k...         0.108765   
9   {'C': 9, 'epsilon': 0.01, 'gamma': 0.0004, 'ke...         0.107810   
10  {'C': 9, 'epsilon': 0.01, 'gamma': 0.0005, 'ke...         0.108220   
11  {'C': 9, 'epsilon': 0.01, 'g

In [9]:
# Degree-3 polynomial KernelRidge: search the penalty and the kernel offset.
param_grid = {
    'alpha': [0.2, 0.3, 0.4, 0.5],
    'kernel': ["polynomial"],
    'degree': [3],
    'coef0': [0.8, 1, 1.2],
}
grid(KernelRidge()).grid_get(X_train, y_train_log, param_grid)

{'alpha': 0.4, 'coef0': 1.2, 'degree': 3, 'kernel': 'polynomial'} 0.11451167630519105
                                               params  mean_test_score  \
0   {'alpha': 0.2, 'coef0': 0.8, 'degree': 3, 'ker...         0.119414   
1   {'alpha': 0.2, 'coef0': 1, 'degree': 3, 'kerne...         0.116243   
2   {'alpha': 0.2, 'coef0': 1.2, 'degree': 3, 'ker...         0.115670   
3   {'alpha': 0.3, 'coef0': 0.8, 'degree': 3, 'ker...         0.120924   
4   {'alpha': 0.3, 'coef0': 1, 'degree': 3, 'kerne...         0.116115   
5   {'alpha': 0.3, 'coef0': 1.2, 'degree': 3, 'ker...         0.114791   
6   {'alpha': 0.4, 'coef0': 0.8, 'degree': 3, 'ker...         0.122820   
7   {'alpha': 0.4, 'coef0': 1, 'degree': 3, 'kerne...         0.116526   
8   {'alpha': 0.4, 'coef0': 1.2, 'degree': 3, 'ker...         0.114512   
9   {'alpha': 0.5, 'coef0': 0.8, 'degree': 3, 'ker...         0.124849   
10  {'alpha': 0.5, 'coef0': 1, 'degree': 3, 'kerne...         0.117188   
11  {'alpha': 0.5, 'coef0'

In [10]:
# ElasticNet: joint search over penalty strength and the L1/L2 mixing ratio.
grid(ElasticNet()).grid_get(
    X_train,
    y_train_log,
    param_grid={
        'alpha': [0.0008, 0.004, 0.005, 0.006],
        'l1_ratio': [0.08, 0.1, 0.3, 0.5, 0.7],
        'max_iter': [10000],
    },
)

{'alpha': 0.005, 'l1_ratio': 0.08, 'max_iter': 10000} 0.11000566611944872
                                               params  mean_test_score  \
0   {'alpha': 0.0008, 'l1_ratio': 0.08, 'max_iter'...         0.114363   
1   {'alpha': 0.0008, 'l1_ratio': 0.1, 'max_iter':...         0.113723   
2   {'alpha': 0.0008, 'l1_ratio': 0.3, 'max_iter':...         0.110754   
3   {'alpha': 0.0008, 'l1_ratio': 0.5, 'max_iter':...         0.110195   
4   {'alpha': 0.0008, 'l1_ratio': 0.7, 'max_iter':...         0.110171   
5   {'alpha': 0.004, 'l1_ratio': 0.08, 'max_iter':...         0.110095   
6   {'alpha': 0.004, 'l1_ratio': 0.1, 'max_iter': ...         0.110026   
7   {'alpha': 0.004, 'l1_ratio': 0.3, 'max_iter': ...         0.111155   
8   {'alpha': 0.004, 'l1_ratio': 0.5, 'max_iter': ...         0.112189   
9   {'alpha': 0.004, 'l1_ratio': 0.7, 'max_iter': ...         0.113417   
10  {'alpha': 0.005, 'l1_ratio': 0.08, 'max_iter':...         0.110006   
11  {'alpha': 0.005, 'l1_ratio': 0.1, 

In [11]:
class AverageWeight(BaseEstimator, RegressorMixin):
    """Weighted-sum ensemble of independently fitted regressors.

    Parameters
    ----------
    mod : sequence of estimators
        Base regressors. Each one is cloned and fitted in ``fit``; the
        originals are left untouched.
    weight : sequence of float
        One multiplier per entry of ``mod``, applied to that model's
        predictions. Weights are used exactly as given (no normalisation).
    """

    def __init__(self, mod, weight):
        self.mod = mod
        self.weight = weight

    def fit(self, X, y):
        """Fit a fresh clone of every base model on ``(X, y)``.

        Raises
        ------
        ValueError
            If ``weight`` and ``mod`` differ in length. Without this check the
            surplus models or weights would be dropped silently at predict
            time.
        """
        if len(self.weight) != len(self.mod):
            raise ValueError(
                "weight must have one entry per model: got {} weights for {} models"
                .format(len(self.weight), len(self.mod)))
        self.models_ = [clone(est) for est in self.mod]
        for est in self.models_:
            est.fit(X, y)
        return self

    def predict(self, X):
        """Return the weighted sum of the base models' predictions.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
        """
        # Stack to (n_models, n_samples[, ...]).
        preds = np.array([est.predict(X) for est in self.models_])
        if preds.ndim > 2:
            # Models that return column vectors add trailing axes; collapse
            # them so each sample contributes a single value per model.
            preds = preds.reshape(preds.shape[0], preds.shape[1], -1).sum(axis=2)
        weights = np.asarray(self.weight, dtype=float)
        # One dot product replaces the per-sample Python loop.
        return np.dot(weights, preds)

In [12]:
# Tuned base learners used by the ensembles below. The number after each
# definition is presumably a CV RMSE noted by the author - TODO confirm which
# run produced it.
lasso = Lasso(alpha=0.0006, max_iter=10000)  # 0.10987
ridge = Ridge(alpha=66)  # 0.10992
svr = SVR(
    kernel='rbf',
    C=9,
    gamma=0.0004,
    epsilon=0.01,
)  # 0.10786
ker = KernelRidge(
    kernel='polynomial',
    degree=3,
    alpha=0.4,
    coef0=1.2,
)  # 0.11423
ela = ElasticNet(alpha=0.005, l1_ratio=0.08, max_iter=10000)  # 0.10983
bay = BayesianRidge()  # 0.11020


In [13]:
class stacking(BaseEstimator, RegressorMixin, TransformerMixin):
    """Out-of-fold stacking regressor.

    Every base model is trained once per fold of a fixed shuffled 5-fold
    split. Its held-out predictions form one column of the meta-model's
    training matrix. At prediction time the per-fold copies of each base
    model are averaged to form that model's column.

    Parameters
    ----------
    mod : sequence of estimators
        Base regressors; cloned for every fold, never fitted in place.
    meta_model : estimator
        Second-level regressor. A clone is fitted and stored as
        ``meta_model_``, so the object passed in is left unfitted.
    """

    def __init__(self, mod, meta_model):
        self.mod = mod
        self.meta_model = meta_model
        self.kf = KFold(n_splits=5, random_state=42, shuffle=True)

    def fit(self, X, y):
        """Fit the per-fold base models, then the meta-model on their
        out-of-fold predictions.

        ``X`` and ``y`` are indexed with integer arrays (``X[train_index]``),
        so they must support positional indexing, e.g. NumPy arrays.
        """
        self.saved_model = [list() for _ in self.mod]
        oof_train = np.zeros((X.shape[0], len(self.mod)))

        for i, model in enumerate(self.mod):
            for train_index, val_index in self.kf.split(X, y):
                fold_model = clone(model)
                fold_model.fit(X[train_index], y[train_index])
                self.saved_model[i].append(fold_model)
                oof_train[val_index, i] = fold_model.predict(X[val_index])

        # Fit a clone so the caller's meta_model (which may also be one of
        # the base models) is not mutated.
        self.meta_model_ = clone(self.meta_model)
        self.meta_model_.fit(oof_train, y)
        return self

    def predict(self, X):
        """Predict with the meta-model on fold-averaged base predictions."""
        # Lists, not generators: newer NumPy releases reject non-sequence
        # input to the stacking helpers.
        whole_test = np.column_stack([
            np.column_stack(
                [model.predict(X) for model in fold_models]
            ).mean(axis=1)
            for fold_models in self.saved_model
        ])
        return self.meta_model_.predict(whole_test)

    def get_oof(self, X, y, test_X):
        """Return out-of-fold train predictions and fold-averaged test
        predictions, one column per base model.

        This trains its own clones and does not touch ``saved_model`` or the
        meta-model.

        Returns
        -------
        oof : numpy.ndarray of shape (n_train, n_models)
        test_mean : numpy.ndarray of shape (n_test, n_models)
        """
        # Take the fold count from the splitter instead of hard-coding it.
        n_splits = self.kf.get_n_splits()
        oof = np.zeros((X.shape[0], len(self.mod)))
        test_single = np.zeros((test_X.shape[0], n_splits))
        test_mean = np.zeros((test_X.shape[0], len(self.mod)))
        for i, model in enumerate(self.mod):
            for j, (train_index, val_index) in enumerate(self.kf.split(X, y)):
                clone_model = clone(model)
                clone_model.fit(X[train_index], y[train_index])
                oof[val_index, i] = clone_model.predict(X[val_index])
                test_single[:, j] = clone_model.predict(test_X)
            # Every column of test_single was overwritten for this model.
            test_mean[:, i] = test_single.mean(axis=1)
        return oof, test_mean

In [14]:
# Run features and target through Imputer (default settings). The results are
# plain NumPy arrays, which the stacking class needs for fold indexing.
# NOTE(review): X_test is not passed through an imputer here - confirm it
# contains no missing values.
a = Imputer().fit_transform(X_train)
b = np.ravel(Imputer().fit_transform(y_train_log.values.reshape(-1, 1)))



### Find the best meta-model

In [15]:
# Try each tuned estimator as the meta-model over the same six base models
# and print its mean CV RMSE.
metas = [lasso, ridge, svr, ker, ela, bay]
for meta in metas:
    stack_model = stacking(mod=list(metas), meta_model=meta)
    print(str(meta), rmse_cv(stack_model, a, b).mean())

Lasso(alpha=0.0006, copy_X=True, fit_intercept=True, max_iter=10000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False) 0.10706868155859901
Ridge(alpha=66, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001) 0.11035380437856634
SVR(C=9, cache_size=200, coef0=0.0, degree=3, epsilon=0.01, gamma=0.0004,
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False) 0.10850297252809478
KernelRidge(alpha=0.4, coef0=1.2, degree=3, gamma=None, kernel='polynomial',
      kernel_params=None) 0.10651329835819942
ElasticNet(alpha=0.005, copy_X=True, fit_intercept=True, l1_ratio=0.08,
      max_iter=10000, normalize=False, positive=False, precompute=False,
      random_state=None, selection='cyclic', tol=0.0001, warm_start=False) 0.10756559036424125
BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True,
       fit_intercept=True, lam

In [16]:
# Stack the six tuned learners with KernelRidge as the meta-model.
stack_model = stacking(
    mod=[lasso, ridge, svr, ker, ela, bay],
    meta_model=ker,
)


In [17]:
# Out-of-fold predictions for train and fold-averaged predictions for test,
# one column per base model.
X_train_stack, X_test_stack = stack_model.get_oof(X=a, y=b, test_X=X_test)

In [18]:
# Append the stacked-prediction columns to the right of the original features.
X_train_add = np.concatenate([a, X_train_stack], axis=1)
X_test_add = np.concatenate([X_test, X_test_stack], axis=1)

In [19]:
# Sanity check: both matrices should have the same number of columns.
(X_train_add.shape, X_test_add.shape)

((1458, 269), (1459, 269))

In [20]:
# Score once and reuse the result: each rmse_cv call re-runs the whole 5-fold
# stacking fit, so calling it twice doubled the runtime for identical numbers.
# NOTE(review): the extra columns in X_train_add were produced by get_oof on
# the full training set before this CV split, so these scores may be
# optimistic - confirm.
stack_cv_rmse = rmse_cv(stack_model, X_train_add, b)
print(stack_cv_rmse)
print(stack_cv_rmse.mean())

[0.09810303 0.10596942 0.11384484 0.09684264 0.10444453]
0.10384089369493806


In [21]:
# Fresh stacking ensemble for the final fit on the original feature set.
stack_model = stacking(
    mod=[lasso, ridge, svr, ker, ela, bay],
    meta_model=ker,
)

In [22]:
# Train on the full imputed training set; fit returns the estimator, so its
# repr is displayed as the cell output.
stack_model.fit(X=a, y=b)

stacking(meta_model=KernelRidge(alpha=0.4, coef0=1.2, degree=3, gamma=None, kernel='polynomial',
      kernel_params=None),
     mod=[Lasso(alpha=0.0006, copy_X=True, fit_intercept=True, max_iter=10000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False), Ridge(alpha=66, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=No...True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300,
       normalize=False, tol=0.001, verbose=False)])

In [23]:
# The model was trained on log(target), so exponentiate to undo the transform.
pred = np.exp(stack_model.predict(X=X_test))

In [24]:
# Assemble the submission table (Ids 1461..2919, one per prediction) and write
# it without the index column.
result = pd.DataFrame(data={'Id': range(1461, 2920), 'SalePrice': pred})
result.to_csv(path_or_buf="dataset/submission.csv", index=False)