In [301]:
#######################
# Importing Libraries #
#######################

#--Adding Data Types--#
import numpy as np
import pandas as pd
#--Processing--#
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split
#--RandomForest--#
from sklearn.ensemble import RandomForestRegressor
#--Gradient Boosting--#
from sklearn.ensemble import GradientBoostingRegressor
#--Extreme Gradient Boosting--#
from xgboost import XGBRegressor
#--Linear ElasticNet Regression--#
from sklearn.linear_model import ElasticNet
#--Pipeline For Stacking--#
from sklearn.pipeline import make_pipeline
#--Error Metric--#
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
#--Optimization--#
from bayes_opt import BayesianOptimization

In [292]:
pd.set_option('display.max_columns', 100)

# Data Preperation For Tree's

In [53]:
####################
# Loading the Data #
####################

train_clean = pd.read_csv("train_clean.csv")
test_clean = pd.read_csv("test_clean.csv")

print("Training Dimensions: ", train_clean.shape)
print("Testing Dimensions: ", test_clean.shape)

######################
# Getting Id Columns #
######################
colId = pd.read_csv("test.csv")
colId = colId.Id

Training Dimensions:  (1460, 65)
Testing Dimensions:  (1459, 64)


In [54]:
####################################
# Applying Transforms to Functions #
####################################

train_clean['SalePrice'] = train_clean['SalePrice'].apply(lambda x: np.log(x + 1))
train_clean['GarageArea'] = train_clean['GarageArea'].apply(lambda x: np.log(x + 1))
train_clean['X2ndFlrSF'] = train_clean['X2ndFlrSF'].apply(lambda x: np.log(x + 1))
train_clean['TotalBsmtSF'] = train_clean['TotalBsmtSF'].apply(lambda x: np.log(x + 1))

test_clean['GarageArea'] = test_clean['GarageArea'].apply(lambda x: np.log(x + 1))
test_clean['X2ndFlrSF'] = test_clean['X2ndFlrSF'].apply(lambda x: np.log(x + 1))
test_clean['TotalBsmtSF'] = test_clean['TotalBsmtSF'].apply(lambda x: np.log(x + 1))



for c in train_clean.columns:
    if train_clean[c].dtype == 'object':
        le = LabelEncoder()
        # Need to convert the column type to string in order to encode missing values
        train_clean[c] = le.fit_transform(train_clean[c].astype(str))
for c in test_clean.columns:
    if test_clean[c].dtype == 'object':
        le = LabelEncoder()
        # Need to convert the column type to string in order to encode missing values
        test_clean[c] = le.fit_transform(test_clean[c].astype(str))

In [55]:
# train_clean.head()
# test_clean.head()

In [114]:
##################
# Splitting Data #  #(Only splitting the training data into two more sets called train_set, and test_set)
##################

train_set, test_set = train_test_split(train_clean, test_size = 0.2, random_state = 42)

print(train_set.shape)
print(test_set.shape)

X_train = train_set.drop("SalePrice", axis = 1)
Y_train = train_set.SalePrice

X_test = test_set.drop("SalePrice", axis = 1)
Y_test = test_set.SalePrice

#########################################
# The Full Original Training Set to Use #
#########################################

X_full_train = train_clean.drop("SalePrice", axis = 1)
Y_full_train = train_clean.SalePrice

(1168, 65)
(292, 65)


# Random Forest Section

In [107]:
#################################################
# RandomForest Model To See Best Features Split #
#################################################
mse = []
for i in range(1,65):
    randForest = RandomForestRegressor(n_estimators=1000, min_samples_leaf= 5, 
                                       max_features=i, oob_score = True, random_state=42, n_jobs=3)
    randForest.fit(X_train, Y_train)
    forestPredictions = randForest.predict(X_test)
    mse.append(mean_squared_error(Y_test, forestPredictions))

In [108]:
############################################
# Just to See The Index of the Lowest Tree #
############################################
lowest = 100000
index = 100000
for i,k in enumerate(mse):
    if k < lowest:
        lowest = k
        index = i
print(index, ':', lowest)

48 : 0.0224897570761


In [115]:
############################################
# Running the Forest on The Whole Training #
############################################

randForest = RandomForestRegressor(n_estimators=10000, min_samples_leaf= 5, 
                                       max_features=48, oob_score = True, random_state=42, n_jobs=3)
randForest.fit(X_full_train, Y_full_train)
# forestPredictions = randForest.predict(X_test)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features=48, max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=5,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=10000, n_jobs=3, oob_score=True, random_state=42,
           verbose=0, warm_start=False)

In [116]:
###################################################
# Predicting The Kaggle DataSet with RandomForest #
###################################################

KagglePredictions = randForest.predict(test_clean)
KagglePredictions = [np.exp(x) - 1 for x in KagglePredictions]
pd.DataFrame({"SalePrice":KagglePredictions, "Id": colId}).to_csv("KaggleSubmitPythonForest.csv", index = False)
print(KagglePredictions[0:5])

[122753.72779751559, 152625.27336808146, 178481.14250168673, 182418.27386204046, 197268.41760919656]


# Gradient Boosting Section

In [272]:
################################
# Setting Up Gradient Boosting #
################################

def gradBoostCV(n_estimators, max_depth, max_features):
    val = cross_val_score(GradientBoostingRegressor(
    n_estimators = int(n_estimators), max_depth = int(max_depth), max_features = int(max_features), random_state = 42, learning_rate = 0.05
    ),X_train, Y_train, scoring = 'neg_mean_squared_error', cv = 10, n_jobs = 3).mean()
    return val

gradBoostBaye = BayesianOptimization(gradBoostCV, {
    'n_estimators': (100, 10000),
    'max_depth': (1,15),
    "max_features": (1,65)
})

In [273]:
gradBoostBaye.maximize(n_iter=30)

[31mInitialization[0m
[94m-----------------------------------------------------------------------------[0m
 Step |   Time |      Value |   max_depth |   max_features |   n_estimators | 
    1 | 00m02s | [35m  -0.01905[0m | [32m     1.1963[0m | [32m       56.8297[0m | [32m     1156.1884[0m | 
    2 | 00m08s | [35m  -0.01905[0m | [32m     9.5562[0m | [32m        6.2955[0m | [32m     8017.3138[0m | 
    3 | 00m11s | [35m  -0.01892[0m | [32m     1.7431[0m | [32m       26.0807[0m | [32m     8977.2555[0m | 
    4 | 00m05s | [35m  -0.01787[0m | [32m     8.7703[0m | [32m        7.5826[0m | [32m     3559.2395[0m | 
    5 | 00m23s |   -0.02145 |      8.7159 |        53.9793 |      4532.3364 | 
[31mBayesian Optimization[0m
[94m-----------------------------------------------------------------------------[0m
 Step |   Time |      Value |   max_depth |   max_features |   n_estimators | 
    6 | 00m13s |   -0.02415 |     13.9240 |         1.2810 |       133.790

  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   14 | 00m07s |   -0.01856 |      1.1423 |         1.2138 |      4187.2935 | 


  " state: %s" % convergence_dict)


   15 | 00m21s |   -0.01884 |      1.1000 |        64.5533 |      7605.5941 | 
   16 | 00m25s |   -0.03362 |     13.9095 |        63.8218 |      1708.1108 | 
   17 | 00m06s |   -0.01993 |      1.6500 |        64.8743 |       529.2550 | 


  " state: %s" % convergence_dict)


   18 | 00m30s |   -0.03431 |     13.2111 |        64.5076 |      9585.2933 | 


  " state: %s" % convergence_dict)


   19 | 00m27s |   -0.01888 |      1.2412 |        64.0778 |      8496.8869 | 


  " state: %s" % convergence_dict)


   20 | 00m20s |   -0.01864 |      1.2979 |        64.5157 |      5576.8893 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   21 | 00m21s |   -0.01873 |      1.1811 |        64.3704 |      6481.6189 | 
   22 | 00m07s |   -0.02186 |      2.5542 |        61.4557 |       113.0310 | 


  " state: %s" % convergence_dict)


   23 | 00m15s |   -0.01867 |      1.1093 |        62.8424 |      3799.0358 | 
   24 | 00m13s |   -0.01832 |      1.8275 |         1.1951 |      8587.3630 | 
   25 | 00m09s |   -0.01895 |      1.0514 |         3.2446 |      3512.2068 | 
   26 | 00m31s |   -0.03733 |     14.9167 |        64.0922 |      7018.2622 | 
   27 | 00m30s |   -0.01888 |      1.1939 |        63.6209 |      9995.2925 | 


  " state: %s" % convergence_dict)


   28 | 00m11s |   -0.01862 |      1.0417 |         2.5451 |      6527.2975 | 
   29 | 00m12s |   -0.01835 |      1.0494 |         1.1199 |      7552.5662 | 
   30 | 00m29s |   -0.03324 |     12.8718 |        64.9542 |      2411.7148 | 
   31 | 00m20s |   -0.01854 |      1.4810 |        63.6799 |      4954.5782 | 
   32 | 00m13s |   -0.01943 |      1.1385 |         7.5733 |      8156.3433 | 
   33 | 00m13s |   -0.01833 |      1.0678 |         1.7761 |      7821.4805 | 
   34 | 00m09s |   -0.01913 |      1.1251 |         1.1556 |      2683.1441 | 


  " state: %s" % convergence_dict)


   35 | 00m11s |   -0.01845 |      1.3722 |         1.7314 |      4752.4941 | 


In [274]:
print('Final Results')
print('Gradient Boosting: ', gradBoostBaye.res['max']['max_val'])
print('Gradient Boosting: ', gradBoostBaye.res['max']['max_params'])

Final Results
Gradient Boosting:  -0.0166675431798
Gradient Boosting:  {'n_estimators': 5105.9898871841924, 'max_depth': 2.5304641691974643, 'max_features': 1.3327223853824961}


In [163]:
################################
# MSE of Running the GradBoost #
################################

testGradBoost = GradientBoostingRegressor(n_estimators=3096, max_depth=4, max_features=22, random_state=42, learning_rate=0.05)
testGradBoost.fit(X_train, Y_train)
testGradBoostPredictions = testGradBoost.predict(X_test)
mean_squared_error(Y_test, testGradBoostPredictions) ** 0.5

0.1336176502338593

In [150]:
#############################
# Running Gradient Boosting #
#############################

gradBoost = GradientBoostingRegressor(n_estimators=3096, max_depth=4, max_features=22, random_state=42, learning_rate=0.05)
gradBoost.fit(X_full_train, Y_full_train)

GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.05, loss='ls', max_depth=4, max_features=22,
             max_leaf_nodes=None, min_impurity_decrease=0.0,
             min_impurity_split=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=3096, presort='auto', random_state=42,
             subsample=1.0, verbose=0, warm_start=False)

In [154]:
########################################################
# Predicting The Kaggle DataSet with Gradient Boosting #
########################################################

KagglePredictionsGradBoost = gradBoost.predict(test_clean)
KagglePredictionsGradBoost = [np.exp(x) - 1 for x in KagglePredictionsGradBoost]
pd.DataFrame({"SalePrice":KagglePredictionsGradBoost, "Id": colId}).to_csv("KaggleSubmitPythonGradBoost.csv", index = False)
print(KagglePredictionsGradBoost[0:5])

[124105.46185595595, 155390.34664528855, 196589.95566309863, 195645.76526259384, 178104.70239195603]


# XG Boost Section

In [287]:
###########################
# Setting Up XG Boosting #
###########################

def xgBoostCV(n_estimators, max_depth, gamma, min_child_weight):
    val = cross_val_score(XGBRegressor(n_estimators=int(n_estimators), max_depth=int(max_depth), 
                                      gamma = gamma, min_child_weight = min_child_weight, learning_rate = 0.05),
                          X_train, Y_train, scoring = 'neg_mean_squared_error', 
                          cv = 10, n_jobs = 3).mean()
    return val

xgBoostBaye = BayesianOptimization(xgBoostCV, {
    'n_estimators': (100, 10000),
    'max_depth': (1,30),
    "gamma": (0,50),
    'min_child_weight': (1,50)
})

In [None]:
xgBoostBaye.maximize(n_iter=15)

[31mInitialization[0m
[94m---------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |     gamma |   max_depth |   min_child_weight |   n_estimators | 


In [183]:
print('Final Results')
print('XG Boosting: ', xgBoostBaye.res['max']['max_val'])
print('XG Boosting: ', xgBoostBaye.res['max']['max_params'])

Final Results
XG Boosting:  -0.0199679676457
XG Boosting:  {'n_estimators': 1082.5155677959419, 'max_depth': 12.955466425662708, 'gamma': 0.19778513243654205, 'min_child_weight': 9.1635832012028757}


In [284]:
###############################
# MSE of Running the XG Boost #
###############################

testXGBoost = XGBRegressor(n_estimators=1083, max_depth=13, gamma=0.197, min_child_weight = 9, learning_rate=0.05, nthread = 3)
testXGBoost.fit(X_train, Y_train)
testXGBoostPredictions = testXGBoost.predict(X_test)
mean_squared_error(Y_test, testXGBoostPredictions) ** 0.5

0.14719576021259259

In [207]:
#######################
# Running XG Boosting #
#######################

XGBoost = XGBRegressor(n_estimators=1083, max_depth=13, gamma = 0.197, min_child_weight=9, learning_rate=0.05, nthread = 3)
XGBoost.fit(X_full_train, Y_full_train)

XGBRegressor(base_score=0.5, colsample_bylevel=1, colsample_bytree=1,
       gamma=0.197, learning_rate=0.05, max_delta_step=0, max_depth=13,
       min_child_weight=9, missing=None, n_estimators=1083, nthread=3,
       objective='reg:linear', reg_alpha=0, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1)

In [186]:
##################################################
# Predicting The Kaggle DataSet with XG Boosting #
##################################################

KagglePredictionsXGBoost = XGBoost.predict(test_clean)
KagglePredictionsXGBoost = [np.exp(x) - 1 for x in KagglePredictionsXGBoost]
pd.DataFrame({"SalePrice":KagglePredictionsXGBoost, "Id": colId}).to_csv("KaggleSubmitPythonXGBoost.csv", index = False)
print(KagglePredictionsXGBoost[0:5])

[123001.765625, 154870.75, 177175.359375, 182500.109375, 196891.5625]


# Data Preparation For Linear

In [302]:
####################
# Loading the Data #
####################

train_clean_2 = pd.read_csv("train_clean.csv")
test_clean_2 = pd.read_csv("test_clean.csv")

print("Training Dimensions: ", train_clean_2.shape)
print("Testing Dimensions: ", test_clean_2.shape)

######################
# Getting Id Columns #
######################
colId_2 = pd.read_csv("test.csv")
colId_2 = colId_2.Id

Training Dimensions:  (1460, 65)
Testing Dimensions:  (1459, 64)


In [303]:
####################################
# Applying Transforms to Functions #
####################################

full_one_hot = pd.concat([train_clean_2, test_clean_2])
full_one_hot['SalePrice'] = full_one_hot['SalePrice'].apply(lambda x: np.log(x+1))
full_one_hot['GarageArea'] = full_one_hot['GarageArea'].apply(lambda x: np.log(x+1))
full_one_hot['X2ndFlrSF'] = full_one_hot['X2ndFlrSF'].apply(lambda x: np.log(x+1))
full_one_hot['TotalBsmtSF'] = full_one_hot['TotalBsmtSF'].apply(lambda x: np.log(x+1))

full_one_hot = pd.get_dummies(full_one_hot, drop_first=True, dummy_na=True)

one_hot_train = full_one_hot[0:1460]
one_hot_test = full_one_hot[1460:].drop('SalePrice', axis = 1)
# train_clean_2['SalePrice'] = train_clean_2['SalePrice'].apply(lambda x: np.log(x + 1))
# train_clean_2['GarageArea'] = train_clean_2['GarageArea'].apply(lambda x: np.log(x + 1))
# train_clean_2['X2ndFlrSF'] = train_clean_2['X2ndFlrSF'].apply(lambda x: np.log(x + 1))
# train_clean_2['TotalBsmtSF'] = train_clean_2['TotalBsmtSF'].apply(lambda x: np.log(x + 1))

# test_clean_2['GarageArea'] = test_clean_2['GarageArea'].apply(lambda x: np.log(x + 1))
# test_clean_2['X2ndFlrSF'] = test_clean_2['X2ndFlrSF'].apply(lambda x: np.log(x + 1))
# test_clean_2['TotalBsmtSF'] = test_clean_2['TotalBsmtSF'].apply(lambda x: np.log(x + 1))

# one_hot_train = pd.get_dummies(train_clean_2, drop_first=True, dummy_na=True)
# one_hot_test = pd.get_dummies(test_clean_2, drop_first=True, dummy_na=True)

In [304]:
one_hot_test.tail()

Unnamed: 0,BedroomAbvGr,BsmtFinSF1,BsmtFullBath,BsmtHalfBath,BsmtUnfSF,EnclosedPorch,Fireplaces,FullBath,GarageArea,GarageCars,GarageYrBlt,GrLivArea,HalfBath,KitchenAbvGr,LotArea,LotFrontage,MSSubClass,MasVnrArea,MoSold,OpenPorchSF,OverallCond,OverallQual,ScreenPorch,TotRmsAbvGrd,TotalBsmtSF,WoodDeckSF,X1stFlrSF,X2ndFlrSF,YearBuilt,YearRemodAdd,YrSold,BldgType_2fmCon,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,BldgType_nan,BsmtCond_Fa,BsmtCond_Gd,BsmtCond_Po,BsmtCond_TA,BsmtCond_nan,BsmtExposure_Av,BsmtExposure_Gd,BsmtExposure_Mn,BsmtExposure_No,BsmtExposure_nan,BsmtFinType1_Absent,BsmtFinType1_BLQ,BsmtFinType1_GLQ,BsmtFinType1_LwQ,...,MasVnrType_nan,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Neighborhood_nan,PavedDrive_P,PavedDrive_Y,PavedDrive_nan,RoofStyle_Gable,RoofStyle_Gambrel,RoofStyle_Hip,RoofStyle_Mansard,RoofStyle_Shed,RoofStyle_nan,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,SaleCondition_nan,SaleType_CWD,SaleType_Con,SaleType_ConLD,SaleType_ConLI,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleType_nan
1454,3,0.0,0,0,6.304449,0.0,0,1,0.0,0,0,6.996681,1,1,7.568896,3.091042,160,0.0,6,0.0,7,4,0.0,5,6.304449,0.0,6.304449,6.304449,1970,1970,2006,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0
1455,3,5.533389,0,0,5.686975,0.0,0,1,5.659482,1,1,6.996681,1,1,7.546974,3.091042,160,0.0,4,3.218876,5,4,0.0,6,6.304449,0.0,6.304449,6.304449,1970,1970,2006,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
1456,4,7.110696,1,0,0.0,0.0,1,1,6.357842,2,1,7.110696,0,1,9.903538,5.081404,20,0.0,9,0.0,7,5,0.0,7,7.110696,6.163315,7.110696,0.0,1960,1996,2006,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
1457,3,5.823046,0,1,6.356108,0.0,0,1,0.0,0,0,6.878326,0,1,9.253591,4.143135,85,0.0,7,3.496508,5,5,0.0,6,6.816736,4.394449,6.878326,0.0,1992,1992,2006,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0
1458,3,6.632002,0,0,5.476464,0.0,1,2,6.47851,3,1,7.601402,1,1,9.172431,4.317488,60,4.553877,11,3.89182,5,7,0.0,9,6.904751,5.252273,6.904751,6.912743,1993,1994,2006,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0


In [305]:
##################
# Splitting Data #  #(Only splitting the training data into two more sets called train_set, and test_set)
##################

train_set_2, test_set_2 = train_test_split(one_hot_train, test_size = 0.2, random_state = 42)

print("Train Shape: ", train_set_2.shape)
print("Test Shape: ", test_set_2.shape)

X_train_2 = train_set_2.drop("SalePrice", axis = 1)
Y_train_2 = train_set_2.SalePrice

X_test_2 = test_set_2.drop("SalePrice", axis = 1)
Y_test_2 = test_set_2.SalePrice

#########################################
# The Full Original Training Set to Use #
#########################################

X_full_train_2 = one_hot_train.drop("SalePrice", axis = 1)
Y_full_train_2 = one_hot_train.SalePrice
print("Full Shape: ", X_full_train_2.shape)

Train Shape:  (1168, 250)
Test Shape:  (292, 250)
Full Shape:  (1460, 249)


# Linear Regression Section

In [306]:
def LinRegCV(alpha, l1_ratio):
    val = cross_val_score(make_pipeline(RobustScaler(), ElasticNet(alpha = alpha, l1_ratio = l1_ratio, random_state=42)),
                         X_train_2, Y_train_2, scoring = 'neg_mean_squared_error', 
                          cv = 10, n_jobs = 3).mean()
    return val

LinRegBaye = BayesianOptimization(LinRegCV,{
    'alpha': (0,1),
    'l1_ratio': (0,1)
})

In [307]:
LinRegBaye.maximize(n_iter=30)

[31mInitialization[0m
[94m------------------------------------------------------[0m
 Step |   Time |      Value |     alpha |   l1_ratio | 
    1 | 00m00s | [35m  -0.04077[0m | [32m   0.1796[0m | [32m    0.1510[0m | 
    2 | 00m00s |   -0.05115 |    0.4039 |     0.0424 | 
    3 | 00m00s |   -0.06231 |    0.4213 |     0.1680 | 
    4 | 00m00s |   -0.13056 |    0.6442 |     0.4367 | 
    5 | 00m00s |   -0.14448 |    0.7651 |     0.5824 | 
[31mBayesian Optimization[0m
[94m------------------------------------------------------[0m
 Step |   Time |      Value |     alpha |   l1_ratio | 


  self._final_estimator.fit(Xt, y, **fit_params)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)


    6 | 00m04s |   -0.04109 |    0.0000 |     1.0000 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)




    9 | 00m02s |   -0.15262 |    1.0000 |     1.0000 | 



  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)


   11 | 00m03s |   -0.06178 |    0.0000 |     0.0000 | 


  self._final_estimator.fit(Xt, y, **fit_params)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)


   12 | 00m04s |   -0.06003 |    0.0000 |     0.4548 | 




   13 | 00m06s | [35m  -0.03894[0m | [32m   0.2253[0m | [32m    0.0000[0m | 




   14 | 00m05s |   -0.03963 |    0.2361 |     0.0654 | 




  " state: %s" % convergence_dict)


   17 | 00m03s |   -0.03903 |    0.9992 |     0.0002 | 
   18 | 00m04s | [35m  -0.03096[0m | [32m   0.7318[0m | [32m    0.0003[0m | 


  " state: %s" % convergence_dict)


   19 | 00m04s |   -0.12518 |    0.2522 |     1.0000 | 
   20 | 00m04s |   -0.11468 |    1.0000 |     0.1826 | 
   21 | 00m04s |   -0.08153 |    0.1980 |     0.6444 | 


  self._final_estimator.fit(Xt, y, **fit_params)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)


   22 | 00m06s |   -0.03849 |    0.0000 |     0.7075 | 
   23 | 00m06s |   -0.05539 |    0.6197 |     0.0964 | 
   24 | 00m06s |   -0.03972 |    0.8740 |     0.0000 | 




   25 | 00m06s |   -0.15262 |    1.0000 |     0.7404 | 


  self._final_estimator.fit(Xt, y, **fit_params)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)
  self._final_estimator.fit(Xt, y, **fit_params)
  positive)


   26 | 00m07s |   -0.04388 |    0.0000 |     0.1901 | 




   27 | 00m07s | [35m  -0.02728[0m | [32m   0.5780[0m | [32m    0.0000[0m | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   28 | 00m02s |   -0.03887 |    0.1056 |     0.0582 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   29 | 00m04s | [35m  -0.02365[0m | [32m   0.6182[0m | [32m    0.0000[0m | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   30 | 00m04s |   -0.02393 |    0.6364 |     0.0000 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)




   31 | 00m03s |   -0.02598 |    0.6222 |     0.0000 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   32 | 00m02s |   -0.12561 |    0.2973 |     0.8880 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   33 | 00m02s |   -0.02872 |    0.6778 |     0.0020 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   34 | 00m04s |   -0.02893 |    0.5984 |     0.0000 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   35 | 00m03s |   -0.13039 |    0.4050 |     0.8089 | 




In [308]:
print('Final Results')
print('Linear Regression: ', LinRegBaye.res['max']['max_val'])
print('Linear Regression: ', LinRegBaye.res['max']['max_params'])

Final Results
Linear Regression:  -0.023646021655
Linear Regression:  {'alpha': 0.61820865457212471, 'l1_ratio': 0.0}


In [320]:
########################################
# MSE of Running the Linear Regression #
#################### ####################

testLinReg = make_pipeline(RobustScaler(), ElasticNet(alpha = 0.618208, l1_ratio = 0, random_state=42))
testLinReg.fit(X_train_2, Y_train_2)
testLinRegPredictions = testLinReg.predict(X_test_2)
# print(testLinRegPredictions[:20])
mean_squared_error(Y_test_2, testLinRegPredictions)**.5

# testLinReg = Ridge(random_state=42, tol=0.000000001)
# testLinReg.fit(X_train_2, Y_train_2)
# testLinRegPredictions = testLinReg.predict(X_test_2)
# print(testLinRegPredictions[:20])
# print(mean_squared_error(Y_test_2, testLinRegPredictions)**.5)



0.17270081286615011

In [321]:
#############################
# Running Linear Regression #
#############################

LinReg = make_pipeline(RobustScaler(), ElasticNet(alpha = 0.618208, l1_ratio = 0.0, random_state=42))
LinReg.fit(X_full_train_2, Y_full_train_2)



Pipeline(memory=None,
     steps=[('robustscaler', RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
       with_scaling=True)), ('elasticnet', ElasticNet(alpha=0.618208, copy_X=True, fit_intercept=True, l1_ratio=0.0,
      max_iter=1000, normalize=False, positive=False, precompute=False,
      random_state=42, selection='cyclic', tol=0.0001, warm_start=False))])

In [323]:
########################################################
# Predicting The Kaggle DataSet with Linear Regression #
########################################################

KagglePredictionsLinReg = LinReg.predict(one_hot_test)
KagglePredictionsLinReg = [np.exp(x) - 1 for x in KagglePredictionsLinReg]
pd.DataFrame({"SalePrice":KagglePredictionsLinReg, "Id": colId_2}).to_csv("KaggleSubmitPythonLinReg.csv", index = False)
print(KagglePredictionsLinReg[0:5])

[128071.99095266343, 157952.70122511886, 187452.50666754215, 204913.685601873, 184032.73106373384]


# Support Vector Section