In [1]:
# Import libraries necessary for this project
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display # Allows the use of display() for DataFrames
import scipy 
from matplotlib import pylab
import time

# Pretty display for notebooks
%matplotlib inline

## Import data to training the models

In [2]:
#Load the saved file to verify
train_data = pd.read_pickle("./data/preprocessed/preprocessed_train.pkl")
display(train_data.head())

#ave the identification number
Id = train_data['Id']

#save target varible
target = train_data['SalePrice']
display(train_data['SalePrice'].head(10))

#remove target and ID, only features
train_data.drop(labels=['SalePrice', 'Id'], axis=1, inplace=True)

display(train_data.head())

Unnamed: 0,Id,MSSubClass,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,...,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,SalePrice
0,1,4.110874,9.04204,7,5,2003,2003,6.561031,0.0,5.01728,...,0,0,1,0,0,0,0,1,0,12.247699
1,2,3.044522,9.169623,6,8,1976,1976,6.886532,0.0,5.652489,...,0,0,1,0,0,0,0,1,0,12.109016
2,3,4.110874,9.328212,7,5,2001,2002,6.188264,0.0,6.075346,...,0,0,1,0,0,0,0,1,0,12.317171
3,4,4.26268,9.164401,7,5,1915,1970,5.379897,0.0,6.293419,...,0,0,1,1,0,0,0,0,0,11.849405
4,5,4.110874,9.565284,8,5,2000,2000,6.486161,0.0,6.196444,...,0,0,1,0,0,0,0,1,0,12.42922


0    12.247699
1    12.109016
2    12.317171
3    11.849405
4    12.429220
5    11.870607
6    12.634606
7    12.206078
8    11.774528
9    11.678448
Name: SalePrice, dtype: float64

Unnamed: 0,MSSubClass,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,...,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
0,4.110874,9.04204,7,5,2003,2003,6.561031,0.0,5.01728,6.753438,...,0,0,0,1,0,0,0,0,1,0
1,3.044522,9.169623,6,8,1976,1976,6.886532,0.0,5.652489,7.141245,...,0,0,0,1,0,0,0,0,1,0
2,4.110874,9.328212,7,5,2001,2002,6.188264,0.0,6.075346,6.82546,...,0,0,0,1,0,0,0,0,1,0
3,4.26268,9.164401,7,5,1915,1970,5.379897,0.0,6.293419,6.629363,...,0,0,0,1,1,0,0,0,0,0
4,4.110874,9.565284,8,5,2000,2000,6.486161,0.0,6.196444,7.044033,...,0,0,0,1,0,0,0,0,1,0


### Split features and target

In [3]:
#Split TRAINING SET into train and test sets
from sklearn.model_selection import train_test_split

#split dataset where 33% is testing dataset
X_train, X_test, y_train, y_test = train_test_split(train_data, target, test_size=0.33, random_state=42)

## Verify the shape
display(X_train.shape)
display(X_test.shape)

display(y_train.shape)
display(y_test.shape)

(978, 220)

(482, 220)

(978L,)

(482L,)

## Individual Models

In this section, I will try many machine learning models used to regression problems:
Linear Regression (with Ridge and Lasso regularization); Support Vector Regression; Linear
Support Vector Regression; Stochastic Gradient Descent; K Neighbors Regressor; Kernel Ridge
Regression; Decision Tree Regressor;
The method Cross Validation Score with score parameter defined as Root Mean Squared Error
will be used to evaluate and compare the result of each individual learner. Just the bests model
will be fine-tuned in the next step.


> NOTE: [Skelarn.org](http://scikit-learn.org/stable/) was used as base to write comments about methods.


## Fine-Tune Individual Models

As the promising models were defined in the last step, now I will fine-tune the most promising
models. The method to perform the fine-tune the hyper parameters will be Grid Search
CV configured according to the project requisites. Thereby, each individual model will find the
best parameters configuration.

### Croos-Valiation Score for training dataset

In [4]:
from sklearn.model_selection import cross_val_score
def rmse_CV(model, X, y):
    print "RMSE Metric"
    neg_msqe = cross_val_score(model, X, y, scoring='neg_mean_squared_error', cv=3, n_jobs=-1, verbose=0)
    rmse= np.sqrt(-neg_msqe)
    print "Scores: ",rmse 
    print 'Mean:', rmse.mean()
    print 'Standard Deviation: ', rmse.std()

### Grid Search Function: Fine-Tune

In [6]:
from sklearn.model_selection import GridSearchCV

def GridSearch(reg, parameters, X, y):
    """
    excecute grid search to determine the optimized parameters combination
    
    input
    reg: the model; parameters: dict of parameters and options; X: dataframe, training features; y:pandas series, target feature;
    """
    GSCV = GridSearchCV(reg, parameters, verbose=0, cv=5, n_jobs=-1)
    best_model = GSCV.fit(X, y)
    display(best_model)
    display(best_model.best_params_)
    
    #return the configuration of best model
    return best_model.best_estimator_

# Individual and Default Models

### Decision Tree

In [25]:
#Import model
from sklearn.tree import DecisionTreeRegressor

#Create Regressor
tree_reg = DecisionTreeRegressor()

#Fit
tree_reg.fit(X_train, y_train)

#Predict
rmse_CV(tree_reg, X_test, y_test)

RMSE Metric
Scores:  [0.22812705 0.21240967 0.19650371]
Mean: 0.21234680972875672
Standard Deviation:  0.012910250261882738


### K Neighbors Regressor

Whrite about method: Regression based on k-nearest neighbors.
The target is predicted by local interpolation of the targets associated of the nearest neighbors in the training set.

In [26]:
from sklearn.neighbors import KNeighborsRegressor

#Create the regressor
neigh = KNeighborsRegressor()

#Fit
neigh.fit(X_train, y_train)

#Predict
#display(rmse(y_test, neigh.predict(X_test)))
#Predict
rmse_CV(neigh, X_test, y_test)

RMSE Metric
Scores:  [0.2898332  0.27254986 0.24603219]
Mean: 0.2694717493515401
Standard Deviation:  0.018013663196502137


### Linear Support Vector Machine Regressor

In [27]:
from sklearn.svm import LinearSVR

#Create the regressor
lin_svr = LinearSVR()

#Fit
lin_svr.fit(X_train, y_train)

#Predict
#display(rmse(y_test, lin_svr.predict(X_test)))
rmse_CV(lin_svr, X_test, y_test)

RMSE Metric
Scores:  [0.26438393 0.39685316 0.39551933]
Mean: 0.3522521412970557
Standard Deviation:  0.06213459366926334


### Epsilon Support Vector Regression

In [28]:
from sklearn.svm import SVR

#Create Regressor
svr = SVR()

#Fit
svr.fit(X_train, y_train)

#Predict
#display(rmse(y_test, svr.predict(X_test)))
rmse_CV(svr, X_test, y_test)

RMSE Metric
Scores:  [0.31528728 0.2957921  0.26767401]
Mean: 0.2929177938961795
Standard Deviation:  0.01954400345949258


### Kernel Ridge Regressor

"Kernel ridge regression (KRR) [M2012] combines Ridge Regression (linear least squares with l2-norm regularization) with the kernel trick. It thus learns a linear function in the space induced by the respective kernel and the data. For non-linear kernels, this corresponds to a non-linear function in the original space.

The form of the model learned by KernelRidge is identical to support vector regression (SVR). However, different loss functions are used: KRR uses squared error loss while support vector regression uses \epsilon-insensitive loss, both combined with l2 regularization. In contrast to SVR, fitting KernelRidge can be done in closed-form and is typically faster for medium-sized datasets." (Kernel ridge regression, Scikit-Learn)

In [29]:
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics.pairwise import chi2_kernel, laplacian_kernel
#Create Regressor
ker_rid = KernelRidge()

#Fit
ker_rid.fit(X_train, y_train)

#Predict
#display(rmse(y_test, ker_rid.predict(X_test)))
rmse_CV(ker_rid, X_test, y_test)

RMSE Metric
Scores:  [0.1315246  0.1347771  0.13511582]
Mean: 0.13380583733776083
Standard Deviation:  0.0016189972512706927


### Linear Regression

Ordinary least squares Linear Regression.


In [34]:
from sklearn.linear_model import LinearRegression

#Create regressor
lin_reg = LinearRegression(n_jobs=-1)

#Fit
lin_reg.fit(X_train, y_train)

#Predict
#display(rmse(y_test, ker_rid.predict(X_test)))
rmse_CV(lin_reg, X_test, y_test)

RMSE Metric
Scores:  [0.17171291 0.14545081 0.17146583]
Mean: 0.1628765145739622
Standard Deviation:  0.012322249565294896


### Lasso 

In [35]:
from sklearn.linear_model import Lasso

#create 
lasso = Lasso()

#Fit
lasso.fit(X_train, y_train)

#Predict
#display(rmse(y_test, ker_rid.predict(X_test)))
rmse_CV(lasso, X_test, y_test)

RMSE Metric
Scores:  [0.29397053 0.28491712 0.25919599]
Mean: 0.27936121223583466
Standard Deviation:  0.01473020244739484


### Elastic Net Regressor

"ElasticNet is a linear regression model trained with L1 and L2 prior as regularizer. This combination allows for learning a sparse model where few of the weights are non-zero like Lasso, while still maintaining the regularization properties of Ridge. We control the convex combination of L1 and L2 using the l1_ratio parameter."


In [36]:
from sklearn.linear_model import ElasticNet

#create regrossor
elastic = ElasticNet()

#Fit
elastic.fit(X_train, y_train)

#Predict
#display(rmse(y_test, ker_rid.predict(X_test)))
rmse_CV(elastic, X_test, y_test)

RMSE Metric
Scores:  [0.28871097 0.28027105 0.25813338]
Mean: 0.27570513216180476
Standard Deviation:  0.012894002967574909


### Multi-layer Perceptron Regressor

In [38]:
from sklearn.neural_network import MLPRegressor

#Create regressor
MLP = MLPRegressor()

#Fit
MLP.fit(X_train, y_train)

#Predict
#display(rmse(y_test, ker_rid.predict(X_test)))
rmse_CV(MLP, X_test, y_test)

RMSE Metric
Scores:  [106.09450874 121.79677806  10.76018625]
Mean: 79.55049101634147
Standard Deviation:  49.06267989415548


### Gaussian Process Regression



In [39]:
from sklearn.gaussian_process import GaussianProcessRegressor

#Create Regressor
gpr = GaussianProcessRegressor()

#Fit
gpr.fit(X_train, y_train)

#Predict
#display(rmse(y_test, ker_rid.predict(X_test)))
rmse_CV(gpr, X_test, y_test)

RMSE Metric
Scores:  [11.99715014 11.9992195  12.05912791]
Mean: 12.018499181960719
Standard Deviation:  0.02874126575553559


### Gradient Boosting Regressor

In [40]:
from sklearn.ensemble import GradientBoostingRegressor

#Create Regressor
GBR = GradientBoostingRegressor()

#Fit
GBR.fit(X_train, y_train)

#Predict
#display(rmse(y_test, ker_rid.predict(X_test)))
rmse_CV(GBR, X_test, y_test)


RMSE Metric
Scores:  [0.12638924 0.16237844 0.12653831]
Mean: 0.13843533085744136
Standard Deviation:  0.01693044332030328


### Random Forest Regressor

In [41]:
#Random Forest regressor
from sklearn.ensemble import RandomForestRegressor

#Create Regressor
rand_F = RandomForestRegressor(n_jobs=-1)

#Fit
rand_F.fit(X_train, y_train)

#Predict
#display(rmse(y_test, ker_rid.predict(X_test)))
rmse_CV(rand_F, X_test, y_test)


RMSE Metric
Scores:  [0.17697204 0.18488708 0.15988305]
Mean: 0.17391405724450704
Standard Deviation:  0.010434362521077236


## Fine-Tune Promissing Models

### Decision Tree Regressor

Write about thi method


In [7]:
#Import model
from sklearn.tree import DecisionTreeRegressor

#Create Regressor
tree_reg = DecisionTreeRegressor()

#Create Hyperparamter Search Space
criterion = ['mse', 'friedman_mse', 'mae']
splitter = ['best', 'random']
max_depth = [2, 4, 6, 8, 10, 12] #== 'None' becuase nodes are expanded until 
#all leaves are pure or until all leaves contain less than min_samples_split samples
max_features = ['auto', 'sqrt', 'log2', None]

#Create hyperparameter options
tree_hyper = dict(criterion=criterion, splitter=splitter, max_features=max_features)
#print tree_hyper


#Apply Grid Search To Determine the best model
s_time = time.time()
best_tree = GridSearch(tree_reg, tree_hyper, X_train, y_train)
f_time = time.time()
print "Training and Fine-Tune Time: {:3f}".format((s_time - f_time))
#display(best_tree.get_params)




GridSearchCV(cv=5, error_score='raise',
       estimator=DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best'),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'max_features': ['auto', 'sqrt', 'log2', None], 'splitter': ['best', 'random'], 'criterion': ['mse', 'friedman_mse', 'mae']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

{'criterion': 'friedman_mse', 'max_features': 'auto', 'splitter': 'best'}

Training and Fine-Tune Time: -11.225000


In [8]:
print best_tree

DecisionTreeRegressor(criterion='friedman_mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, presort=False, random_state=None,
           splitter='best')


> **NOTE:** Default Parameters

In [59]:
score_tree = rmse_CV(best_tree, X_test, y_test)

RMSE Metric
Scores:  [0.2038968  0.23366443 0.22982821]
Mean: 0.22246314630207745
Standard Deviation:  0.013221472127812437


### K Neighbors Regressor

Whrite about method: Regression based on k-nearest neighbors.
The target is predicted by local interpolation of the targets associated of the nearest neighbors in the training set.

In [60]:
from sklearn.neighbors import KNeighborsRegressor

#Create the regressor
neigh = KNeighborsRegressor()

#Create Hyperparamter Search Space
n_neighbors = [2, 4, 6, 8, 10, 12, 15, 20, 25]
weights = ['uniform', 'distance']
algorithm = ['ball_tree', 'kd_tree', 'brute']
#algorithm = ['auto']
p = [1, 2]

#Create hyperparameter options
neigh_param = dict(n_neighbors=n_neighbors, weights=weights, algorithm=algorithm, p=p)
#print tree_hyper


#Apply Grid Search To Determine the best model
s_time = time.time()
best_neigh = GridSearch(neigh, neigh_param, X_train, y_train)
f_time = time.time()
print "Training and Fine-Tune Time: {:3f}".format((s_time - f_time))
#display(best_tree.get_params)





GridSearchCV(cv=5, error_score='raise',
       estimator=KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
          metric_params=None, n_jobs=1, n_neighbors=5, p=2,
          weights='uniform'),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'p': [1, 2], 'weights': ['uniform', 'distance'], 'algorithm': ['ball_tree', 'kd_tree', 'brute'], 'n_neighbors': [2, 4, 6, 8, 10, 12, 15, 20, 25]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

{'algorithm': 'ball_tree', 'n_neighbors': 6, 'p': 1, 'weights': 'distance'}

Training and Fine-Tune Time: -32.861000


In [None]:
print best_neigh

In [47]:
#print best_neigh.best_params_

{'n_neighbors': 6, 'weights': 'distance', 'algorithm': 'ball_tree', 'p': 1}


In [61]:
score_neigh = rmse_CV(best_neigh, X_test, y_test)

RMSE Metric
Scores:  [0.25410762 0.24924477 0.20992898]
Mean: 0.23776045523237385
Standard Deviation:  0.01977970242938253


### Kernel Ridge Regressor

"Kernel ridge regression (KRR) [M2012] combines Ridge Regression (linear least squares with l2-norm regularization) with the kernel trick. It thus learns a linear function in the space induced by the respective kernel and the data. For non-linear kernels, this corresponds to a non-linear function in the original space.

The form of the model learned by KernelRidge is identical to support vector regression (SVR). However, different loss functions are used: KRR uses squared error loss while support vector regression uses \epsilon-insensitive loss, both combined with l2 regularization. In contrast to SVR, fitting KernelRidge can be done in closed-form and is typically faster for medium-sized datasets."

In [11]:
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics.pairwise import chi2_kernel, laplacian_kernel
#Create Regressor
ker_rid = KernelRidge()

#Create Hyperparamter Search Space
alpha= [1.0, 0.1, 0.01, 0.001, 0.0001, 0.0]
kernel = ['laplacian', 'rbf', 'linear', 'poly', 'sigmoid']
degree = [2,3,4,5,6,7]
gamma = np.logspace(-3, 2, 6)

#Create hyperparameter options
ker_rid_param = dict(alpha=alpha, kernel=kernel, degree=degree, gamma=gamma)

#Apply Grid Search To Determine the best model
s_time = time.time()
best_ker_rid = GridSearch(ker_rid, ker_rid_param, X_train, y_train)
f_time = time.time()
print "Training and Fine-Tune Time: {:3f}".format((s_time - f_time))

GridSearchCV(cv=5, error_score='raise',
       estimator=KernelRidge(alpha=1, coef0=1, degree=3, gamma=None, kernel='linear',
      kernel_params=None),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'kernel': ['laplacian', 'rbf', 'linear', 'poly', 'sigmoid'], 'gamma': array([1.e-03, 1.e-02, 1.e-01, 1.e+00, 1.e+01, 1.e+02]), 'degree': [2, 3, 4, 5, 6, 7], 'alpha': [1.0, 0.1, 0.01, 0.001, 0.0001, 0.0]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

{'alpha': 1.0, 'degree': 2, 'gamma': 0.001, 'kernel': 'linear'}

Training and Fine-Tune Time: -924.851000


In [None]:
print best_ker

In [12]:
score_ker_rid = rmse_CV(best_ker_rid, X_test, y_test)

RMSE Metric
Scores:  [0.1315246  0.1347771  0.13511582]
Mean: 0.13380583733776083
Standard Deviation:  0.0016189972512706927


### Stochastic Gradient Descent

whrite about model: "The class SGDRegressor implements a plain stochastic gradient descent learning routine which supports different loss functions and penalties to fit linear regression models. **SGDRegressor is well suited for regression problems with a large number of training samples (> 10.000)**, for other problems we recommend Ridge, Lasso, or ElasticNet." (Stochastic Gradient Descent, Scikit-Learn)

In [11]:
#from sklearn.linear_model import SGDRegressor

#Create a regressor
#sgd = SGDRegressor()

#Create Hyperparamter Search Space
#loss = ['squared_loss', 'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive']
#penalty = ['l2', 'l1', 'elasticnet']
#alpha= [1.0, 0.1, 0.01, 0.001, 0.0001]
#epsilon=[0, 0, 0.01, 0.1, 0.5, 1, 2, 4]
#learning_rate = ['constant', 'optimal', 'invscaling']

#Create hyperparameter options
#sgd_param = dict(loss=loss, penalty=penalty, alpha=alpha, epsilon=epsilon, learning_rate=learning_rate)

#Apply Grid Search To Determine the best model
#s_time = time.time()
#best_sgd = GridSearch(sgd, sgd_param, X_train, y_train)
#f_time = time.time()
#print "Training and Fine-Tune Time: {:3f}".format((s_time - f_time))


#sgd_reg.fit(X, y)



GridSearchCV(cv=5, error_score='raise',
       estimator=SGDRegressor(alpha=0.0001, average=False, epsilon=0.1, eta0=0.01,
       fit_intercept=True, l1_ratio=0.15, learning_rate='invscaling',
       loss='squared_loss', max_iter=None, n_iter=None, penalty='l2',
       power_t=0.25, random_state=None, shuffle=True, tol=None, verbose=0,
       warm_start=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'penalty': ['l2', 'l1', 'elasticnet'], 'epsilon': [0, 0, 0.01, 0.1, 0.5, 1, 2, 4], 'learning_rate': ['constant', 'optimal', 'invscaling'], 'alpha': [1.0, 0.1, 0.01, 0.001, 0.0001], 'loss': ['squared_loss', 'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

Training and Fine-Tune Time: -284.812000


In [12]:
#score_sgd = rmse_CV(best_sgd, X_train, y_train)

RMSE Metric
Scores:  [0.02460181 0.02275077 0.02262368 0.02334927 0.02355519 0.02407193
 0.02302386 0.02109088 0.02132617 0.02126972]
Mean: 0.02276632855466759
Standard Deviation:  0.001150810555570769


### Linear Regression

Ordinary least squares Linear Regression.


In [13]:
from sklearn.linear_model import LinearRegression

#Create regressor
lin_reg = LinearRegression(n_jobs=-1)

#Create Hyperparamter Search Space

#There isn't parameters to tune

best_lin_reg = lin_reg

#lin_reg.fit(X,y)

In [14]:
score_lin_reg = rmse_CV(best_lin_reg, X_test, y_test)

RMSE Metric
Scores:  [0.17171291 0.14545081 0.17146583]
Mean: 0.1628765145739622
Standard Deviation:  0.012322249565294896


### Lasso 

In [15]:
from sklearn.linear_model import Lasso

#create 
lasso = Lasso()

#Create Hyperparamter Search Space
alpha= [1.0, 0.1, 0.01, 0.001, 0.0001, 0.0]
fit_intercept = [True, False]
normalize=[False]
#precompute=[auto]
selection=['cyclic', 'random']

#Create hyperparameter options
lasso_param = dict(alpha=alpha, fit_intercept=fit_intercept, normalize=normalize, selection=selection)

#Apply Grid Search To Determine the best model
s_time = time.time()
best_lasso= GridSearch(lasso, lasso_param, X_train, y_train)
f_time = time.time()
print "Training and Fine-Tune Time: {:3f}".format((s_time - f_time))



GridSearchCV(cv=5, error_score='raise',
       estimator=Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'normalize': [False], 'alpha': [1.0, 0.1, 0.01, 0.001, 0.0001, 0.0], 'selection': ['cyclic', 'random'], 'fit_intercept': [True, False]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

{'alpha': 0.0001,
 'fit_intercept': True,
 'normalize': False,
 'selection': 'random'}

Training and Fine-Tune Time: -11.879000


In [16]:
score_lasso = rmse_CV(best_lasso, X_test, y_test)

RMSE Metric
Scores:  [0.15035719 0.13107585 0.13499215]
Mean: 0.13880839390878633
Standard Deviation:  0.00832127350957245


### Elastic Net Regressor

"ElasticNet is a linear regression model trained with L1 and L2 prior as regularizer. This combination allows for learning a sparse model where few of the weights are non-zero like Lasso, while still maintaining the regularization properties of Ridge. We control the convex combination of L1 and L2 using the l1_ratio parameter." (Scikit-Learn Documentation)


In [17]:
from sklearn.linear_model import ElasticNet

#create regrossor
elastic = ElasticNet()

##Create Hyperparamter Search Space
alpha= [1.0, 0.1, 0.01, 0.001, 0.0001, 0.0]
l1_ratio = [1.0, 0.1, 0.01, 0.001, 0.0001, 0.0]
fit_intercept = [True, False]
normalize=[False]
#precompute=['auto']
selection=['cyclic', 'random']

#Create hyperparameter options
elastic_param = dict(alpha=alpha, fit_intercept=fit_intercept, l1_ratio=l1_ratio, normalize=normalize, selection=selection)


#Apply Grid Search To Determine the best model
s_time = time.time()
best_elastic= GridSearch(elastic, elastic_param, X_train, y_train)
f_time = time.time()
print "Training and Fine-Tune Time: {:3f}".format((s_time - f_time))


#elast_reg.fit(X, y)

GridSearchCV(cv=5, error_score='raise',
       estimator=ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True, l1_ratio=0.5,
      max_iter=1000, normalize=False, positive=False, precompute=False,
      random_state=None, selection='cyclic', tol=0.0001, warm_start=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'normalize': [False], 'l1_ratio': [1.0, 0.1, 0.01, 0.001, 0.0001, 0.0], 'selection': ['cyclic', 'random'], 'fit_intercept': [True, False], 'alpha': [1.0, 0.1, 0.01, 0.001, 0.0001, 0.0]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

{'alpha': 0.0001,
 'fit_intercept': True,
 'l1_ratio': 1.0,
 'normalize': False,
 'selection': 'random'}

Training and Fine-Tune Time: -82.903000


In [18]:
score_lasso = rmse_CV(best_elastic, X_test, y_test)

RMSE Metric
Scores:  [0.15045421 0.13108303 0.13507172]
Mean: 0.1388696531754481
Standard Deviation:  0.008351803659718092


### Perceptron Regressor

"The Perceptron is another simple algorithm suitable for large scale learning. By default:

* It does not require a learning rate.
* It is not regularized (penalized).
* It updates its model only on mistakes.

The last characteristic implies that the Perceptron is slightly faster to train than SGD with the hinge loss and that the resulting models are sparser." ([W3cubDocs](http://docs.w3cub.com/scikit_learn/modules/linear_model/))

> **NOTE:** This optmization failed.

In [24]:
#from sklearn.linear_model import Perceptron

#create regressor
#percep = Perceptron()

#Create Hyperparamter Search Space
#penalty=['l1', 'l2', 'elasticnet']
#alpha = [1.0, 0.1, 0.01, 0.001, 0.0001, 0.0]
#Create hyperparameter options
#percep_param = dict(penalty=penalty, alpha=alpha, n_jobs=n_jobs, class_weight=class_weight)


#Apply Grid Search To Determine the best model
#s_time = time.time()
#best_percep= GridSearch(percep, percep_param, X_train, y_train)
#f_time = time.time()
#print "Training and Fine-Tune Time: {:3f}".format((s_time - f_time))




#percep.fit(X,y)

JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    169     pkg_name = mod_name.rpartition('.')[0]
    170     main_globals = sys.modules["__main__"].__dict__
    171     if alter_argv:
    172         sys.argv[0] = fname
    173     return _run_code(code, main_globals, None,
--> 174                      "__main__", fname, loader, pkg_name)
        fname = r'C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel_launcher.py'
        loader = <pkgutil.ImpLoader instance>
        pkg_name = ''
    175 
    176 def run_module(mod_name, init_globals=None,
    177                run_name=None, alter_sys=False):
    178     """Execute a module's code without importing it

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\runpy.py in _run_code(code=<code object <module> at 000000000253CAB0, file ...lib\site-packages\ipykernel_launcher.py", line 5>, run_globals={'__builtins__': <module '__builtin__' (built-in)>, '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel_launcher.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': '', 'app': <module 'ipykernel.kernelapp' from 'C:\Users\Use...s\py2\lib\site-packages\ipykernel\kernelapp.pyc'>, 'sys': <module 'sys' (built-in)>}, init_globals=None, mod_name='__main__', mod_fname=r'C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel_launcher.py', mod_loader=<pkgutil.ImpLoader instance>, pkg_name='')
     67         run_globals.update(init_globals)
     68     run_globals.update(__name__ = mod_name,
     69                        __file__ = mod_fname,
     70                        __loader__ = mod_loader,
     71                        __package__ = pkg_name)
---> 72     exec code in run_globals
        code = <code object <module> at 000000000253CAB0, file ...lib\site-packages\ipykernel_launcher.py", line 5>
        run_globals = {'__builtins__': <module '__builtin__' (built-in)>, '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel_launcher.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': '', 'app': <module 'ipykernel.kernelapp' from 'C:\Users\Use...s\py2\lib\site-packages\ipykernel\kernelapp.pyc'>, 'sys': <module 'sys' (built-in)>}
     73     return run_globals
     74 
     75 def _run_module_code(code, init_globals=None,
     76                     mod_name=None, mod_fname=None,

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\traitlets\config\application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel\kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    481         if self.poller is not None:
    482             self.poller.start()
    483         self.kernel.start()
    484         self.io_loop = ioloop.IOLoop.current()
    485         try:
--> 486             self.io_loop.start()
        self.io_loop.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    487         except KeyboardInterrupt:
    488             pass
    489 
    490 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\zmq\eventloop\ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    172             )
    173         return loop
    174     
    175     def start(self):
    176         try:
--> 177             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    178         except ZMQError as e:
    179             if e.errno == ETERM:
    180                 # quietly return on ETERM
    181                 pass

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\tornado\ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    883                 self._events.update(event_pairs)
    884                 while self._events:
    885                     fd, events = self._events.popitem()
    886                     try:
    887                         fd_obj, handler_func = self._handlers[fd]
--> 888                         handler_func(fd_obj, events)
        handler_func = <function null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 5
    889                     except (OSError, IOError) as e:
    890                         if errno_from_exception(e) == errno.EPIPE:
    891                             # Happens when the client closes the connection
    892                             pass

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 5), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 5)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=5)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\zmq\eventloop\zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel\kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel\kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'from sklearn.linear_model import Perceptron\n\...mat((s_time - f_time))\n\n\n\n\n#percep.fit(X,y)', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2018, 3, 12, 20, 12, 5, 262000, tzinfo=tzutc()), u'msg_id': u'2a4e9c3054294274abae1e8573d61c47', u'msg_type': u'execute_request', u'session': u'80766c297ee54be9827299854bda0864', u'username': u'username', u'version': u'5.2'}, 'metadata': {}, 'msg_id': u'2a4e9c3054294274abae1e8573d61c47', 'msg_type': u'execute_request', 'parent_header': {}})
    228             self.log.warn("Unknown message type: %r", msg_type)
    229         else:
    230             self.log.debug("%s: %s", msg_type, msg)
    231             self.pre_handler_hook()
    232             try:
--> 233                 handler(stream, idents, msg)
        handler = <bound method IPythonKernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = ['80766c297ee54be9827299854bda0864']
        msg = {'buffers': [], 'content': {u'allow_stdin': True, u'code': u'from sklearn.linear_model import Perceptron\n\...mat((s_time - f_time))\n\n\n\n\n#percep.fit(X,y)', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2018, 3, 12, 20, 12, 5, 262000, tzinfo=tzutc()), u'msg_id': u'2a4e9c3054294274abae1e8573d61c47', u'msg_type': u'execute_request', u'session': u'80766c297ee54be9827299854bda0864', u'username': u'username', u'version': u'5.2'}, 'metadata': {}, 'msg_id': u'2a4e9c3054294274abae1e8573d61c47', 'msg_type': u'execute_request', 'parent_header': {}}
    234             except Exception:
    235                 self.log.error("Exception in message handler:", exc_info=True)
    236             finally:
    237                 self.post_handler_hook()

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel\kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=['80766c297ee54be9827299854bda0864'], parent={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'from sklearn.linear_model import Perceptron\n\...mat((s_time - f_time))\n\n\n\n\n#percep.fit(X,y)', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2018, 3, 12, 20, 12, 5, 262000, tzinfo=tzutc()), u'msg_id': u'2a4e9c3054294274abae1e8573d61c47', u'msg_type': u'execute_request', u'session': u'80766c297ee54be9827299854bda0864', u'username': u'username', u'version': u'5.2'}, 'metadata': {}, 'msg_id': u'2a4e9c3054294274abae1e8573d61c47', 'msg_type': u'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel\ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code=u'from sklearn.linear_model import Perceptron\n\...mat((s_time - f_time))\n\n\n\n\n#percep.fit(X,y)', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    203 
    204         self._forward_input(allow_stdin)
    205 
    206         reply_content = {}
    207         try:
--> 208             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = u'from sklearn.linear_model import Perceptron\n\...mat((s_time - f_time))\n\n\n\n\n#percep.fit(X,y)'
        store_history = True
        silent = False
    209         finally:
    210             self._restore_input()
    211 
    212         if res.error_before_exec is not None:

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel\zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=(u'from sklearn.linear_model import Perceptron\n\...mat((s_time - f_time))\n\n\n\n\n#percep.fit(X,y)',), **kwargs={'silent': False, 'store_history': True})
    532             )
    533         self.payload_manager.write_payload(payload)
    534 
    535     def run_cell(self, *args, **kwargs):
    536         self._last_traceback = None
--> 537         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = (u'from sklearn.linear_model import Perceptron\n\...mat((s_time - f_time))\n\n\n\n\n#percep.fit(X,y)',)
        kwargs = {'silent': False, 'store_history': True}
    538 
    539     def _showtraceback(self, etype, evalue, stb):
    540         # try to preserve ordering of tracebacks and print statements
    541         sys.stdout.flush()

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\IPython\core\interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell=u'from sklearn.linear_model import Perceptron\n\...mat((s_time - f_time))\n\n\n\n\n#percep.fit(X,y)', store_history=True, silent=False, shell_futures=True)
   2713                 self.displayhook.exec_result = result
   2714 
   2715                 # Execute the user code
   2716                 interactivity = "none" if silent else self.ast_node_interactivity
   2717                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2718                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler instance>
   2719                 
   2720                 self.last_execution_succeeded = not has_raised
   2721 
   2722                 # Reset this so later displayed values do not modify the

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\IPython\core\interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.ImportFrom object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Print object>], cell_name='<ipython-input-24-585cc111cdd8>', interactivity='none', compiler=<IPython.core.compilerop.CachingCompiler instance>, result=<ExecutionResult object at b608160, execution_co..._before_exec=None error_in_exec=None result=None>)
   2817 
   2818         try:
   2819             for i, node in enumerate(to_run_exec):
   2820                 mod = ast.Module([node])
   2821                 code = compiler(mod, cell_name, "exec")
-> 2822                 if self.run_code(code, result):
        self.run_code = <bound method ZMQInteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 000000000B330130, file "<ipython-input-24-585cc111cdd8>", line 19>
        result = <ExecutionResult object at b608160, execution_co..._before_exec=None error_in_exec=None result=None>
   2823                     return True
   2824 
   2825             for i, node in enumerate(to_run_interactive):
   2826                 mod = ast.Interactive([node])

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\IPython\core\interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 000000000B330130, file "<ipython-input-24-585cc111cdd8>", line 19>, result=<ExecutionResult object at b608160, execution_co..._before_exec=None error_in_exec=None result=None>)
   2877         outflag = 1  # happens in more places, so it's easier as default
   2878         try:
   2879             try:
   2880                 self.hooks.pre_run_code_hook()
   2881                 #rprint('Running code', repr(code_obj)) # dbg
-> 2882                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 000000000B330130, file "<ipython-input-24-585cc111cdd8>", line 19>
        self.user_global_ns = {'C': [0.001, 0.01, 0.1, 1, 10, 100], 'DecisionTreeRegressor': <class 'sklearn.tree.tree.DecisionTreeRegressor'>, 'ElasticNet': <class 'sklearn.linear_model.coordinate_descent.ElasticNet'>, 'GridSearch': <function GridSearch>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', u"# Import libraries necessary for this project\...books\nget_ipython().magic(u'matplotlib inline')", u'#Load the saved file to verify\ntrain_data = p...processed_train_target.pkl")\n#display(y.head())', u'#Load the saved file to verify\ntrain_data = p...())\ndisplay(train_data[\'SalePrice\'].head(10))', u"#train samples\ny_train = train_data['SalePric...st = test_data.drop(columns='SalePrice', axis=1)", u'#Load the saved file to verify\ntest_data = pd...eprocessed_test_target.pkl")\n#display(y.head())', u'## Verify the shape\ndisplay(train_data.shape)...in_PCA_data.shape)\ndisplay(test_PCA_data.shape)', u"#train samples\ny_train = train_data['SalePric...st = test_data.drop(columns='SalePrice', axis=1)", u'from sklearn.model_selection import cross_val_...ndard Deviation: \', rmse.std()\n    return rmse', u'from sklearn.metrics import mean_squared_error...   return np.sqrt(mean_squared_error(y, y_pred))', u'from sklearn.model_selection import Randomized...\n    display(best_model)\n    return best_model', u'from sklearn.model_selection import GridSearch...\n    display(best_model)\n    return best_model', u'#Import model\nfrom sklearn.tree import Decisi...RandomSearch(tree_reg, tree_hyper, X_PCA, y_PCA)', u'score_tree = rmse_CV(best_tree, X_train, y_train)', u'from sklearn.neighbors import KNeighborsRegres...RandomSearch(tree_reg, tree_hyper, X_PCA, y_PCA)', u'score_neigh = rmse_CV(best_neigh, X_train, y_train)', u'#import\nfrom sklearn.svm import LinearSVR\n\n...should scale better to large numbers of samples.', u'score_lin_svr = rmse_CV(best_lin_svr, X_train, y_train)', u"from sklearn.linear_model import LinearRegress...e\n\nbest_lin_reg = lin_reg\n\n#lin_reg.fit(X,y)", u'score_lin_reg = rmse_CV(best_lin_reg, X_train, y_train)', ...], 'KNeighborsRegressor': <class 'sklearn.neighbors.regression.KNeighborsRegressor'>, 'Lasso': <class 'sklearn.linear_model.coordinate_descent.Lasso'>, 'LinearRegression': <class 'sklearn.linear_model.base.LinearRegression'>, 'LinearSVR': <class 'sklearn.svm.classes.LinearSVR'>, ...}
        self.user_ns = {'C': [0.001, 0.01, 0.1, 1, 10, 100], 'DecisionTreeRegressor': <class 'sklearn.tree.tree.DecisionTreeRegressor'>, 'ElasticNet': <class 'sklearn.linear_model.coordinate_descent.ElasticNet'>, 'GridSearch': <function GridSearch>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', u"# Import libraries necessary for this project\...books\nget_ipython().magic(u'matplotlib inline')", u'#Load the saved file to verify\ntrain_data = p...processed_train_target.pkl")\n#display(y.head())', u'#Load the saved file to verify\ntrain_data = p...())\ndisplay(train_data[\'SalePrice\'].head(10))', u"#train samples\ny_train = train_data['SalePric...st = test_data.drop(columns='SalePrice', axis=1)", u'#Load the saved file to verify\ntest_data = pd...eprocessed_test_target.pkl")\n#display(y.head())', u'## Verify the shape\ndisplay(train_data.shape)...in_PCA_data.shape)\ndisplay(test_PCA_data.shape)', u"#train samples\ny_train = train_data['SalePric...st = test_data.drop(columns='SalePrice', axis=1)", u'from sklearn.model_selection import cross_val_...ndard Deviation: \', rmse.std()\n    return rmse', u'from sklearn.metrics import mean_squared_error...   return np.sqrt(mean_squared_error(y, y_pred))', u'from sklearn.model_selection import Randomized...\n    display(best_model)\n    return best_model', u'from sklearn.model_selection import GridSearch...\n    display(best_model)\n    return best_model', u'#Import model\nfrom sklearn.tree import Decisi...RandomSearch(tree_reg, tree_hyper, X_PCA, y_PCA)', u'score_tree = rmse_CV(best_tree, X_train, y_train)', u'from sklearn.neighbors import KNeighborsRegres...RandomSearch(tree_reg, tree_hyper, X_PCA, y_PCA)', u'score_neigh = rmse_CV(best_neigh, X_train, y_train)', u'#import\nfrom sklearn.svm import LinearSVR\n\n...should scale better to large numbers of samples.', u'score_lin_svr = rmse_CV(best_lin_svr, X_train, y_train)', u"from sklearn.linear_model import LinearRegress...e\n\nbest_lin_reg = lin_reg\n\n#lin_reg.fit(X,y)", u'score_lin_reg = rmse_CV(best_lin_reg, X_train, y_train)', ...], 'KNeighborsRegressor': <class 'sklearn.neighbors.regression.KNeighborsRegressor'>, 'Lasso': <class 'sklearn.linear_model.coordinate_descent.Lasso'>, 'LinearRegression': <class 'sklearn.linear_model.base.LinearRegression'>, 'LinearSVR': <class 'sklearn.svm.classes.LinearSVR'>, ...}
   2883             finally:
   2884                 # Reset our crash handler in place
   2885                 sys.excepthook = old_excepthook
   2886         except SystemExit as e:

...........................................................................
C:\Users\User\Desktop\Machine Learning ND\Capstone Project\Capstone Project\<ipython-input-24-585cc111cdd8> in <module>()
     14 percep_param = dict(penalty=penalty, alpha=alpha, n_jobs=n_jobs, class_weight=class_weight)
     15 
     16 
     17 #Apply Grid Search To Determine the best model
     18 s_time = time.time()
---> 19 best_percep= GridSearch(percep, percep_param, X_train, y_train)
     20 f_time = time.time()
     21 print "Training and Fine-Tune Time: {:3f}".format((s_time - f_time))
     22 
     23 

...........................................................................
C:\Users\User\Desktop\Machine Learning ND\Capstone Project\Capstone Project\<ipython-input-11-9ad707ef2880> in GridSearch(reg=Perceptron(alpha=0.0001, class_weight=None, eta0...ffle=True, tol=None, verbose=0, warm_start=False), parameters={'alpha': [1.0, 0.1, 0.01, 0.001, 0.0001, 0.0], 'class_weight': ['balanced', None], 'n_jobs': [-1], 'penalty': ['l1', 'l2', 'elasticnet']}, X=      Lot Area  Overall Qual  Overall Cond  Year... 0                 0  

[1897 rows x 287 columns], y=2259    0.206746
1615    0.204613
1241    0.1819...940
Name: SalePrice, Length: 1897, dtype: float64)
      1 from sklearn.model_selection import GridSearchCV
      2 
      3 def GridSearch(reg, parameters, X, y):
      4     GSCV = GridSearchCV(reg, parameters, verbose=0, cv=5, n_jobs=-1)
----> 5     best_model = GSCV.fit(X, y)
      6     display(best_model)
      7     return best_model

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\sklearn\model_selection\_search.py in fit(self=GridSearchCV(cv=5, error_score='raise',
       e...ain_score='warn',
       scoring=None, verbose=0), X=      Lot Area  Overall Qual  Overall Cond  Year... 0                 0  

[1897 rows x 287 columns], y=2259    0.206746
1615    0.204613
1241    0.1819...940
Name: SalePrice, Length: 1897, dtype: float64, groups=None, **fit_params={})
    634                                   return_train_score=self.return_train_score,
    635                                   return_n_test_samples=True,
    636                                   return_times=True, return_parameters=False,
    637                                   error_score=self.error_score)
    638           for parameters, (train, test) in product(candidate_params,
--> 639                                                    cv.split(X, y, groups)))
        cv.split = <bound method KFold.split of KFold(n_splits=5, random_state=None, shuffle=False)>
        X =       Lot Area  Overall Qual  Overall Cond  Year... 0                 0  

[1897 rows x 287 columns]
        y = 2259    0.206746
1615    0.204613
1241    0.1819...940
Name: SalePrice, Length: 1897, dtype: float64
        groups = None
    640 
    641         # if one choose to see train score, "out" will contain train score info
    642         if self.return_train_score:
    643             (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object <genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Mon Mar 12 18:50:32 2018
PID: 8272        Python 2.7.14: C:\Users\User\Anaconda3\envs\py2\python.exe
...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (Perceptron(alpha=1.0, class_weight='balanced', e...ffle=True, tol=None, verbose=0, warm_start=False),       Lot Area  Overall Qual  Overall Cond  Year... 0                 0  

[1897 rows x 287 columns], 2259    0.206746
1615    0.204613
1241    0.1819...940
Name: SalePrice, Length: 1897, dtype: float64, {'score': <function _passthrough_scorer>}, array([ 380,  381,  382, ..., 1894, 1895, 1896]), array([  0,   1,   2,   3,   4,   5,   6,   7,  ..., 372, 373, 374, 375, 376,
       377, 378, 379]), 0, {'alpha': 1.0, 'class_weight': 'balanced', 'n_jobs': -1, 'penalty': 'l1'})
        kwargs = {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'}
        self.items = [(<function _fit_and_score>, (Perceptron(alpha=1.0, class_weight='balanced', e...ffle=True, tol=None, verbose=0, warm_start=False),       Lot Area  Overall Qual  Overall Cond  Year... 0                 0  

[1897 rows x 287 columns], 2259    0.206746
1615    0.204613
1241    0.1819...940
Name: SalePrice, Length: 1897, dtype: float64, {'score': <function _passthrough_scorer>}, array([ 380,  381,  382, ..., 1894, 1895, 1896]), array([  0,   1,   2,   3,   4,   5,   6,   7,  ..., 372, 373, 374, 375, 376,
       377, 378, 379]), 0, {'alpha': 1.0, 'class_weight': 'balanced', 'n_jobs': -1, 'penalty': 'l1'}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator=Perceptron(alpha=1.0, class_weight='balanced', e...ffle=True, tol=None, verbose=0, warm_start=False), X=      Lot Area  Overall Qual  Overall Cond  Year... 0                 0  

[1897 rows x 287 columns], y=2259    0.206746
1615    0.204613
1241    0.1819...940
Name: SalePrice, Length: 1897, dtype: float64, scorer={'score': <function _passthrough_scorer>}, train=array([ 380,  381,  382, ..., 1894, 1895, 1896]), test=array([  0,   1,   2,   3,   4,   5,   6,   7,  ..., 372, 373, 374, 375, 376,
       377, 378, 379]), verbose=0, parameters={'alpha': 1.0, 'class_weight': 'balanced', 'n_jobs': -1, 'penalty': 'l1'}, fit_params={}, return_train_score='warn', return_parameters=False, return_n_test_samples=True, return_times=True, error_score='raise')
    453 
    454     try:
    455         if y_train is None:
    456             estimator.fit(X_train, **fit_params)
    457         else:
--> 458             estimator.fit(X_train, y_train, **fit_params)
        estimator.fit = <bound method Perceptron.fit of Perceptron(alpha...fle=True, tol=None, verbose=0, warm_start=False)>
        X_train =       Lot Area  Overall Qual  Overall Cond  Year... 0                 0  

[1517 rows x 287 columns]
        y_train = 697     0.134334
2446    0.299922
348     0.1640...940
Name: SalePrice, Length: 1517, dtype: float64
        fit_params = {}
    459 
    460     except Exception as e:
    461         # Note fit time as time until error
    462         fit_time = time.time() - start_time

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\sklearn\linear_model\stochastic_gradient.py in fit(self=Perceptron(alpha=1.0, class_weight='balanced', e...ffle=True, tol=None, verbose=0, warm_start=False), X=      Lot Area  Overall Qual  Overall Cond  Year... 0                 0  

[1517 rows x 287 columns], y=697     0.134334
2446    0.299922
348     0.1640...940
Name: SalePrice, Length: 1517, dtype: float64, coef_init=None, intercept_init=None, sample_weight=None)
    581         self : returns an instance of self.
    582         """
    583         return self._fit(X, y, alpha=self.alpha, C=1.0,
    584                          loss=self.loss, learning_rate=self.learning_rate,
    585                          coef_init=coef_init, intercept_init=intercept_init,
--> 586                          sample_weight=sample_weight)
        sample_weight = None
    587 
    588 
    589 class SGDClassifier(BaseSGDClassifier):
    590     """Linear classifiers (SVM, logistic regression, a.o.) with SGD training.

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\sklearn\linear_model\stochastic_gradient.py in _fit(self=Perceptron(alpha=1.0, class_weight='balanced', e...ffle=True, tol=None, verbose=0, warm_start=False), X=array([[0.02124481, 0.23489509, 0.4624497 , ...,....., 0.        , 0.        ,
        0.        ]]), y=array([0.13433433, 0.29992203, 0.1640804 , ..., 0.19548016, 0.16127689,
       0.13893983]), alpha=1.0, C=1.0, loss='perceptron', learning_rate='constant', coef_init=None, intercept_init=None, sample_weight=None)
    439 
    440         # Clear iteration count for multiple call to fit.
    441         self.t_ = 1.0
    442 
    443         self._partial_fit(X, y, alpha, C, loss, learning_rate, self._max_iter,
--> 444                           classes, sample_weight, coef_init, intercept_init)
        classes = array([0.        , 0.00057008, 0.02753011, ..., 0.33929943, 0.34458077,
       0.35229819])
        sample_weight = None
        coef_init = None
        intercept_init = None
    445 
    446         if (self._tol is not None and self._tol > -np.inf
    447                 and self.n_iter_ == self._max_iter):
    448             warnings.warn("Maximum number of iteration reached before "

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\sklearn\linear_model\stochastic_gradient.py in _partial_fit(self=Perceptron(alpha=1.0, class_weight='balanced', e...ffle=True, tol=None, verbose=0, warm_start=False), X=array([[0.02124481, 0.23489509, 0.4624497 , ...,....., 0.        , 0.        ,
        0.        ]]), y=array([0.13433433, 0.29992203, 0.1640804 , ..., 0.19548016, 0.16127689,
       0.13893983]), alpha=1.0, C=1.0, loss='perceptron', learning_rate='constant', max_iter=5, classes=array([0.        , 0.00057008, 0.02753011, ..., 0.33929943, 0.34458077,
       0.35229819]), sample_weight=None, coef_init=None, intercept_init=None)
    370                      coef_init, intercept_init):
    371         X, y = check_X_y(X, y, 'csr', dtype=np.float64, order="C")
    372 
    373         n_samples, n_features = X.shape
    374 
--> 375         _check_partial_fit_first_call(self, classes)
        self = Perceptron(alpha=1.0, class_weight='balanced', e...ffle=True, tol=None, verbose=0, warm_start=False)
        classes = array([0.        , 0.00057008, 0.02753011, ..., 0.33929943, 0.34458077,
       0.35229819])
    376 
    377         n_classes = self.classes_.shape[0]
    378 
    379         # Allocate datastructures from input arguments

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\sklearn\utils\multiclass.py in _check_partial_fit_first_call(clf=Perceptron(alpha=1.0, class_weight='balanced', e...ffle=True, tol=None, verbose=0, warm_start=False), classes=array([0.        , 0.00057008, 0.02753011, ..., 0.33929943, 0.34458077,
       0.35229819]))
    316                     "`classes=%r` is not the same as on last call "
    317                     "to partial_fit, was: %r" % (classes, clf.classes_))
    318 
    319         else:
    320             # This is the first call to partial_fit
--> 321             clf.classes_ = unique_labels(classes)
        clf.classes_ = undefined
        classes = array([0.        , 0.00057008, 0.02753011, ..., 0.33929943, 0.34458077,
       0.35229819])
    322             return True
    323 
    324     # classes is None and clf.classes_ has already previously been set:
    325     # nothing to do

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\sklearn\utils\multiclass.py in unique_labels(*ys=(array([0.        , 0.00057008, 0.02753011, ..., 0.33929943, 0.34458077,
       0.35229819]),))
     92                          "different numbers of labels")
     93 
     94     # Get the unique set of labels
     95     _unique_labels = _FN_UNIQUE_LABELS.get(label_type, None)
     96     if not _unique_labels:
---> 97         raise ValueError("Unknown label type: %s" % repr(ys))
        ys = (array([0.        , 0.00057008, 0.02753011, ..., 0.33929943, 0.34458077,
       0.35229819]),)
     98 
     99     ys_labels = set(chain.from_iterable(_unique_labels(y) for y in ys))
    100 
    101     # Check that we don't mix string type with number type

ValueError: Unknown label type: (array([0.        , 0.00057008, 0.02753011, ..., 0.33929943, 0.34458077,
       0.35229819]),)
___________________________________________________________________________

In [None]:
#score_percep = rmse_CV(best_percep, X_train, y_train)

### Gaussian Process Regression

> **NOTE:** The optimization failed

In [13]:
#from sklearn.gaussian_process import GaussianProcessRegressor

#Create Regressor
#gpr = GaussianProcessRegressor()

#Create Hyperparamter Search Space
#kernel = ['rbf', 'linear', 'poly', 'sigmoid']
#alpha = [1.0, 0.1, 0.01, 0.001, 0.0001, 0.0]
#optimizer = ['fmin_l_bfgs_b', None]

#Create hyperparameter options
#gpr_param = dict(alpha=alpha, optimizer=optimizer)


#Apply Grid Search To Determine the best model
#s_time = time.time()
#best_gpr= GridSearch(gpr, gpr_param, X_train, y_train)
#f_time = time.time()
#print "Training and Fine-Tune Time: {:3f}".format((s_time - f_time))

#gpr.fit(X,y)


JoblibLinAlgError: JoblibLinAlgError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    169     pkg_name = mod_name.rpartition('.')[0]
    170     main_globals = sys.modules["__main__"].__dict__
    171     if alter_argv:
    172         sys.argv[0] = fname
    173     return _run_code(code, main_globals, None,
--> 174                      "__main__", fname, loader, pkg_name)
        fname = r'C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel_launcher.py'
        loader = <pkgutil.ImpLoader instance>
        pkg_name = ''
    175 
    176 def run_module(mod_name, init_globals=None,
    177                run_name=None, alter_sys=False):
    178     """Execute a module's code without importing it

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\runpy.py in _run_code(code=<code object <module> at 0000000002527AB0, file ...lib\site-packages\ipykernel_launcher.py", line 5>, run_globals={'__builtins__': <module '__builtin__' (built-in)>, '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel_launcher.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': '', 'app': <module 'ipykernel.kernelapp' from 'C:\Users\Use...s\py2\lib\site-packages\ipykernel\kernelapp.pyc'>, 'sys': <module 'sys' (built-in)>}, init_globals=None, mod_name='__main__', mod_fname=r'C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel_launcher.py', mod_loader=<pkgutil.ImpLoader instance>, pkg_name='')
     67         run_globals.update(init_globals)
     68     run_globals.update(__name__ = mod_name,
     69                        __file__ = mod_fname,
     70                        __loader__ = mod_loader,
     71                        __package__ = pkg_name)
---> 72     exec code in run_globals
        code = <code object <module> at 0000000002527AB0, file ...lib\site-packages\ipykernel_launcher.py", line 5>
        run_globals = {'__builtins__': <module '__builtin__' (built-in)>, '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel_launcher.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': '', 'app': <module 'ipykernel.kernelapp' from 'C:\Users\Use...s\py2\lib\site-packages\ipykernel\kernelapp.pyc'>, 'sys': <module 'sys' (built-in)>}
     73     return run_globals
     74 
     75 def _run_module_code(code, init_globals=None,
     76                     mod_name=None, mod_fname=None,

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\traitlets\config\application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel\kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    481         if self.poller is not None:
    482             self.poller.start()
    483         self.kernel.start()
    484         self.io_loop = ioloop.IOLoop.current()
    485         try:
--> 486             self.io_loop.start()
        self.io_loop.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    487         except KeyboardInterrupt:
    488             pass
    489 
    490 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\zmq\eventloop\ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    172             )
    173         return loop
    174     
    175     def start(self):
    176         try:
--> 177             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    178         except ZMQError as e:
    179             if e.errno == ETERM:
    180                 # quietly return on ETERM
    181                 pass

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\tornado\ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    883                 self._events.update(event_pairs)
    884                 while self._events:
    885                     fd, events = self._events.popitem()
    886                     try:
    887                         fd_obj, handler_func = self._handlers[fd]
--> 888                         handler_func(fd_obj, events)
        handler_func = <function null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 1
    889                     except (OSError, IOError) as e:
    890                         if errno_from_exception(e) == errno.EPIPE:
    891                             # Happens when the client closes the connection
    892                             pass

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\zmq\eventloop\zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel\kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel\kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'from sklearn.gaussian_process import GaussianP...f}".format((s_time - f_time))\n\n#gpr.fit(X,y)\n', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2018, 3, 27, 1, 56, 17, 869000, tzinfo=tzutc()), u'msg_id': u'447841c7143445a08d1c3654877e0129', u'msg_type': u'execute_request', u'session': u'3f5f6fd3257847f58804872a628c1182', u'username': u'username', u'version': u'5.2'}, 'metadata': {}, 'msg_id': u'447841c7143445a08d1c3654877e0129', 'msg_type': u'execute_request', 'parent_header': {}})
    228             self.log.warn("Unknown message type: %r", msg_type)
    229         else:
    230             self.log.debug("%s: %s", msg_type, msg)
    231             self.pre_handler_hook()
    232             try:
--> 233                 handler(stream, idents, msg)
        handler = <bound method IPythonKernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = ['3f5f6fd3257847f58804872a628c1182']
        msg = {'buffers': [], 'content': {u'allow_stdin': True, u'code': u'from sklearn.gaussian_process import GaussianP...f}".format((s_time - f_time))\n\n#gpr.fit(X,y)\n', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2018, 3, 27, 1, 56, 17, 869000, tzinfo=tzutc()), u'msg_id': u'447841c7143445a08d1c3654877e0129', u'msg_type': u'execute_request', u'session': u'3f5f6fd3257847f58804872a628c1182', u'username': u'username', u'version': u'5.2'}, 'metadata': {}, 'msg_id': u'447841c7143445a08d1c3654877e0129', 'msg_type': u'execute_request', 'parent_header': {}}
    234             except Exception:
    235                 self.log.error("Exception in message handler:", exc_info=True)
    236             finally:
    237                 self.post_handler_hook()

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel\kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=['3f5f6fd3257847f58804872a628c1182'], parent={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'from sklearn.gaussian_process import GaussianP...f}".format((s_time - f_time))\n\n#gpr.fit(X,y)\n', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2018, 3, 27, 1, 56, 17, 869000, tzinfo=tzutc()), u'msg_id': u'447841c7143445a08d1c3654877e0129', u'msg_type': u'execute_request', u'session': u'3f5f6fd3257847f58804872a628c1182', u'username': u'username', u'version': u'5.2'}, 'metadata': {}, 'msg_id': u'447841c7143445a08d1c3654877e0129', 'msg_type': u'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel\ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code=u'from sklearn.gaussian_process import GaussianP...f}".format((s_time - f_time))\n\n#gpr.fit(X,y)\n', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    203 
    204         self._forward_input(allow_stdin)
    205 
    206         reply_content = {}
    207         try:
--> 208             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = u'from sklearn.gaussian_process import GaussianP...f}".format((s_time - f_time))\n\n#gpr.fit(X,y)\n'
        store_history = True
        silent = False
    209         finally:
    210             self._restore_input()
    211 
    212         if res.error_before_exec is not None:

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\ipykernel\zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=(u'from sklearn.gaussian_process import GaussianP...f}".format((s_time - f_time))\n\n#gpr.fit(X,y)\n',), **kwargs={'silent': False, 'store_history': True})
    532             )
    533         self.payload_manager.write_payload(payload)
    534 
    535     def run_cell(self, *args, **kwargs):
    536         self._last_traceback = None
--> 537         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = (u'from sklearn.gaussian_process import GaussianP...f}".format((s_time - f_time))\n\n#gpr.fit(X,y)\n',)
        kwargs = {'silent': False, 'store_history': True}
    538 
    539     def _showtraceback(self, etype, evalue, stb):
    540         # try to preserve ordering of tracebacks and print statements
    541         sys.stdout.flush()

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\IPython\core\interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell=u'from sklearn.gaussian_process import GaussianP...f}".format((s_time - f_time))\n\n#gpr.fit(X,y)\n', store_history=True, silent=False, shell_futures=True)
   2713                 self.displayhook.exec_result = result
   2714 
   2715                 # Execute the user code
   2716                 interactivity = "none" if silent else self.ast_node_interactivity
   2717                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2718                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler instance>
   2719                 
   2720                 self.last_execution_succeeded = not has_raised
   2721 
   2722                 # Reset this so later displayed values do not modify the

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\IPython\core\interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.ImportFrom object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Print object>], cell_name='<ipython-input-13-62b6638ffeb1>', interactivity='none', compiler=<IPython.core.compilerop.CachingCompiler instance>, result=<ExecutionResult object at b86b2b0, execution_co..._before_exec=None error_in_exec=None result=None>)
   2817 
   2818         try:
   2819             for i, node in enumerate(to_run_exec):
   2820                 mod = ast.Module([node])
   2821                 code = compiler(mod, cell_name, "exec")
-> 2822                 if self.run_code(code, result):
        self.run_code = <bound method ZMQInteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 000000000B37BC30, file "<ipython-input-13-62b6638ffeb1>", line 17>
        result = <ExecutionResult object at b86b2b0, execution_co..._before_exec=None error_in_exec=None result=None>
   2823                     return True
   2824 
   2825             for i, node in enumerate(to_run_interactive):
   2826                 mod = ast.Interactive([node])

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\IPython\core\interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 000000000B37BC30, file "<ipython-input-13-62b6638ffeb1>", line 17>, result=<ExecutionResult object at b86b2b0, execution_co..._before_exec=None error_in_exec=None result=None>)
   2877         outflag = 1  # happens in more places, so it's easier as default
   2878         try:
   2879             try:
   2880                 self.hooks.pre_run_code_hook()
   2881                 #rprint('Running code', repr(code_obj)) # dbg
-> 2882                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 000000000B37BC30, file "<ipython-input-13-62b6638ffeb1>", line 17>
        self.user_global_ns = {'GaussianProcessRegressor': <class 'sklearn.gaussian_process.gpr.GaussianProcessRegressor'>, 'GridSearch': <function GridSearch>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'Id': 0          1
1          2
2          3
3        ...1459    1460
Name: Id, Length: 1460, dtype: int64, 'In': ['', u"# Import libraries necessary for this project\...books\nget_ipython().magic(u'matplotlib inline')", u'#Load the saved file to verify\ntrain_data = p...s=1, inplace=True)\n\ndisplay(train_data.head())', u'#Split TRAINING SET into train and test sets\n...n\ndisplay(y_train.shape)\ndisplay(y_test.shape)', u'from sklearn.model_selection import cross_val_...\n    print \'Standard Deviation: \', rmse.std()', u'from sklearn.metrics import mean_squared_error...   return np.sqrt(mean_squared_error(y, y_pred))', u'from sklearn.gaussian_process import GaussianP...:3f}".format((s_time - f_time))\n\n#gpr.fit(X,y)', u"# Import libraries necessary for this project\...books\nget_ipython().magic(u'matplotlib inline')", u'#Load the saved file to verify\ntrain_data = p...s=1, inplace=True)\n\ndisplay(train_data.head())', u'#Split TRAINING SET into train and test sets\n...n\ndisplay(y_train.shape)\ndisplay(y_test.shape)', u'from sklearn.model_selection import cross_val_...\n    print \'Standard Deviation: \', rmse.std()', u'from sklearn.metrics import mean_squared_error...   return np.sqrt(mean_squared_error(y, y_pred))', u'from sklearn.model_selection import GridSearch..._params_)\n    return best_model.best_estimator_', u'from sklearn.gaussian_process import GaussianP...:3f}".format((s_time - f_time))\n\n#gpr.fit(X,y)'], 'Out': {}, 'X_test':       MSSubClass   LotArea  OverallQual  Overall...                    0  

[482 rows x 220 columns], 'X_train':       MSSubClass    LotArea  OverallQual  Overal...                    0  

[978 rows x 220 columns], '_': '', '__': '', ...}
        self.user_ns = {'GaussianProcessRegressor': <class 'sklearn.gaussian_process.gpr.GaussianProcessRegressor'>, 'GridSearch': <function GridSearch>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'Id': 0          1
1          2
2          3
3        ...1459    1460
Name: Id, Length: 1460, dtype: int64, 'In': ['', u"# Import libraries necessary for this project\...books\nget_ipython().magic(u'matplotlib inline')", u'#Load the saved file to verify\ntrain_data = p...s=1, inplace=True)\n\ndisplay(train_data.head())', u'#Split TRAINING SET into train and test sets\n...n\ndisplay(y_train.shape)\ndisplay(y_test.shape)', u'from sklearn.model_selection import cross_val_...\n    print \'Standard Deviation: \', rmse.std()', u'from sklearn.metrics import mean_squared_error...   return np.sqrt(mean_squared_error(y, y_pred))', u'from sklearn.gaussian_process import GaussianP...:3f}".format((s_time - f_time))\n\n#gpr.fit(X,y)', u"# Import libraries necessary for this project\...books\nget_ipython().magic(u'matplotlib inline')", u'#Load the saved file to verify\ntrain_data = p...s=1, inplace=True)\n\ndisplay(train_data.head())', u'#Split TRAINING SET into train and test sets\n...n\ndisplay(y_train.shape)\ndisplay(y_test.shape)', u'from sklearn.model_selection import cross_val_...\n    print \'Standard Deviation: \', rmse.std()', u'from sklearn.metrics import mean_squared_error...   return np.sqrt(mean_squared_error(y, y_pred))', u'from sklearn.model_selection import GridSearch..._params_)\n    return best_model.best_estimator_', u'from sklearn.gaussian_process import GaussianP...:3f}".format((s_time - f_time))\n\n#gpr.fit(X,y)'], 'Out': {}, 'X_test':       MSSubClass   LotArea  OverallQual  Overall...                    0  

[482 rows x 220 columns], 'X_train':       MSSubClass    LotArea  OverallQual  Overal...                    0  

[978 rows x 220 columns], '_': '', '__': '', ...}
   2883             finally:
   2884                 # Reset our crash handler in place
   2885                 sys.excepthook = old_excepthook
   2886         except SystemExit as e:

...........................................................................
C:\Users\User\Desktop\Machine Learning ND\Capstone Project\Capstone Project\<ipython-input-13-62b6638ffeb1> in <module>()
     12 gpr_param = dict(alpha=alpha, optimizer=optimizer)
     13 
     14 
     15 #Apply Grid Search To Determine the best model
     16 s_time = time.time()
---> 17 best_gpr= GridSearch(gpr, gpr_param, X_train, y_train)
     18 f_time = time.time()
     19 print "Training and Fine-Tune Time: {:3f}".format((s_time - f_time))
     20 
     21 #gpr.fit(X,y)

...........................................................................
C:\Users\User\Desktop\Machine Learning ND\Capstone Project\Capstone Project\<ipython-input-12-366611827180> in GridSearch(reg=GaussianProcessRegressor(alpha=1e-10, copy_X_tra...    optimizer='fmin_l_bfgs_b', random_state=None), parameters={'alpha': [1.0, 0.1, 0.01, 0.001, 0.0001, 0.0], 'optimizer': ['fmin_l_bfgs_b', None]}, X=      MSSubClass    LotArea  OverallQual  Overal...                    0  

[978 rows x 220 columns], y=615     11.831386
613     11.898195
1303    12.3...6816
Name: SalePrice, Length: 978, dtype: float64)
      1 from sklearn.model_selection import GridSearchCV
      2 
      3 def GridSearch(reg, parameters, X, y):
      4     GSCV = GridSearchCV(reg, parameters, verbose=0, cv=5, n_jobs=-1)
----> 5     best_model = GSCV.fit(X, y)
      6     display(best_model)
      7     display(best_model.best_params_)
      8     return best_model.best_estimator_

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\sklearn\model_selection\_search.py in fit(self=GridSearchCV(cv=5, error_score='raise',
       e...ain_score='warn',
       scoring=None, verbose=0), X=      MSSubClass    LotArea  OverallQual  Overal...                    0  

[978 rows x 220 columns], y=615     11.831386
613     11.898195
1303    12.3...6816
Name: SalePrice, Length: 978, dtype: float64, groups=None, **fit_params={})
    634                                   return_train_score=self.return_train_score,
    635                                   return_n_test_samples=True,
    636                                   return_times=True, return_parameters=False,
    637                                   error_score=self.error_score)
    638           for parameters, (train, test) in product(candidate_params,
--> 639                                                    cv.split(X, y, groups)))
        cv.split = <bound method KFold.split of KFold(n_splits=5, random_state=None, shuffle=False)>
        X =       MSSubClass    LotArea  OverallQual  Overal...                    0  

[978 rows x 220 columns]
        y = 615     11.831386
613     11.898195
1303    12.3...6816
Name: SalePrice, Length: 978, dtype: float64
        groups = None
    640 
    641         # if one choose to see train score, "out" will contain train score info
    642         if self.return_train_score:
    643             (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object <genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
LinAlgError                                        Mon Mar 26 22:56:33 2018
PID: 6872        Python 2.7.14: C:\Users\User\Anaconda3\envs\py2\python.exe
...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (GaussianProcessRegressor(alpha=0.0, copy_X_train...    optimizer='fmin_l_bfgs_b', random_state=None),       MSSubClass    LotArea  OverallQual  Overal...                    0  

[978 rows x 220 columns], 615     11.831386
613     11.898195
1303    12.3...6816
Name: SalePrice, Length: 978, dtype: float64, {'score': <function _passthrough_scorer>}, array([196, 197, 198, 199, 200, 201, 202, 203, 2..., 970, 971, 972, 973, 974, 975,
       976, 977]), array([  0,   1,   2,   3,   4,   5,   6,   7,  ..., 188, 189, 190, 191, 192, 193, 194,
       195]), 0, {'alpha': 0.0, 'optimizer': 'fmin_l_bfgs_b'})
        kwargs = {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'}
        self.items = [(<function _fit_and_score>, (GaussianProcessRegressor(alpha=0.0, copy_X_train...    optimizer='fmin_l_bfgs_b', random_state=None),       MSSubClass    LotArea  OverallQual  Overal...                    0  

[978 rows x 220 columns], 615     11.831386
613     11.898195
1303    12.3...6816
Name: SalePrice, Length: 978, dtype: float64, {'score': <function _passthrough_scorer>}, array([196, 197, 198, 199, 200, 201, 202, 203, 2..., 970, 971, 972, 973, 974, 975,
       976, 977]), array([  0,   1,   2,   3,   4,   5,   6,   7,  ..., 188, 189, 190, 191, 192, 193, 194,
       195]), 0, {'alpha': 0.0, 'optimizer': 'fmin_l_bfgs_b'}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator=GaussianProcessRegressor(alpha=0.0, copy_X_train...    optimizer='fmin_l_bfgs_b', random_state=None), X=      MSSubClass    LotArea  OverallQual  Overal...                    0  

[978 rows x 220 columns], y=615     11.831386
613     11.898195
1303    12.3...6816
Name: SalePrice, Length: 978, dtype: float64, scorer={'score': <function _passthrough_scorer>}, train=array([196, 197, 198, 199, 200, 201, 202, 203, 2..., 970, 971, 972, 973, 974, 975,
       976, 977]), test=array([  0,   1,   2,   3,   4,   5,   6,   7,  ..., 188, 189, 190, 191, 192, 193, 194,
       195]), verbose=0, parameters={'alpha': 0.0, 'optimizer': 'fmin_l_bfgs_b'}, fit_params={}, return_train_score='warn', return_parameters=False, return_n_test_samples=True, return_times=True, error_score='raise')
    453 
    454     try:
    455         if y_train is None:
    456             estimator.fit(X_train, **fit_params)
    457         else:
--> 458             estimator.fit(X_train, y_train, **fit_params)
        estimator.fit = <bound method GaussianProcessRegressor.fit of Ga...   optimizer='fmin_l_bfgs_b', random_state=None)>
        X_train =       MSSubClass    LotArea  OverallQual  Overal...                    0  

[782 rows x 220 columns]
        y_train = 923     12.170451
182     11.695255
987     12.8...6816
Name: SalePrice, Length: 782, dtype: float64
        fit_params = {}
    459 
    460     except Exception as e:
    461         # Note fit time as time until error
    462         fit_time = time.time() - start_time

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\sklearn\gaussian_process\gpr.py in fit(self=GaussianProcessRegressor(alpha=0.0, copy_X_train...    optimizer='fmin_l_bfgs_b', random_state=None), X=array([[4.79579055, 8.9888205 , 6.        , ...,....., 0.        , 1.        ,
        0.        ]]), y=array([12.17045065, 11.69525536, 12.88712953, 11...6, 11.6526961 ,
       12.15452142, 12.06681633]))
    242         # Precompute quantities required for predictions which are independent
    243         # of actual query points
    244         K = self.kernel_(self.X_train_)
    245         K[np.diag_indices_from(K)] += self.alpha
    246         try:
--> 247             self.L_ = cholesky(K, lower=True)  # Line 2
        self.L_ = undefined
        K = array([[1., 0., 0., ..., 0., 0., 0.],
       [0...., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])
    248         except np.linalg.LinAlgError as exc:
    249             exc.args = ("The kernel, %s, is not returning a "
    250                         "positive definite matrix. Try gradually "
    251                         "increasing the 'alpha' parameter of your "

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\scipy\linalg\decomp_cholesky.py in cholesky(a=array([[1., 0., 0., ..., 0., 0., 0.],
       [0...., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]]), lower=True, overwrite_a=False, check_finite=True)
     86     array([[ 1.+0.j,  0.-2.j],
     87            [ 0.+2.j,  5.+0.j]])
     88 
     89     """
     90     c, lower = _cholesky(a, lower=lower, overwrite_a=overwrite_a, clean=True,
---> 91                          check_finite=check_finite)
        check_finite = True
     92     return c
     93 
     94 
     95 def cho_factor(a, lower=False, overwrite_a=False, check_finite=True):

...........................................................................
C:\Users\User\Anaconda3\envs\py2\lib\site-packages\scipy\linalg\decomp_cholesky.py in _cholesky(a=array([[1., 0., 0., ..., 0., 0., 0.],
       [0...., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]]), lower=True, overwrite_a=False, clean=True, check_finite=True)
     35     overwrite_a = overwrite_a or _datacopied(a1, a)
     36     potrf, = get_lapack_funcs(('potrf',), (a1,))
     37     c, info = potrf(a1, lower=lower, overwrite_a=overwrite_a, clean=clean)
     38     if info > 0:
     39         raise LinAlgError("%d-th leading minor of the array is not positive "
---> 40                           "definite" % info)
        info = 721
     41     if info < 0:
     42         raise ValueError('LAPACK reported an illegal value in {}-th argument'
     43                          'on entry to "POTRF".'.format(-info))
     44     return c, lower

LinAlgError: ("The kernel, 1**2 * RBF(length_scale=1), is not returning a positive definite matrix. Try gradually increasing the 'alpha' parameter of your GaussianProcessRegressor estimator.", '721-th leading minor of the array is not positive definite')
___________________________________________________________________________

In [None]:
#score_gpr = rmse_CV(best_gpr, X_test, y_test)

### Gradient Boosting Regressor

In [20]:
from sklearn.ensemble import GradientBoostingRegressor

#Create Regressor
GBR = GradientBoostingRegressor()

#Create Hyperparamter Search Space
loss = ['ls', 'lad', 'huber', 'quantile']
criterion = ['mse']
alpha = [0.9, 0.5, 0.1, 0.05, 0.01, 0.001, 0.0001]
max_features = ['auto', 'sqrt', 'log2', None]
max_depth = [2, 4, 6, 8, 10, 12]

#Create hyperparameter options
GBR_param = dict(loss=loss,criterion=criterion, alpha=alpha, max_features=max_features, max_depth=max_depth)

#Apply Grid Search To Determine the best model
s_time = time.time()
best_GBR= GridSearch(GBR, GBR_param, X_train, y_train)
f_time = time.time()
print "Training and Fine-Tune Time: {:3f}".format((s_time - f_time))



GridSearchCV(cv=5, error_score='raise',
       estimator=GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=3, max_features=None,
             max_leaf_nodes=None, min_impurity_decrease=0.0,
             min_impurity_split=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=100, presort='auto', random_state=None,
             subsample=1.0, verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'max_features': ['auto', 'sqrt', 'log2', None], 'alpha': [0.9, 0.5, 0.1, 0.05, 0.01, 0.001, 0.0001], 'criterion': ['mse'], 'max_depth': [2, 4, 6, 8, 10, 12], 'loss': ['ls', 'lad', 'huber', 'quantile']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

{'alpha': 0.0001,
 'criterion': 'mse',
 'loss': 'ls',
 'max_depth': 4,
 'max_features': 'sqrt'}

Training and Fine-Tune Time: -1492.864000


In [21]:
score_GBR = rmse_CV(best_GBR, X_test, y_test)

RMSE Metric
Scores:  [0.13831081 0.15751812 0.13147632]
Mean: 0.1424350829195781
Standard Deviation:  0.011024247065513146


### Random Forest Regressor

In [22]:
#Random Forest regressor
from sklearn.ensemble import RandomForestRegressor

#Create Regressor
rand_F = RandomForestRegressor()

#Create Hyperparamter Search Space
criterion = ['mse']
max_features = ['auto', 'sqrt', 'log2', None]
max_depth = [2, 4, 6, 8, 10, 12]
n_jobs=[-1]

#Create hyperparameter options
rand_param = dict(criterion=criterion, max_features=max_features, max_depth=max_depth, n_jobs=n_jobs)

#Apply Grid Search To Determine the best model
s_time = time.time()
best_rand_F= GridSearch(rand_F, rand_param, X_train, y_train)
f_time = time.time()
print "Training and Fine-Tune Time: {:3f}".format((s_time - f_time))



GridSearchCV(cv=5, error_score='raise',
       estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
           oob_score=False, random_state=None, verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'max_features': ['auto', 'sqrt', 'log2', None], 'n_jobs': [-1], 'criterion': ['mse'], 'max_depth': [2, 4, 6, 8, 10, 12]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

{'criterion': 'mse', 'max_depth': 10, 'max_features': None, 'n_jobs': -1}

Training and Fine-Tune Time: -17.057000


In [23]:
score_rand = rmse_CV(best_rand_F, X_test, y_test)

RMSE Metric
Scores:  [0.17184903 0.18673306 0.15451401]
Mean: 0.17103203417143517
Standard Deviation:  0.013166050755315364
