In [2]:
import pandas as pd
from sklearn import ensemble
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error

In [3]:
# Load Data
df = pd.read_csv("ml_house_data_set.csv")

In [4]:
# Removing insignificant features
del df['house_number']
del df['unit_number']
del df['street_name']
del df['zip_code']

In [9]:
# Replacing categorical data with one-hot encoded data
features_df = pd.get_dummies(df, columns = ['garage_type', 'city'])

In [11]:
del features_df['sale_price']

In [13]:
# Converting features into matrix / Array form 
X = features_df.values
y = df['sale_price'].values

In [14]:
# Splitting data into training set (70%) and test data (30%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

In [15]:
# Create a model
model = ensemble.GradientBoostingRegressor()

In [20]:
model

GradientBoostingRegressor()

In [16]:
# Parameters of the model 
param_grid = {
    'n_estimators': [500,1000,3000],
    'max_depth': [4,6],
    'min_samples_leaf': [3,5,9,17],
    'learning rate': [0.1, 0.05, 0.02, 0.01],
    'max_features': [1.0, 0.3, 0.1],
    'loss': ['ls', 'lad', 'huber'] 
}

In [21]:
param_grid

{'n_estimators': [500, 1000, 3000],
 'max_depth': [4, 6],
 'min_samples_leaf': [3, 5, 9, 17],
 'learning rate': [0.1, 0.05, 0.02, 0.01],
 'max_features': [1.0, 0.3, 0.1],
 'loss': ['ls', 'lad', 'huber']}

In [22]:
# Define grid search and run 4 jobs
gs_cv = GridSearchCV(model, param_grid, n_jobs = 4)

In [23]:
gs_cv

GridSearchCV(estimator=GradientBoostingRegressor(), n_jobs=4,
             param_grid={'learning rate': [0.1, 0.05, 0.02, 0.01],
                         'loss': ['ls', 'lad', 'huber'], 'max_depth': [4, 6],
                         'max_features': [1.0, 0.3, 0.1],
                         'min_samples_leaf': [3, 5, 9, 17],
                         'n_estimators': [500, 1000, 3000]})

In [24]:
# Running Grid Search only on training data
gs_cv.fit(X_train, y_train)

ValueError: Invalid parameter learning rate for estimator GradientBoostingRegressor(). Check the list of available parameters with `estimator.get_params().keys()`.