# Housing Prices in King County, WA: Artificial Neural Network
Goal
- Use Multi-layer Perceptron Neural Networks to create a model that predicts the sale price of homes given various attributes about the house

## Obtain Data

In [3]:
# global imports

# sklearn features
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.neural_network import MLPRegressor

In [2]:
# import dataframes 
%store -r dfs

# assign dataframes to variables
X_train = dfs[0]
X_val = dfs[1]
X_test = dfs[2]
y_train = dfs[3]
y_val = dfs[4]
y_test = dfs[5] 

# check importing data frames worked
df = [X_train, X_val, X_test, y_train, y_val, y_test]
for d in df:
    print(d.shape)

(3181, 18)
(682, 18)
(682, 18)
(3181, 1)
(682, 1)
(682, 1)


## Train Artificial Neural Network Model

In [4]:
mlp_model = MLPRegressor()

# function that evaluates the model
def evaluate(model, test_pred, test_resp):
    # predictions
    predict = model.predict(test_pred)
    # metrics on test data
    test_mae = mean_absolute_error(test_resp,  predict)
    test_mse = mean_squared_error(test_resp,  predict)
    # print results
    print("Mean Absolute Error: %s" %test_mae)
    print("Mean Squared Error: %s" %test_mse)

In [14]:
# evaluate the base model
base_model = MLPRegressor(random_state = 123, max_iter = 4000)
base_model.fit(X_train, y_train.values.ravel())
evaluate(base_model, X_val, y_val.values.ravel())

Mean Absolute Error: 165460.1885504284
Mean Squared Error: 59299792004.01469




In [7]:
# random search grid
random_grid = {
    'hidden_layer_sizes': [50, 100, 200, 300],
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'sgd', 'adam'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate': ['constant', 'invscaling', 'adaptive']}

# fit random search
mlp_random = RandomizedSearchCV(estimator = mlp_model, param_distributions = random_grid, n_iter = 
                               100, cv = 3, scoring = 'neg_mean_squared_error')
mlp_random.fit(X_train, y_train.values.ravel())

# output best parameters from random search
mlp_random.best_params_

  return ((y_true - y_pred) ** 2).mean() / 2


ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [None]:
# evaluate best random search model
best_random = mlp_random.best_estimator_
evaluate(best_random, X_val, y_val.values.ravel())

In [None]:
# grid search parameters
params_grid = {
    'bootstrap': [False],
    'max_depth': [65, 75, 85],
    'max_features': ['sqrt'],
    'min_samples_leaf': [2,3],
    'min_samples_split': [3, 5, 7],
    'n_estimators': [1800, 2000, 2200]}

# fit grid search
mlp_grid = GridSearchCV(estimator = mlp_model, param_grid = params_grid, cv = 3, scoring = 
                       'neg_mean_squared_error')
mlp_grid.fit(X_train, y_train.values.ravel())

# output best parameters from grid search
mlp_grid.best_params_

In [None]:
# evaluate best grid search model
best_grid = mlp_grid.best_estimator_
evaluate(best_grid, X_val, y_val.values.ravel())

## Final Artificial Neural Network Model

In [None]:
# check model performance on the test dataset
final_model = MLPRegressor(n_estimators = 1800, min_samples_split = 5, min_samples_leaf = 
                                    2, max_features = 'sqrt', bootstrap = False, max_depth = 75)
final_model.fit(X_train, y_train.values.ravel())
evaluate(final_model, X_test, y_test.values.ravel())