# Introduction
<hr style="border:2px solid black">


**What?** Regression of Boston house prices



# Import modules
<hr style="border:2px solid black">

In [1]:
import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

### Develop a baseline NN model

In [7]:
# load dataset
# The problem that we will look at in this tutorial is the Boston house price
# dataset. The dataset describes properties of houses in Boston suburbs and is
# concerned with modeling the price of houses.
# The file is whitespace-delimited with no header row; sep=r"\s+" is the
# documented replacement for the deprecated delim_whitespace=True flag.
dataframe = pandas.read_csv("../DATASETS/housingBoston.csv", sep=r"\s+", header=None)
dataset = dataframe.values
# split into input (X) and output (Y) variables:
# columns 0-12 are the inputs, column 13 is the target
X = dataset[:, 0:13]
Y = dataset[:, 13]


# define base model
def baseline_model():
    """Build and compile the baseline regression network.

    Topology: 13 inputs -> one hidden layer of 13 ReLU units -> a single
    output unit with no activation. Weights use the "normal" initializer and
    the model is compiled with mean squared error loss and the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    layers = [
        Dense(13, input_dim=13, kernel_initializer="normal", activation="relu"),
        Dense(1, kernel_initializer="normal"),
    ]
    network = Sequential(layers)
    network.compile(loss="mean_squared_error", optimizer="adam")
    return network

# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

# evaluate the baseline model on the raw (unstandardized) inputs using
# 10-fold cross-validation; the fold shuffle is seeded explicitly so the
# splits do not depend on the state of the global numpy RNG
estimator = KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=5, verbose=0)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, Y, cv=kfold)

# Report result: mean and standard deviation of the per-fold scores.
# NOTE(review): the recorded run prints a negative mean, so the wrapper
# presumably reports negated MSE (higher is better) - confirm.
print("Baseline: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Baseline: -21.06 (6.90) MSE


### Lift performance by standardizing the dataset

In [None]:
"""
An important concern with the Boston house price dataset is that the input attributes all vary in their 
scales because they measure different quantities. It is almost always good practice to prepare your data 
before modeling it using a neural network model.
"""

In [8]:
# load dataset (whitespace-delimited, no header row); sep=r"\s+" is the
# documented replacement for the deprecated delim_whitespace=True flag
dataframe = pandas.read_csv("../DATASETS/housingBoston.csv", sep=r"\s+", header=None)
dataset = dataframe.values
# split into input (X) and output (Y) variables:
# columns 0-12 are the inputs, column 13 is the target
X = dataset[:, 0:13]
Y = dataset[:, 13]


# define base model
def baseline_model():
    """Build and compile the baseline regression network.

    Topology: 13 inputs -> one hidden layer of 13 ReLU units -> a single
    output unit with no activation. Weights use the "normal" initializer and
    the model is compiled with mean squared error loss and the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    layers = [
        Dense(13, input_dim=13, kernel_initializer="normal", activation="relu"),
        Dense(1, kernel_initializer="normal"),
    ]
    network = Sequential(layers)
    network.compile(loss="mean_squared_error", optimizer="adam")
    return network

# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# evaluate model with standardized dataset: the scaler is a pipeline step, so
# it is fitted together with the network on each cross-validation split
estimators = [
    ("standardize", StandardScaler()),
    ("mlp", KerasRegressor(build_fn=baseline_model, epochs=50, batch_size=5, verbose=0)),
]
pipeline = Pipeline(estimators)

# seed the fold shuffle explicitly so the splits do not depend on the state
# of the global numpy RNG
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Standardized: -31.86 (37.07) MSE


### Evaluate a Deeper Network Topology

In [None]:
"""
One way to improve the performance of a neural network is to add more layers. This might allow the model to extract
and recombine higher order features embedded in the data. 13 inputs -> [13 -> 6] -> 1 output
"""

In [10]:
# load dataset (whitespace-delimited, no header row); sep=r"\s+" is the
# documented replacement for the deprecated delim_whitespace=True flag
dataframe = pandas.read_csv(
    "../DATASETS/housingBoston.csv", sep=r"\s+", header=None)
dataset = dataframe.values
# split into input (X) and output (Y) variables:
# columns 0-12 are the inputs, column 13 is the target
X = dataset[:, 0:13]
Y = dataset[:, 13]


# define base model
def baseline_model():
    """Build and compile the deeper regression network.

    Topology: 13 inputs -> 13 ReLU units -> 6 ReLU units -> a single output
    unit with no activation. Weights use the "normal" initializer and the
    model is compiled with mean squared error loss and the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    layers = [
        Dense(13, input_dim=13,
              kernel_initializer="normal", activation="relu"),
        Dense(6, kernel_initializer="normal", activation="relu"),
        Dense(1, kernel_initializer="normal"),
    ]
    network = Sequential(layers)
    network.compile(loss="mean_squared_error", optimizer="adam")
    return network


# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# evaluate model with standardized dataset: the scaler is a pipeline step, so
# it is fitted together with the network on each cross-validation split
estimators = [
    ("standardize", StandardScaler()),
    ("mlp", KerasRegressor(
        build_fn=baseline_model, epochs=50, batch_size=5, verbose=0)),
]
pipeline = Pipeline(estimators)

# seed the fold shuffle explicitly so the splits do not depend on the state
# of the global numpy RNG
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Larger: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Larger: -15.12 (7.90) MSE


### Evaluate a Wider Network Topology

In [None]:
"""
Another approach to increasing the representational capacity of the model is to create a wider network. In 
this section we evaluate the effect of keeping a shallow network architecture and nearly doubling the number 
of neurons in the one hidden layer. 13 inputs -> [20] -> 1 output
"""

In [11]:
# load dataset (whitespace-delimited, no header row); sep=r"\s+" is the
# documented replacement for the deprecated delim_whitespace=True flag
dataframe = pandas.read_csv(
    "../DATASETS/housingBoston.csv", sep=r"\s+", header=None)
dataset = dataframe.values
# split into input (X) and output (Y) variables:
# columns 0-12 are the inputs, column 13 is the target
X = dataset[:, 0:13]
Y = dataset[:, 13]


# define base model
def baseline_model():
    """Build and compile the wider regression network.

    Topology: 13 inputs -> one hidden layer of 20 ReLU units -> a single
    output unit with no activation. Weights use the "normal" initializer and
    the model is compiled with mean squared error loss and the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    layers = [
        Dense(20, input_dim=13,
              kernel_initializer="normal", activation="relu"),
        Dense(1, kernel_initializer="normal"),
    ]
    network = Sequential(layers)
    network.compile(loss="mean_squared_error", optimizer="adam")
    return network


# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

# evaluate model with standardized dataset: the scaler is a pipeline step, so
# it is fitted together with the network on each cross-validation split
estimators = [
    ("standardize", StandardScaler()),
    ("mlp", KerasRegressor(
        build_fn=baseline_model, epochs=50, batch_size=5, verbose=0)),
]
pipeline = Pipeline(estimators)

# seed the fold shuffle explicitly so the splits do not depend on the state
# of the global numpy RNG
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Wider: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Wider: -17.08 (7.76) MSE


# References
<hr style="border:2px solid black">

- https://machinelearningmastery.com/regression-tutorial-keras-deep-learning-library-python/