In [1]:
import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

Using TensorFlow backend.


In [2]:
# Load the whitespace-delimited, header-less housing data.
# sep=r"\s+" is the documented equivalent of delim_whitespace=True, which
# pandas 2.2 deprecated; the regex separator works on old and new pandas.
dataframe = pandas.read_csv("housing.csv", sep=r"\s+", header=None)
dataset = dataframe.values

# split into input (X) and output (Y) variables:
# columns 0-12 are the 13 model inputs, column 13 is the regression target
X = dataset[:, 0:13]
Y = dataset[:, 13]

In [5]:
# preview the first three rows of the inputs and the matching targets
X[:3], Y[:3]

(array([[  6.32000000e-03,   1.80000000e+01,   2.31000000e+00,
           0.00000000e+00,   5.38000000e-01,   6.57500000e+00,
           6.52000000e+01,   4.09000000e+00,   1.00000000e+00,
           2.96000000e+02,   1.53000000e+01,   3.96900000e+02,
           4.98000000e+00],
        [  2.73100000e-02,   0.00000000e+00,   7.07000000e+00,
           0.00000000e+00,   4.69000000e-01,   6.42100000e+00,
           7.89000000e+01,   4.96710000e+00,   2.00000000e+00,
           2.42000000e+02,   1.78000000e+01,   3.96900000e+02,
           9.14000000e+00],
        [  2.72900000e-02,   0.00000000e+00,   7.07000000e+00,
           0.00000000e+00,   4.69000000e-01,   7.18500000e+00,
           6.11000000e+01,   4.96710000e+00,   2.00000000e+00,
           2.42000000e+02,   1.78000000e+01,   3.92830000e+02,
           4.03000000e+00]]), array([ 24. ,  21.6,  34.7]))

In [6]:
# define base model
def baseline_model():
    """Build and compile the baseline regression network.

    Architecture: 13 inputs -> Dense(13, relu) -> Dense(1), both layers
    using the 'normal' initializer. Compiled with mean squared error loss
    and the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    layers = [
        Dense(13, input_dim=13, init='normal', activation='relu'),
        Dense(1, init='normal'),
    ]
    network = Sequential(layers)
    network.compile(loss='mean_squared_error', optimizer='adam')
    return network

In [7]:
# seed numpy's global RNG so repeated runs start from the same state
seed = 7
numpy.random.seed(seed)

# wrap the baseline network as a scikit-learn regressor; this estimator is
# cross-validated on the raw (unstandardized) inputs
estimator = KerasRegressor(
    build_fn=baseline_model,
    nb_epoch=100,
    batch_size=5,
    verbose=0,
)

In [8]:
# 10-fold cross-validation of the baseline network on the raw inputs.
# random_state is deliberately not passed: KFold only consults it when
# shuffle=True, and recent scikit-learn raises ValueError when it is set
# without shuffling. The folds stay the same contiguous, unshuffled splits.
kfold = KFold(n_splits=10)
results = cross_val_score(estimator, X, Y, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Results: 32.92 (28.02) MSE


In [9]:
# evaluate the baseline model with standardized inputs
# The scaler is a pipeline step, so it is fitted together with the network
# each time cross_val_score fits the pipeline.
# NOTE(review): this run trains for 50 epochs while the raw-input baseline
# above used 100 -- confirm the difference is intended.
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasRegressor(build_fn=baseline_model, nb_epoch=50, batch_size=5, verbose=0)),
]
pipeline = Pipeline(estimators)

# random_state is deliberately not passed: KFold only consults it when
# shuffle=True, and recent scikit-learn raises ValueError when it is set
# without shuffling. The folds stay the same contiguous, unshuffled splits.
kfold = KFold(n_splits=10)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Standardized: 29.10 (28.26) MSE


In [10]:
def larger_model():
    """Build and compile a deeper variant of the regression network.

    Architecture: 13 inputs -> Dense(13, relu) -> Dense(6, relu) -> Dense(1),
    every layer using the 'normal' initializer. Compiled with mean squared
    error loss and the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    layers = [
        Dense(13, input_dim=13, init='normal', activation='relu'),
        Dense(6, init='normal', activation='relu'),
        Dense(1, init='normal'),
    ]
    network = Sequential(layers)
    network.compile(loss='mean_squared_error', optimizer='adam')
    return network

In [12]:
# evaluate the deeper network on standardized inputs
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasRegressor(build_fn=larger_model, nb_epoch=50, batch_size=5, verbose=0)),
]
pipeline = Pipeline(estimators)

# random_state is deliberately not passed: KFold only consults it when
# shuffle=True, and recent scikit-learn raises ValueError when it is set
# without shuffling. The folds stay the same contiguous, unshuffled splits.
kfold = KFold(n_splits=10)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Larger: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Larger: 23.22 (24.83) MSE


In [13]:
def wider_model():
    """Build and compile a wider variant of the regression network.

    Architecture: 13 inputs -> Dense(20, relu) -> Dense(1), both layers
    using the 'normal' initializer. Compiled with mean squared error loss
    and the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    layers = [
        Dense(20, input_dim=13, init='normal', activation='relu'),
        Dense(1, init='normal'),
    ]
    network = Sequential(layers)
    network.compile(loss='mean_squared_error', optimizer='adam')
    return network

In [14]:
# evaluate the wider network on standardized inputs
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasRegressor(build_fn=wider_model, nb_epoch=100, batch_size=5, verbose=0)),
]
pipeline = Pipeline(estimators)

# random_state is deliberately not passed: KFold only consults it when
# shuffle=True, and recent scikit-learn raises ValueError when it is set
# without shuffling. The folds stay the same contiguous, unshuffled splits.
kfold = KFold(n_splits=10)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Wider: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Wider: 21.89 (21.49) MSE


In [15]:
def wider_larger_model():
    """Build and compile a wider and deeper variant of the regression network.

    Architecture: 13 inputs -> Dense(20, relu) -> Dense(10, relu) -> Dense(1),
    every layer using the 'normal' initializer. Compiled with mean squared
    error loss and the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    layers = [
        Dense(20, input_dim=13, init='normal', activation='relu'),
        Dense(10, init='normal', activation='relu'),
        Dense(1, init='normal'),
    ]
    network = Sequential(layers)
    network.compile(loss='mean_squared_error', optimizer='adam')
    return network

In [16]:
# evaluate the wider-and-deeper network on standardized inputs
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasRegressor(build_fn=wider_larger_model, nb_epoch=100, batch_size=5, verbose=0)),
]
pipeline = Pipeline(estimators)

# random_state is deliberately not passed: KFold only consults it when
# shuffle=True, and recent scikit-learn raises ValueError when it is set
# without shuffling. The folds stay the same contiguous, unshuffled splits.
kfold = KFold(n_splits=10)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Wider and Larger: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Wider and Larger: 21.53 (23.68) MSE
