We can use scikit-learn’s Pipeline to perform the standardization during the
model evaluation process, within each fold of the cross validation. This ensures that no
information leaks from a fold's held-out test set into the data used to fit the scaler and the model. The code below
creates a scikit-learn Pipeline that first standardizes the dataset and then creates and evaluates
the baseline neural network model.

In [69]:
import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [70]:
# Load the whitespace-separated housing file; it has no header row.
# sep=r"\s+" is the documented equivalent of the deprecated delim_whitespace=True.
dataframe = pandas.read_csv("housing.csv", sep=r"\s+", header=None)
dataset = dataframe.values
# Columns 0-12 are used as the model inputs, column 13 as the regression target.
X = dataset[:, 0:13]
Y = dataset[:, 13]


In [71]:
#define model
def baseline_model():
    """Build and compile the baseline regression network.

    The network is a Sequential stack of two Dense layers: a 13-unit
    ReLU layer that takes 13 inputs, followed by a single output unit
    with no activation argument. It is compiled with the
    mean-squared-error loss and the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    # NOTE(review): `init` is the Keras 1 keyword for the weight
    # initializer (Keras 2 calls it `kernel_initializer`) -- confirm it
    # matches the installed Keras version.
    layers = [
        Dense(13, input_dim=13, init='normal', activation='relu'),
        Dense(1, init='normal'),
    ]
    network = Sequential(layers)
    network.compile(optimizer='adam', loss='mean_squared_error')
    return network

In [72]:
# Seed NumPy's global random number generator with a fixed value so that
# repeated runs start from the same random state.
seed = 7
numpy.random.seed(seed)

In [73]:
# Assemble the evaluation pipeline: standardize the inputs, then fit the
# Keras regressor. Because the scaler is a pipeline step it is fitted inside
# whatever data the pipeline itself is fitted on.
# NOTE(review): `nb_epoch` is the Keras 1 spelling of the epoch count
# (Keras 2 calls it `epochs`) -- confirm the installed Keras honours it.
regressor = KerasRegressor(
    build_fn=baseline_model,
    nb_epoch=50,
    batch_size=5,
    verbose=0,
)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', regressor),
]
pipeline = Pipeline(estimators)

In [74]:
# 10 consecutive, unshuffled folds. random_state is deliberately not passed:
# it has no effect unless shuffle=True, and scikit-learn >= 0.24 raises a
# ValueError when it is set with shuffle=False. The resulting splits are the
# same as before.
kfold = KFold(n_splits=10)

In [75]:
# Cross-validate the pipeline on inputs X and targets Y using the folds
# defined by kfold; `results` holds one score per fold.
results = cross_val_score(estimator=pipeline, X=X, y=Y, cv=kfold)

In [76]:
# Report the mean and the standard deviation of the per-fold scores.
print(results.mean())
print(results.std())

29.0860718637
24.041598862
