In [26]:
from keras import models, layers
from keras import optimizers
from keras import losses, metrics

In [27]:
## fetch the Boston housing dataset; load_data() hands back a (features, targets)
## pair for the training split followed by the same pair for the test split
from keras.datasets import boston_housing

(
    (train_data, train_targets),
    (test_data, test_targets),
) = boston_housing.load_data()

In [28]:
## quick look at the training split: array shape, first sample, first ten targets
## (a single print with a newline separator emits the same text as five prints)
print(
    train_data.shape,
    "---"*20,
    train_data[0],
    "---"*20,
    train_targets[:10],
    sep="\n",
)

(404, 13)
------------------------------------------------------------
[  1.23247   0.        8.14      0.        0.538     6.142    91.7
   3.9769    4.      307.       21.      396.9      18.72   ]
------------------------------------------------------------
[15.2 42.3 50.  21.1 17.7 18.5 11.3 15.6 15.6 14.4]


In [29]:
## quick look at the test split: array shape, first sample, first ten targets
## (a single print with a newline separator emits the same text as five prints)
print(
    test_data.shape,
    "---"*20,
    test_data[0],
    "---"*20,
    test_targets[:10],
    sep="\n",
)

(102, 13)
------------------------------------------------------------
[ 18.0846   0.      18.1      0.       0.679    6.434  100.       1.8347
  24.     666.      20.2     27.25    29.05  ]
------------------------------------------------------------
[ 7.2 18.8 19.  27.  22.2 24.5 31.2 22.9 20.5 23.2]


In [30]:
## normalize the data
## step 1: mean of every feature column, taken over the training samples only
## (axis=0 collapses the sample axis, leaving one value per column)
mean = train_data.mean(axis=0)
mean.shape  # sanity check: one entry per feature column

(13,)

In [31]:
## display the per-column training means
mean

array([3.74511057e+00, 1.14801980e+01, 1.11044307e+01, 6.18811881e-02,
       5.57355941e-01, 6.26708168e+00, 6.90106436e+01, 3.74027079e+00,
       9.44059406e+00, 4.05898515e+02, 1.84759901e+01, 3.54783168e+02,
       1.27408168e+01])

In [32]:
## step 2: center every feature by subtracting its column mean from each sample
## NOTE: train_data is rebound in terms of itself, so re-running only this cell
## subtracts the mean a second time -- restart and run all cells in order instead
train_data = train_data - mean

In [33]:
# step 3: compute the standard deviation of each column and DIVIDE every data
# point by it (not subtract), so each feature ends up with unit standard deviation
# NOTE: train_data is rebound in terms of itself; re-running only this cell is not
# a no-op in general, so execute the normalization cells once, in order
std = train_data.std(axis=0)
train_data = train_data/std

In [34]:
## normalize the test data using the mean/std previously calculated from the
## training data -- the test set's own statistics are never computed or used
## NOTE: test_data is rebound in terms of itself, so re-running this cell would
## shift and scale the test set a second time
test_data = test_data - mean
test_data = test_data/std

In [35]:
## build model
def build_model(input_dim=None):
    """Create and compile a small fully connected regression network.

    The network stacks two 64-unit ReLU ``Dense`` layers followed by a single
    ``Dense(1)`` output with no activation. It is compiled with the RMSprop
    optimizer, mean-squared-error loss, and mean-absolute-error as the metric,
    so ``model.evaluate`` returns ``[mse, mae]``.

    Args:
        input_dim: Number of input features per sample. When omitted, it is
            taken from the notebook-level ``train_data`` array
            (``train_data.shape[1]``), which keeps the original zero-argument
            call ``build_model()`` working unchanged.

    Returns:
        A new, compiled ``Sequential`` model.
    """
    if input_dim is None:
        input_dim = train_data.shape[1]

    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(input_dim,)))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))  # linear output: predicts an unbounded scalar
    model.compile(
        # `learning_rate` is the current argument name; the old `lr` spelling
        # is deprecated in newer Keras releases
        optimizer=optimizers.RMSprop(learning_rate=0.001),
        loss=losses.mse,
        metrics=[metrics.mae],
    )
    return model

In [None]:
## k-fold cross-validation: train k models, each validated on a different
## contiguous slice of the training data, and record each fold's validation MAE
import numpy as np

k = 4
# fold size; if len(train_data) is not a multiple of k, the trailing remainder
# rows are never used for validation (they always land in the training part)
num_val_samples = len(train_data) // k
num_epochs = 100
all_scores = []  # validation MAE of each fold, in fold order

for i in range(k):
    print('processing fold #', i)

    # rows [val_start, val_stop) are held out for validation in this fold
    val_start = i * num_val_samples
    val_stop = (i + 1) * num_val_samples
    val_data = train_data[val_start:val_stop]
    val_targets = train_targets[val_start:val_stop]

    # everything outside the validation slice is used for training
    partial_train_data = np.concatenate(
        [train_data[:val_start], train_data[val_stop:]],
        axis=0)
    partial_train_targets = np.concatenate(
        [train_targets[:val_start], train_targets[val_stop:]],
        axis=0)

    # build a new model for every fold so no weights carry over between folds
    model = build_model()
    # verbose=0: with batch_size=1 the default progress logging would print an
    # update for every epoch of every fold and bury the notebook in output
    model.fit(partial_train_data, partial_train_targets,
              epochs=num_epochs, batch_size=1, verbose=0)

    # evaluate returns [loss, metric] == [mse, mae] as compiled in build_model
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_mae)

In [38]:
## validation MAE recorded for each fold, in fold order
all_scores

[2.0581326413862775, 2.2001309324019025, 2.8750618802438868, 2.410775157484678]

In [39]:
## average of the per-fold validation MAE values
np.mean(all_scores)

2.386025152879186