# <font color=darkgreen> Building a Deep Neural Network for the Boston Housing Price dataset-Worksheet-v2.0 </font>

Details found at: https://www.cs.toronto.edu/~delve/data/boston/bostonDetail.html

This dataset is relatively smaller than the previous datasets with only 506 data points (404 for training and 102 for testing).
Each data point has a set of 13 features.

In [1]:
import os
import sys
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from tensorflow.keras.datasets import boston_housing
(train_data, train_labels), (test_data, test_labels) = boston_housing.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/boston_housing.npz


In [3]:
print(train_data.shape)
print(test_data.shape)

(404, 13)
(102, 13)


In [4]:
# View the training labels
# Prices are in 10,000s
print(train_labels[1:5])

[42.3 50.  21.1 17.7]


In [5]:
# print one sample data
print(train_data[0])
print(len(train_data[0]))

[  1.23247   0.        8.14      0.        0.538     6.142    91.7
   3.9769    4.      307.       21.      396.9      18.72   ]
13


In [6]:
# Normalize the data by subtracting the mean from each data point and
# dividing by the standard deviation of the data

mean = train_data.mean(axis=0) # since we want the mean for each feature column
print('Mean =', mean)
train_data -= mean

std_dev = train_data.std(axis=0)
print('Std Dev = ', std_dev)
train_data /= std_dev

# Likewise prepare the test data (pre-processing)
test_data -= mean
test_data /= std_dev

Mean = [3.74511057e+00 1.14801980e+01 1.11044307e+01 6.18811881e-02
 5.57355941e-01 6.26708168e+00 6.90106436e+01 3.74027079e+00
 9.44059406e+00 4.05898515e+02 1.84759901e+01 3.54783168e+02
 1.27408168e+01]
Std Dev =  [9.22929073e+00 2.37382770e+01 6.80287253e+00 2.40939633e-01
 1.17147847e-01 7.08908627e-01 2.79060634e+01 2.02770050e+00
 8.68758849e+00 1.66168506e+02 2.19765689e+00 9.39946015e+01
 7.24556085e+00]


In [7]:
model = keras.Sequential([ 
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1)
    ])
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae']) # observe the loss and metrics

2022-02-26 08:50:14.688469: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-26 08:50:14.807253: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-26 08:50:14.808041: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-02-26 08:50:14.809718: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

In [8]:
model.fit(train_data, train_labels, epochs=30, batch_size=16)

2022-02-26 08:50:17.083232: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7faa75daa190>

In [9]:
[mse, mae] = model.evaluate(test_data, test_labels) 



In [10]:
# We are off by 1.12 units i.e. the predicted price is lesser than $11200
ypred = model.predict(test_data)
print('Actual Price = {} and Predicted Price = {}'.format(test_labels[1], ypred[1]))

Actual Price = 18.8 and Predicted Price = [18.39285]


### Understanding K-fold validation

In [11]:
# We create a fucntion to make it easy for multiple calls
def build_model():   
    model = keras.Sequential([ 
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1)
    ])
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae']) # observe the loss and metrics
    return model

In [12]:
# Let us visit K-fold validation
k = 4
num_val_samples = len(train_data) // k
num_epochs = 100
all_scores = []
for i in range(k):
    print('processing fold #%d' % i)
    val_data = train_data[i * num_val_samples: (i + 1) * num_val_samples] 
    val_labels = train_labels[i * num_val_samples: (i + 1) * num_val_samples]
    partial_train_data = np.concatenate( 
        [train_data[:i * num_val_samples],
         train_data[(i + 1) * num_val_samples:]],
        axis=0)
    partial_train_labels = np.concatenate(
        [train_labels[:i * num_val_samples],
         train_labels[(i + 1) * num_val_samples:]],
        axis=0)
    
model = build_model() 
model.fit(partial_train_data, partial_train_labels, 
          epochs=num_epochs, batch_size=1, verbose=0)
val_mse, val_mae = model.evaluate(val_data, val_labels, verbose=0) 
all_scores.append(val_mae)

processing fold #0
processing fold #1
processing fold #2
processing fold #3


In [13]:
print(all_scores)
print(np.mean(all_scores))

[2.5299148559570312]
2.5299148559570312


In [14]:
model = build_model() 
model.fit(train_data, train_labels, epochs=80, batch_size=16, verbose=0)
test_mse_score, test_mae_score = model.evaluate(test_data, test_labels)
print(test_mse_score, test_mae_score)

16.628219604492188 2.6874189376831055
