In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

## Loading the Dataset

In [2]:
# Fetch the Boston housing regression dataset, already split by Keras into
# train and test portions, each as a (features, targets) pair.
(train_data, train_target), (test_data, test_target) = (
    tf.keras.datasets.boston_housing.load_data()
)

# Report the shape of every array, one per line.
print(
    f"Train Data Size - {train_data.shape}",
    f"Train Label Size - {train_target.shape}",
    f"Test Data Size - {test_data.shape}",
    f"Test Label Size - {test_target.shape}",
    sep = "\n",
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/boston_housing.npz
Train Data Size - (404, 13)
Train Label Size - (404,)
Test Data Size - (102, 13)
Test Label Size - (102,)


In [3]:
# Inspect the first training sample (one row of feature values) before any preprocessing.
train_data[0]

array([  1.23247,   0.     ,   8.14   ,   0.     ,   0.538  ,   6.142  ,
        91.7    ,   3.9769 ,   4.     , 307.     ,  21.     , 396.9    ,
        18.72   ])

In [4]:
# Inspect the first test sample (one row of feature values) before any preprocessing.
test_data[0]

array([ 18.0846,   0.    ,  18.1   ,   0.    ,   0.679 ,   6.434 ,
       100.    ,   1.8347,  24.    , 666.    ,  20.2   ,  27.25  ,
        29.05  ])

In [5]:
# Regression target that belongs to the first test sample.
test_target[0]

7.2

In [6]:
# Regression target that belongs to the first training sample.
train_target[0]

15.2

## Normalizing the Dataset

In [7]:
# Standardize every feature column: subtract the column mean and divide by the
# column standard deviation. Both statistics are computed from the training
# split only and then reused for the test split, so nothing measured on the
# test data influences the preprocessing.
# NOTE: the arrays are modified in place; after this cell `train_data` and
# `test_data` hold the normalized values.
mean = train_data.mean(axis = 0)
train_data -= mean
std = train_data.std(axis = 0)
# A constant feature column has zero spread; dividing by it would fill that
# column with NaN/inf. Use a scale of 1 there so the column is only centered.
std[std == 0] = 1.0
train_data /= std

test_data -= mean
test_data /= std

In [8]:
# Same first training sample as before, now showing the normalized feature values.
train_data[0]

array([-0.27224633, -0.48361547, -0.43576161, -0.25683275, -0.1652266 ,
       -0.1764426 ,  0.81306188,  0.1166983 , -0.62624905, -0.59517003,
        1.14850044,  0.44807713,  0.8252202 ])

## Building the Model

In [9]:
def build_model(input_shape = (13, ), hidden_units = 64):
    """Build and compile a small fully connected regression network.

    The network is two ReLU hidden layers followed by a single linear output
    unit (no activation), so it predicts one unbounded scalar per sample.

    Args:
        input_shape: Shape of one input sample, without the batch dimension.
            Defaults to ``(13, )``, the value previously hard-coded here.
        hidden_units: Width of each of the two hidden layers. Defaults to 64,
            the value previously hard-coded here.

    Returns:
        A compiled ``tf.keras`` Sequential model using the ``rmsprop``
        optimizer, mean squared error as the loss, and mean absolute error
        as the reported metric.
    """
    model = tf.keras.models.Sequential()
    # Declare the input with an explicit Input object rather than the
    # `input_shape` keyword on the first Dense layer.
    model.add(tf.keras.Input(shape = input_shape))
    model.add(tf.keras.layers.Dense(units = hidden_units,
                                    activation = 'relu'))
    model.add(tf.keras.layers.Dense(units = hidden_units,
                                    activation = 'relu'))
    # Single linear unit: the raw output is the predicted target value.
    model.add(tf.keras.layers.Dense(units = 1))
    model.compile(optimizer = 'rmsprop',
                  loss = 'mse',
                  metrics = ['mae'])
    return model

## Validating With K-Fold Cross Validation

In [11]:
k = 4
num_val_samples = len(train_data) // k
num_epochs = 100
all_scores = list()

# K-fold cross-validation: every pass holds out one contiguous slice of the
# training set for validation and trains a brand-new model on the remainder.
# NOTE: when len(train_data) is not a multiple of k, the trailing
# len(train_data) % k samples are always trained on and never validated on.
for i in range(k):
    print(f"Processing Fold: {i+1}")
    # Bounds of the slice held out in this fold.
    val_start = i * num_val_samples
    val_stop = val_start + num_val_samples
    val_data = train_data[val_start:val_stop]
    val_targets = train_target[val_start:val_stop]

    # Everything before and after the held-out slice is the training portion.
    partial_train_data = np.concatenate(
            [train_data[:val_start],
            train_data[val_stop:]],
            axis = 0)
    partial_train_target = np.concatenate(
            [train_target[:val_start],
            train_target[val_stop:]],
            axis = 0)

    # Models are created in a loop; reset Keras' global state first so the
    # previous fold's model does not keep accumulating memory.
    tf.keras.backend.clear_session()
    model = build_model()
    model.fit(partial_train_data,
              partial_train_target,
              epochs = num_epochs,
              batch_size = 1,
              verbose = 0)
    # evaluate() returns the loss (MSE) followed by the compiled metric (MAE);
    # only the MAE is kept as this fold's score.
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_mae)

print(f"Mean of Scores: \n {np.mean(all_scores)}")

Processing Fold: 1
Processing Fold: 2
Processing Fold: 3
Processing Fold: 4
Mean of Scores: 
 2.4960142970085144


In [12]:
# Show the validation MAE recorded for each individual fold, in fold order.
print(f"All Scores: \n{all_scores}")

All Scores: 
[2.3173232078552246, 2.704639196395874, 2.487928867340088, 2.474165916442871]
