## Practical 5A

## K-fold cross-validation on the Boston Housing data

In [1]:
import numpy as np
import cv2
from keras import models
from keras import layers
from keras import optimizers
from keras import losses
from keras import metrics
from keras.utils.np_utils import to_categorical
from keras.datasets import boston_housing

In [2]:
# Load the Boston Housing data. load_data() hands back two pairs, unpacked here as
# (data_train, target_train) for the training split and (data_test, target_test)
# for the test split.
(data_train, target_train), (data_test, target_test) = boston_housing.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/boston_housing.npz


In [4]:
# Inspect the first training sample: its feature vector followed by its target value
print(data_train[0], target_train[0])

[  1.23247   0.        8.14      0.        0.538     6.142    91.7
   3.9769    4.      307.       21.      396.9      18.72   ] 15.2


In [12]:
def build_network():
    """Create and compile a small fully connected network for regression.

    The network has two hidden Dense layers of 64 ReLU units each, followed by
    a single-unit Dense output layer with no activation argument. The input
    width is read from the module-level ``data_train`` (its number of columns).

    Returns:
        A compiled ``Sequential`` model that uses the 'rmsprop' optimizer,
        mean squared error ('mse') as the loss and mean absolute error
        ('mae') as the reported metric.
    """
    num_features = data_train.shape[1]
    network = models.Sequential([
        layers.Dense(64, activation='relu', input_shape=(num_features, )),
        layers.Dense(64, activation='relu'),
        layers.Dense(1),
    ])
    # For regression we train on mean squared error and monitor mean absolute error
    network.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return network

In [7]:
# Data preprocessing for feeding into the neural network
# Common options:
# Standardization, Normalization, Binarization or One hot encoding
# We will apply standardization in this case:
# first remove the per-feature mean so the data is centered, then scale it by
# dividing by the per-feature standard deviation.
# NOTE: the arrays are modified in place (-=, /=).
mean = data_train.mean(axis=0)
data_train -= mean
std = data_train.std(axis=0)  # taken from the already-centered training data
data_train /= std

# The test set is shifted and scaled with the training-set mean and std;
# no statistic is computed from the test data itself.
data_test -= mean
data_test /= std

For background on K-fold cross-validation, see
[Why and how to cross-validate a model](https://towardsdatascience.com/why-and-how-to-cross-validate-a-model-d6424b45261f).

In [8]:
# Multifold validation when data is limited (K fold cross validation)
k = 4  # number of folds
val_samples_num = len(data_train) // k  # validation samples per fold (integer division drops any remainder)
num_epochs = 100  # training epochs for each fold's model
scores_all = []  # filled with one validation MAE per fold

In [13]:
# K-fold cross-validation: each of the k folds is held out once for validation
# while a freshly built model is trained on the remaining folds.
#
# Start from an empty list so that re-running this cell does not append a
# second set of scores to the ones left over from a previous run.
scores_all = []
for i in range(k):
    # Row range of the held-out fold, computed once and reused for data and targets.
    # Any rows beyond k * val_samples_num are never held out; they stay in every
    # training split.
    fold_start = i * val_samples_num
    fold_stop = (i + 1) * val_samples_num

    # Held-out fold used only for validation
    val_data = data_train[fold_start:fold_stop]
    val_targets = target_train[fold_start:fold_stop]

    # Everything before and after the held-out fold forms the training split
    part_data_train = np.concatenate([data_train[:fold_start], data_train[fold_stop:]], axis=0)
    partial_train_targets = np.concatenate([target_train[:fold_start], target_train[fold_stop:]], axis=0)

    # A new model per fold, so no weights carry over between folds
    model = build_network()
    model.fit(part_data_train, partial_train_targets, epochs=num_epochs, batch_size=1, verbose=0)

    # evaluate() yields the loss (mse) followed by the compiled metric (mae)
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    scores_all.append(val_mae)

In [15]:
# Print the validation MAE recorded for each fold
print(scores_all)

[2.2645530700683594, 2.8130197525024414, 2.6335978507995605, 2.4360902309417725]


In [16]:
# Print the mean of the per-fold validation MAE values
print(np.mean(scores_all))

2.5368152260780334
