In [1]:
import tensorflow as tf
import numpy as np

from tensorflow import keras
from keras.datasets import boston_housing


#------------------- def methods -----------------------#


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [2]:
# Load the Boston housing dataset, then report the Python type, the rank (ndim)
# and the per-axis dimensionality (shape) of every array that was returned.
train_split, test_split = boston_housing.load_data()
train_data, train_labels = train_split
test_data, test_labels = test_split

print('Data types')
for heading, arrays in (
    ('Train data and labels', (train_data, train_labels)),
    ('\nTest data and labels', (test_data, test_labels)),
):
    print(heading)
    for array in arrays:
        print(type(array))

print('\nRank and dimensionality for each axis')
for heading, array in (
    ('- train data:', train_data),
    ('- train labels:', train_labels),
    ('\n- test data', test_data),
    ('- test labels:', test_labels),
):
    print(heading)
    print(array.ndim)
    print(array.shape)

Data types
Train data and labels
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>

Test data and labels
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>

Rank and dimensionality for each axis
- train data:
2
(404, 13)
- train labels:
1
(404,)

- test data
2
(102, 13)
- test labels:
1
(102,)


In [3]:
# With so few training samples, a single held-out validation split would be too small to be reliable, so validation
# must be done differently than it was for binary and multiclass classification, for instance with k-fold validation.
# For the same reason, the neural network should not be too complex.

In [4]:
# Show the first sample (feature vector and its label) of each split.
for title, features, targets in (
    ('Train data content', train_data, train_labels),
    ('\nTest data content', test_data, test_labels),
):
    print(title)
    print(features[0], ' - label: ', targets[0])

Train data content
[  1.23247   0.        8.14      0.        0.538     6.142    91.7
   3.9769    4.      307.       21.      396.9      18.72   ]  - label:  15.2

Test data content
[ 18.0846   0.      18.1      0.       0.679    6.434  100.       1.8347
  24.     666.      20.2     27.25    29.05  ]  - label:  7.2


In [5]:
# The data already comes as tensors, so no structural transformation is needed.
# The features are, however, expressed on very different numerical scales
# (e.g. train_data[0][2] << train_data[0][11]), so each feature is normalized:
# subtract the per-feature mean, then divide by the per-feature standard deviation.
# Both arrays are modified in place.

# Statistics are computed on the training data only.
mean = train_data.mean(axis=0)
np.subtract(train_data, mean, out=train_data)
# std is taken from the already-centered training data
std = train_data.std(axis=0)
np.divide(train_data, std, out=train_data)

# The test data is scaled with the training statistics, never with its own.
np.subtract(test_data, mean, out=test_data)
np.divide(test_data, std, out=test_data)

In [6]:
# Print the same first samples again, now that the features have been normalized.
for title, features, targets in (
    ('Train data content', train_data, train_labels),
    ('\nTest data content', test_data, test_labels),
):
    first_features, first_label = features[0], targets[0]
    print(title)
    print(first_features, ' - label: ', first_label)

Train data content
[-0.27224633 -0.48361547 -0.43576161 -0.25683275 -0.1652266  -0.1764426
  0.81306188  0.1166983  -0.62624905 -0.59517003  1.14850044  0.44807713
  0.8252202 ]  - label:  15.2

Test data content
[ 1.55369355 -0.48361547  1.0283258  -0.25683275  1.03838067  0.23545815
  1.11048828 -0.93976936  1.67588577  1.5652875   0.78447637 -3.48459553
  2.25092074]  - label:  7.2


In [7]:
# We can now build the neural network for the problem we are going to face.

# model definition
from keras import layers
from keras import models
from keras import optimizers

model = models.Sequential([
    # 13 input features: see the second axis of the train_data tensor
    layers.Dense(64, activation='relu', input_shape=(13,)),
    layers.Dense(64, activation='relu'),
    # single output unit without activation
    layers.Dense(1),
])

# Mean squared error as the loss, mean absolute error as the reported metric.
model.compile(
    loss='mse',
    metrics=['mae'],
    optimizer='rmsprop',
)

Instructions for updating:
Colocations handled automatically by placer.


In [27]:
# K-fold validation split.
# The dataset is small, so a single held-out validation set would not be
# representative of the data. Instead the training data is cut into k contiguous
# folds; each fold is used once as the validation set while the remaining
# samples form the training partition for that fold.

num_epochs = 20
k = 3 # Number of folds
dataPerFold = train_data.shape[0] // k
print("Data per fold: ", dataPerFold)
for tmpK in range(k): # 0 -> k - 1
    print("# ---------- #")
    print("Current fold: ", tmpK)
    firstIndex = tmpK * dataPerFold
    # The last fold also takes the samples left over by the integer division.
    if tmpK < (k - 1):
        lastIndex = (tmpK + 1) * dataPerFold
    else:
        lastIndex = len(train_data)
    tmpValidationSet = train_data[firstIndex:lastIndex]
    tmpValidationLabels = train_labels[firstIndex:lastIndex]
    print("First index val set: ", firstIndex, "   Last index val set: ", lastIndex)
    # The training partition is every sample outside [firstIndex, lastIndex):
    # the rows before the validation window followed by the rows after it.
    # (The bound must use the current fold tmpK, not the fold count k, and each
    # destination row must receive a distinct source row.)
    tmpTrainData = np.concatenate(
        [train_data[:firstIndex], train_data[lastIndex:]], axis=0)
    tmpTrainLabels = np.concatenate(
        [train_labels[:firstIndex], train_labels[lastIndex:]], axis=0)
    print("Dim tmpTrainData: x -> ", tmpTrainData.shape[0], "   y -> ", tmpTrainData.shape[1])
    print("Dim tmpTrainLabels: y -> ", tmpTrainLabels.shape[0])
    # TODO: use tmpTrainData / tmpTrainLabels for fitting and
    # tmpValidationSet / tmpValidationLabels for collecting the error values:
    #   tmpModel = createModel() # TO DO
    #   fitStatistics = tmpModel.fit() # TO DO
    #   # array of arrays, one per fold (all_mae_histories must be initialized before the loop)
    #   all_mae_histories.append(fitStatistics.history['val_mean_absolute_error']) # == [epoch1 = n, epoch2 = m, ..., epoch20 = z]

# TODO: compute the mean value for each epoch across the folds
#   average_mae_history = [np.mean([x[i] for x in all_mae_histories]) for i in range(num_epochs)]
# TODO: plot (epochs, all_mae_histories)

Data per fold:  134
# ---------- #
Current fold:  0
First index val set:  0    Last index val set:  134
Dim tmpTrainData: x ->  270    y ->  13
Dim tmpTrainLabels: y ->  270
# ---------- #
Current fold:  1
First index val set:  134    Last index val set:  268
Dim tmpTrainData: x ->  270    y ->  13
Dim tmpTrainLabels: y ->  270
# ---------- #
Current fold:  2
First index val set:  268    Last index val set:  404
Dim tmpTrainData: x ->  268    y ->  13
Dim tmpTrainLabels: y ->  268


In [9]:
# Train the compiled model on the whole training set; the returned object is kept in fitStatistics.
fitStatistics = model.fit(
    x=train_data,
    y=train_labels,
    batch_size=16,
    epochs=20,
)

Instructions for updating:
Use tf.cast instead.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [10]:
# Evaluate on the held-out test set and print the values returned by evaluate()
# (the model was compiled with loss='mse' and metrics=['mae']).
test_metrics = model.evaluate(test_data, test_labels)
print(test_metrics)

[21.39389741187002, 3.1629059314727783]
