In [1]:
# Note : This project is very similar to the handwritten digits recognition notebook.
# It is used to compare the performance of models with different hidden layers.

In [2]:
# Networks implemented:
# (i) MLP with single input layout, single output layer.
# (ii) Add a single hidden layer to previous network
# (iii) Add two hidden layers to previous network.

In [7]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [5]:
# Load the data using keras.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.boston_housing.load_data()

print("x_train shape : {}\ny_train shape : {}\nx_test shape : {}\ny_test shape : {}\n"
      .format(x_train.shape, y_train.shape, x_test.shape, y_test.shape))

print("x_train : {}".format(x_train[0]))
print("y_train : {}".format(y_train[0]))

x_train shape : (404, 13)
y_train shape : (404,)
x_test shape : (102, 13)
y_test shape : (102,)

x_train : [  1.23247   0.        8.14      0.        0.538     6.142    91.7
   3.9769    4.      307.       21.      396.9      18.72   ]
y_train : 15.2


In [9]:
# Basic data processing, to make values in range of 0..1 to make it easier for network to process values.
mms = MinMaxScaler()
mms.fit(x_train)
x_train = mms.transform(x_train)
x_test = mms.transform(x_test)

In [11]:
num_input_layers = len(x_train[0])
print("num input layes : ", num_input_layers)

num input layes :  13


In [30]:
# Create the network using keras.

# Network one : Model with no hidden layers.
network = tf.keras.Sequential([
     tf.keras.layers.Dense(10, input_dim = num_input_layers,activation='relu'),  # Input layer with num_input_layers neurons
     tf.keras.layers.Dense(1, activation='linear')  # Output layer with 10 neurons 
])

network.summary()

# Brief explanation of the network : 
# Sequential groups a linear stack of layers into a tk.keras.Model.
# The network for input takes in number_of_inputs = num_input_layers.
# Next, when we do tf.keras.layers.Dense, we mean:
# We create a dense (fully connected) layer with 1 units and a relu activiation function.

# Compile the network.
network.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
network.fit(x_train, y_train, epochs=10)

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_14 (Dense)            (None, 10)                140       
                                                                 
 dense_15 (Dense)            (None, 1)                 11        
                                                                 
Total params: 151 (604.00 Byte)
Trainable params: 151 (604.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2838f6548d0>

In [31]:
# Evaluation performance of network.
test_loss, test_acc = network.evaluate(x_test, y_test)
print('MAE : {}'.format(test_acc * 100.0))
print('Loss : {}'.format(test_loss))

MAE : 2026.1079788208008
Loss : 498.1654052734375


In [33]:
# Create the network using keras.

# Network two : Model with one hidden layers.
network = tf.keras.Sequential([
     tf.keras.layers.Dense(10, input_dim = num_input_layers,activation='relu'),  # Input layer with num_input_layers neurons
     tf.keras.layers.Dense(100, activation='relu'),
     tf.keras.layers.Dense(1, activation='linear')  # Output layer with 10 neurons 
])

network.summary()

# Brief explanation of the network : 
# Sequential groups a linear stack of layers into a tk.keras.Model.
# The network for input takes in number_of_inputs = num_input_layers.
# Next, when we do tf.keras.layers.Dense, we mean:
# We create a dense (fully connected) layer with 1 units and a relu activiation function.

# Compile the network.
network.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
network.fit(x_train, y_train, epochs=10)

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_16 (Dense)            (None, 10)                140       
                                                                 
 dense_17 (Dense)            (None, 100)               1100      
                                                                 
 dense_18 (Dense)            (None, 1)                 101       
                                                                 
Total params: 1341 (5.24 KB)
Trainable params: 1341 (5.24 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2838f72f310>

In [34]:
# Evaluation performance of network.
test_loss, test_acc = network.evaluate(x_test, y_test)
print('MAE : {}'.format(test_acc * 100.0))
print('Loss : {}'.format(test_loss))

MAE : 995.799446105957
Loss : 162.3751678466797


In [35]:
# Create the network using keras.

# Network three : Model with 2 hidden layers.
network = tf.keras.Sequential([
     tf.keras.layers.Dense(10, input_dim = num_input_layers,activation='relu'),  # Input layer with num_input_layers neurons
     tf.keras.layers.Dense(100, activation='relu'),
     tf.keras.layers.Dense(25, activation='relu'),
     tf.keras.layers.Dense(1, activation='linear')  # Output layer with 10 neurons 
])

network.summary()

# Compile the network.
network.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
network.fit(x_train, y_train, epochs=10)

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_19 (Dense)            (None, 10)                140       
                                                                 
 dense_20 (Dense)            (None, 100)               1100      
                                                                 
 dense_21 (Dense)            (None, 25)                2525      
                                                                 
 dense_22 (Dense)            (None, 1)                 26        
                                                                 
Total params: 3791 (14.81 KB)
Trainable params: 3791 (14.81 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2839064e450>

In [36]:
# Evaluation performance of network.
test_loss, test_acc = network.evaluate(x_test, y_test)
print('MAE : {}'.format(test_acc * 100.0))
print('Loss : {}'.format(test_loss))

MAE : 782.830810546875
Loss : 109.02396392822266


Analysis:

Model one   : MAE = 2026.1079, Loss : 498.165 \
Model two   : MAE = 995.7994, Loss : 162.375 \
Model three : Accuracy = 782.83, Loss : 109.023

From the data above, we can see that adding more layers here got us higher accuracy (and) a lower loss function (as seen in comparison of metrics of Model two and Model three).
However, as seen from the handwritten_digits_recognition notebook, this depends on a case by case basis, and is not the solution always (adding more layers was benefiical here, and in other projects led to poor results compared to fewer hiddne layers.)
 
The first network (no hidden layer with the least number of trainable parameters) as expected performs the worst compared to the other two layers. This implies that having too few trainable parameters is not suitable, as overfitting is very likely to occur, and the network isn't complex enough in this case.