#### Importing the necessary libraries and packages

In [12]:
import warnings
warnings.filterwarnings('ignore')
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
import tensorflow as tf

In [31]:
print(tf.__version__)

2.18.0


#### Downloading the dataset

* California Housing dataset

In [2]:
housing = fetch_california_housing()

#### Inspecting the data and it's shape

In [3]:
housing.data, housing.data.shape

(array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
           37.88      , -122.23      ],
        [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
           37.86      , -122.22      ],
        [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
           37.85      , -122.24      ],
        ...,
        [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
           39.43      , -121.22      ],
        [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
           39.43      , -121.32      ],
        [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
           39.37      , -121.24      ]]),
 (20640, 8))

#### Inspecting the data target and it's shape

In [4]:
housing.target, housing.target.shape

(array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]), (20640,))

#### Splitting the dataset

- Splitting the dataset into training and testing dataset (75:25 percentage split)

In [5]:
X_train_full, X_test, y_train_full, y_test = train_test_split(housing.data, housing.target, random_state=42)

In [6]:
print(X_train_full.shape, y_train_full.shape)
print(X_test.shape, y_test.shape)

(15480, 8) (15480,)
(5160, 8) (5160,)


- Splitting the training dataset into training and validation dataset (75:25 percentage split)

In [7]:
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, random_state=42)

In [8]:
print(X_train.shape, y_train.shape)
print(X_valid.shape, y_valid.shape)

(11610, 8) (11610,)
(3870, 8) (3870,)


### Type 1 Model : Initializing the model

* Flatten() layer is not used in this example.
* Instead a Normalization layer is used as the first layer and does the same thing as Scikit-Learn's StandardScaler().
* Must be fitted to the training data using it's adapt() methods before calling the model's fit() method

The **Normalization layer** learns the feature means and standard deviations in the training data when you call the adapt() method. Yet when you display the model's summary, these statistics are listed as non trainable. This is because these parameters are not affected by gradient descent.

In [9]:
tf.random.set_seed(42)

In [21]:
norm_layer = tf.keras.layers.Normalization(input_shape=X_train.shape[1:])
model = tf.keras.Sequential([
    norm_layer,
    tf.keras.layers.Dense(50, activation='relu'),
    tf.keras.layers.Dense(50, activation='relu'),
    tf.keras.layers.Dense(50, activation='relu'),
    tf.keras.layers.Dense(1)])

#### Setting the optimizer

In [22]:
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

#### Setting the model's training metrics

In [23]:
model.compile(loss="mse", optimizer=optimizer, metrics=["RootMeanSquaredError"])

#### Adapt the training data to the normalization layer

In [24]:
norm_layer.adapt(X_train)

#### Train the model on training data using specified number of epochs

In [25]:
history = model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))

Epoch 1/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - RootMeanSquaredError: 1.1591 - loss: 1.4237 - val_RootMeanSquaredError: 0.9103 - val_loss: 0.8287
Epoch 2/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - RootMeanSquaredError: 0.6435 - loss: 0.4146 - val_RootMeanSquaredError: 0.6084 - val_loss: 0.3701
Epoch 3/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - RootMeanSquaredError: 0.6060 - loss: 0.3674 - val_RootMeanSquaredError: 0.8837 - val_loss: 0.7809
Epoch 4/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - RootMeanSquaredError: 0.5923 - loss: 0.3510 - val_RootMeanSquaredError: 1.1057 - val_loss: 1.2226
Epoch 5/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - RootMeanSquaredError: 0.5820 - loss: 0.3389 - val_RootMeanSquaredError: 1.0742 - val_loss: 1.1540
Epoch 6/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

#### Evaluate the model's performance on testing data

In [26]:
mse_test, rmse_test = model.evaluate(X_test, y_test)

[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 652us/step - RootMeanSquaredError: 0.5293 - loss: 0.2803


In [27]:
print(mse_test, rmse_test)

0.2853373885154724 0.5341697931289673


#### Predict the output values for the first 3 datapoints

In [28]:
X_new = X_test[:3]
y_pred = model.predict(X_new)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step


In [29]:
y_pred

array([[0.45630956],
       [1.4869951 ],
       [5.008497  ]], dtype=float32)

### Type 2 Model : Building a Wide and Deep neural network

* Non sequential neural network is a Wide and Deep Neural Network
* Heng-Tze Cheng et al (2016 paper)
* It connects all or part of the inputs directly to the output layer.
* This architecture makes it possible for the neural network to learn both deep patterns (using the deep path) and simple rules(through the short path)
* A regular MLP forces all the data to flow through the full stack of layers, thus simple patterns in the data may end up being distorted by this sequence of transformations 

#### Defining layers of the model

1. Normalization layer to standardize the inputs
2. Two Dense layers with 30 neurons each using the ReLU activation function
3. Concatenate layer
4. Dense layer with a single neuron for the output layer, without any activation function

In [33]:
normalization_layer = tf.keras.layers.Normalization()
hidden_layer1 = tf.keras.layers.Dense(30, activation='relu')
hidden_layer2 = tf.keras.layers.Dense(30, activation='relu')
concat_layer = tf.keras.layers.Concatenate()
output_layer = tf.keras.layers.Dense(1)

#### Establishing the flow of data in model layers

1. input_ - Input object. It is a specification of the kind of input the model will get, including it's shape and optionally its dtype, which defaults to 32 bits floats. A model may actually have multiple inputs. Input object is just a data specification.
2. Used the Normalization layer just like a function, passing it the Input object. It's called the functional API. No actual data is being processed yet. Only keras is being told how it should connect the layers together.The input and output are both symbolic. Normalized doesn't store any actual data, it's just used to construct the model.
3. concat_layer - concatenate the input and the second hidden layer's output.

In [34]:
input_ = tf.keras.layers.Input(shape=X_train.shape[1:])
normalized = normalization_layer(input_)
hidden1 = hidden_layer1(normalized)
hidden2 = hidden_layer2(hidden1)
concat = concat_layer([normalized, hidden2])
output = output_layer(concat)

#### Creation of Keras model from inputs and outputs

In [38]:
model = tf.keras.Model(inputs=[input_], outputs=[output])

#### Defining the optimizer

In [39]:
optimizer1 = tf.keras.optimizers.Adam(learning_rate=1e-3)

#### Model compilation using model training metrics

In [40]:
model.compile(loss="mse", optimizer=optimizer1, metrics=["RootMeanSquaredError"])

#### Fitting the model

In [41]:
history = model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))

Epoch 1/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - RootMeanSquaredError: 98.5840 - loss: 13144.9893 - val_RootMeanSquaredError: 13.7108 - val_loss: 187.9848
Epoch 2/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - RootMeanSquaredError: 2.6266 - loss: 6.9281 - val_RootMeanSquaredError: 12.9485 - val_loss: 167.6648
Epoch 3/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 950us/step - RootMeanSquaredError: 1.9082 - loss: 3.6802 - val_RootMeanSquaredError: 11.5206 - val_loss: 132.7237
Epoch 4/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 882us/step - RootMeanSquaredError: 1.5835 - loss: 2.5444 - val_RootMeanSquaredError: 10.2225 - val_loss: 104.4997
Epoch 5/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 966us/step - RootMeanSquaredError: 1.3602 - loss: 1.8849 - val_RootMeanSquaredError: 8.9722 - val_loss: 80.5004
Epoch 6/20
[1m363/363[0m [32m━━━━━━━━━━━━━━

#### Evaluate the model's performance on testing data

In [42]:
mse_test, rmse_test = model.evaluate(X_test, y_test)

[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 845us/step - RootMeanSquaredError: 1.3568 - loss: 1.8413


In [43]:
print(mse_test, rmse_test)

1.860201120376587 1.3638919591903687


### Type 3 Model : Subset features different paths

* Send a subset of the features through the wide path and a different subset (possibly overlapping) through the deep path

#### Defining layers of the model

In [52]:
input_wide = tf.keras.layers.Input(shape=[5]) # features 0 to 4
input_deep = tf.keras.layers.Input(shape=[6]) # features 2 to 7

norm_layer_wide = tf.keras.layers.Normalization()
norm_layer_deep = tf.keras.layers.Normalization()

norm_wide = norm_layer_wide(input_wide)
norm_deep = norm_layer_deep(input_deep)

hidden1 = tf.keras.layers.Dense(30, activation='relu')(norm_deep)
hidden2 = tf.keras.layers.Dense(30, activation='relu')(hidden1)

concat = tf.keras.layers.concatenate([norm_wide, hidden2])
output = tf.keras.layers.Dense(1)(concat)

model = tf.keras.Model(inputs=[input_wide, input_deep], outputs=[output])

#### Defining the optimizer

In [53]:
optimizer2 = tf.keras.optimizers.Adam(learning_rate=1e-3)

#### Compiling the model with training metrics

In [54]:
model.compile(loss="mse", optimizer=optimizer2, metrics=["RootMeanSquaredError"])

#### Seperating the datasets

In [58]:
X_train_wide, X_train_deep = X_train[:, :5], X_train[:, 2:]
X_valid_wide, X_valid_deep = X_valid[:, :5], X_valid[:, 2:]
X_test_wide, X_test_deep = X_test[:, :5], X_test[:, 2:]
X_new_wide, X_new_deep = X_test_wide[:3], X_test_deep[:3]

#### Training the model

In [56]:
norm_layer_wide.adapt(X_train_wide)
norm_layer_deep.adapt(X_train_deep)

history = model.fit((X_train_wide, X_train_deep), y_train, epochs=20, validation_data=((X_valid_wide, X_valid_deep), y_valid))

Epoch 1/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - RootMeanSquaredError: 1.4704 - loss: 2.2448 - val_RootMeanSquaredError: 1.4442 - val_loss: 2.0857
Epoch 2/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 991us/step - RootMeanSquaredError: 0.7428 - loss: 0.5526 - val_RootMeanSquaredError: 0.8360 - val_loss: 0.6988
Epoch 3/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 905us/step - RootMeanSquaredError: 0.6676 - loss: 0.4461 - val_RootMeanSquaredError: 0.6239 - val_loss: 0.3893
Epoch 4/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 958us/step - RootMeanSquaredError: 0.6452 - loss: 0.4165 - val_RootMeanSquaredError: 0.6018 - val_loss: 0.3621
Epoch 5/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - RootMeanSquaredError: 0.6309 - loss: 0.3983 - val_RootMeanSquaredError: 0.5956 - val_loss: 0.3547
Epoch 6/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

#### Evaluating the model against test dataset

In [59]:
mse_test = model.evaluate((X_test_wide, X_test_deep), y_test)
y_pred = model.predict((X_new_wide, X_new_deep))

[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 574us/step - RootMeanSquaredError: 0.5748 - loss: 0.3305
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step


### Type 4 Model : Seperate Outputs

* Extra output is quite easy: connect it to the appropriate layer and add it to the model's list of outputs

#### Defining the 2 model outputs in output layer

In [71]:
output = tf.keras.layers.Dense(1)(concat)
aux_output = tf.keras.layers.Dense(1)(hidden2)

model = tf.keras.Model(inputs=[input_wide, input_deep], outputs=[output, aux_output])

#### Defining the optimizer

In [72]:
optimizer3 = tf.keras.optimizers.Adam(learning_rate=1e-3)

#### Define the model compilation with training parameters

In [73]:
model.compile(loss=("mse", "mse"), loss_weights=(0.9, 0.1), optimizer = optimizer3, metrics=["RootMeanSquaredError", "RootMeanSquaredError"])

#### Adapting the training data with normalization layer

In [74]:
norm_layer_wide.adapt(X_train_wide)
norm_layer_deep.adapt(X_train_deep)

#### Fitting the model

In [75]:
history = model.fit((X_train_wide, X_train_deep), (y_train, y_train), epochs=20, validation_data=((X_valid_wide, X_valid_deep), (y_valid, y_valid)))

Epoch 1/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - dense_29_RootMeanSquaredError: 1.4116 - dense_29_loss: 1.8856 - dense_30_RootMeanSquaredError: 1.8370 - dense_30_loss: 0.3484 - loss: 2.2340 - val_dense_29_RootMeanSquaredError: 0.7331 - val_dense_29_loss: 0.4836 - val_dense_30_RootMeanSquaredError: 1.0084 - val_dense_30_loss: 0.1017 - val_loss: 0.5854
Epoch 2/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - dense_29_RootMeanSquaredError: 0.7011 - dense_29_loss: 0.4427 - dense_30_RootMeanSquaredError: 0.8633 - dense_30_loss: 0.0746 - loss: 0.5173 - val_dense_29_RootMeanSquaredError: 0.6377 - val_dense_29_loss: 0.3659 - val_dense_30_RootMeanSquaredError: 0.8684 - val_dense_30_loss: 0.0754 - val_loss: 0.4414
Epoch 3/20
[1m363/363[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - dense_29_RootMeanSquaredError: 0.6355 - dense_29_loss: 0.3636 - dense_30_RootMeanSquaredError: 0.7202 - dense_30_loss: 0.0519 - 

#### Evaluating the model performance against testing dataset

In [76]:
eval_results = model.evaluate((X_test_wide, X_test_deep), (y_test, y_test))

weighted_sum_of_losses, main_loss, aux_loss, main_rmse, aux_rmse = eval_results

[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 935us/step - dense_29_RootMeanSquaredError: 0.5648 - dense_29_loss: 0.2871 - dense_30_RootMeanSquaredError: 0.6071 - dense_30_loss: 0.0369 - loss: 0.3240


In [77]:
print(weighted_sum_of_losses, main_loss, aux_loss, main_rmse, aux_rmse)

0.32088109850883484 0.28391894698143005 0.036691226065158844 0.5619576573371887 0.6055104732513428


#### Predicting the results using model

In [78]:
y_pred_main, y_pred_aux = model.predict((X_new_wide, X_new_deep))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step


In [79]:
y_pred_tuple = model.predict((X_new_wide, X_new_deep))

y_pred = dict(zip(model.output_names, y_pred_tuple))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step


In [80]:
y_pred

{'dense_29': array([[0.49514577],
        [1.2503316 ],
        [3.6080847 ]], dtype=float32),
 'dense_30': array([[0.4965441],
        [1.1439607],
        [3.5039604]], dtype=float32)}