In [1]:
import numpy as np
import tensorflow as tf


from tensorflow import keras
from tensorflow.keras import layers

## Create Simulated Data

In [2]:
def create_data(n=100):
    """
        f(x1, x2) = 5 * x_1**2 + 4 * x_2**2 - 3 * x_1 * x_2
    """
    ret = np.zeros(shape=(n, 3)).astype("float32")
    x = np.random.rand(n, 2)
    y = (5 * x[:, 0]**2 + 4 * x[:, 1]**2 - 3 * x[:, 0] * x[:, 1]).reshape(-1, 1)
    ret[:, [0, 1]] = x
    ret[:, [2]] = y
    
    return ret

## Test Out Function

In [3]:
d = create_data(4)

In [4]:
d

array([[0.37237087, 0.23720594, 0.65338117],
       [0.53994864, 0.86439043, 3.0462265 ],
       [0.49579495, 0.5879997 , 1.7374558 ],
       [0.39533043, 0.8266355 , 2.534353  ]], dtype=float32)

In [5]:
val = round(5 * d[0, 0]**2 + 4 * d[0, 1]**2 - 3 * d[0, 0] * d[0, 1], 4)
exp_val = round(d[0, 2], 4)

val, exp_val

(0.6534, 0.6534)

## Create Network

The network that we will create will have two dense layers with ReLU activations. The goal is to see how many of these neurons become "dead".

### Train and Test Data

In [6]:
NUM_SAMPLES = 1000
TRAIN_FRAC = 0.60
VAL_FRAC = 0.20

data = create_data(n=1000)

train_idx = int(TRAIN_FRAC * NUM_SAMPLES)
val_idx = train_idx + int(VAL_FRAC * NUM_SAMPLES)

train_data = data[: train_idx]
val_data = data[train_idx: val_idx]
test_data = data[val_idx: ]

### Normalizer 

In [7]:
def normalize(data: np.array, mean: np.array=None, sd: np.array=None):
    if (mean is not None) and (sd is not None):
        assert data.shape[1] == len(mean) == len(sd)
    
    if mean is None:
        mean = data.mean(axis=0)
    if sd is None:
        sd = data.std(axis=0)
    
    normed = (data - mean) / sd
    
    return mean, sd, normed

In [8]:
mean, sd, norm_train_data = normalize(train_data)
_, _, norm_val_data = normalize(val_data, mean=mean, sd=sd)
_, _, norm_test_data = normalize(test_data, mean=mean, sd=sd)

### Model Layers

In [9]:
inputs = keras.Input(shape=2)
dense_1 = layers.Dense(units=10, activation='relu')(inputs)
dense_2 = layers.Dense(units=10, activation='relu')(dense_1)
output = layers.Dense(units=1, activation=None)(dense_2)

In [10]:
model = keras.Model(inputs=inputs, outputs=output)
model.compile(optimizer='rmsprop', loss='mean_squared_error')

In [11]:
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 2)]               0         
_________________________________________________________________
dense (Dense)                (None, 10)                30        
_________________________________________________________________
dense_1 (Dense)              (None, 10)                110       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 11        
Total params: 151
Trainable params: 151
Non-trainable params: 0
_________________________________________________________________


In [12]:
checkpoint_filepath = './checkpoint'

model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True)

In [13]:
BATCH_SIZE = 32
EPOCHS = 10

train_dataset = tf.data.Dataset.from_tensor_slices((test_data[:, [0, 1]], 
                                                    test_data[:, [2]])).batch(BATCH_SIZE)

val_dataset = tf.data.Dataset.from_tensor_slices((val_data[:, [0, 1]], 
                                                  val_data[:, [2]])).batch(BATCH_SIZE)

history = model.fit(train_dataset,
                    batch_size=BATCH_SIZE, 
                    epochs=EPOCHS, 
                    validation_data=val_dataset, 
                    callbacks=[model_checkpoint_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [14]:
pred = model.predict(test_data[:, [0, 1]])

In [15]:
np.mean((pred - test_data[:, [2]])**2)

4.3786955

## Weights

In [16]:
num_layers = 3

layer_weights = {}
layer_biases = {}

for i in range(1, num_layers + 1):
    layer_weights[i] = model.layers[i].get_weights()[0]
    layer_biases[i] = model.layers[i].get_weights()[1]

In [17]:
layer_weights[1]

array([[ 0.3914852 ,  0.16362968,  0.33383545, -0.20063436,  0.7469228 ,
         0.76410955,  0.5564589 ,  0.6400655 , -0.68820596, -0.22900069],
       [-0.6152893 , -0.69731134,  0.44550797, -0.6975839 , -0.31563833,
        -0.42561817,  0.21841656, -0.49402374, -0.07197654, -0.3623813 ]],
      dtype=float32)

In [18]:
model.layers[3].get_weights()[0]

array([[-0.46205425],
       [ 0.13735126],
       [ 0.6482053 ],
       [-0.64298725],
       [-0.21215235],
       [-0.25053793],
       [-0.19961454],
       [-0.46673727],
       [ 0.25805578],
       [ 0.7932906 ]], dtype=float32)