In [113]:
import tensorflow as tf
import numpy as np

In [114]:
gpus = tf.config.experimental.list_physical_devices('GPU')
print("gpus: ", gpus)
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

gpus:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [115]:
from sklearn.datasets import load_iris

data = load_iris()
print("x shape: ", data.data.shape)
print("y shape: ", data.target.shape)

x shape:  (150, 4)
y shape:  (150,)


In [116]:
class MyDenseLayer(tf.keras.layers.Layer):
    def __init__(self, num_outputs, activation=lambda x: x):
        super(MyDenseLayer, self).__init__()
        self.num_outputs = num_outputs
        self.activation = activation

    # creating variables in build enable late variable creation
    # based on the shape of inputs the layer will operate on.
    def build(self, input_shape):
        #w_init = tf.random_normal_initializer()
        self.kernel = tf.Variable(initial_value=( np.random.randint(10, size=(input_shape[-1], self.num_outputs)) ),
                                  trainable=True, dtype='float32')
        #b_init = tf.zeros_initializer()
        self.bias = tf.Variable(initial_value=( np.random.randint(10, size=(self.num_outputs)) ),
                                trainable=True, dtype='float32')

    def call(self, inputs):
        wx = tf.tensordot(inputs, self.kernel, [[-1], [0]])
        b = self.bias
        return  self.activation(wx + b)

# Invoke __init__    
my_layer = MyDenseLayer(3)

# Invoke build() and call()
my_layer(np.array([[1.0,2.0]]))

print("kernel:")
print(my_layer.variables[0])
print("")

print("bias")
print(my_layer.variables[1])
print("")

print("trainable_variables")
print(my_layer.trainable_variables)
print("")

# trainable_weights is the same as trainable_variables
print("trainable_weights")
print(my_layer.trainable_weights)

kernel:
<tf.Variable 'my_dense_layer_21/Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[8., 0., 1.],
       [2., 0., 9.]], dtype=float32)>

bias
<tf.Variable 'my_dense_layer_21/Variable:0' shape=(3,) dtype=float32, numpy=array([4., 5., 3.], dtype=float32)>

trainable_variables
[<tf.Variable 'my_dense_layer_21/Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[8., 0., 1.],
       [2., 0., 9.]], dtype=float32)>, <tf.Variable 'my_dense_layer_21/Variable:0' shape=(3,) dtype=float32, numpy=array([4., 5., 3.], dtype=float32)>]

trainable_weights
[<tf.Variable 'my_dense_layer_21/Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[8., 0., 1.],
       [2., 0., 9.]], dtype=float32)>, <tf.Variable 'my_dense_layer_21/Variable:0' shape=(3,) dtype=float32, numpy=array([4., 5., 3.], dtype=float32)>]


In [117]:
a = tf.Variable(initial_value=( np.random.randint(10, size=(3,1)) ),
                                  trainable=True, dtype='float32')
m = tf.constant([[1.,2.,3.], [4.,5.,6.]])
n = tf.constant([[7., 8.]])
with tf.GradientTape(persistent=True) as tape:
    b = tf.matmul(m, a)
    c = tf.matmul(n, b)
gradients_b_a = tape.gradient(b, a)
gradients_c_b = tape.gradient(c, b)
gradients_c_a = tape.gradient(c, a)
print("gradients_b_a")
print(gradients_b_a)
print("")
# gradient returns a sum of Jacobian matrix along
# b-axis, rather than return a Jacobian matrix.
# gradinet is designed to sum all gradients for 
# each sample in a batch, so it makes sense.

print("gradients_c_b")
print(gradients_c_b)
print("")

print("gradients_c_a")
print(gradients_c_a)

gradients_b_a
tf.Tensor(
[[5.]
 [7.]
 [9.]], shape=(3, 1), dtype=float32)

gradients_c_b
tf.Tensor(
[[7.]
 [8.]], shape=(2, 1), dtype=float32)

gradients_c_a
tf.Tensor(
[[39.]
 [54.]
 [69.]], shape=(3, 1), dtype=float32)


In [118]:
class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.d1 = MyDenseLayer(3, activation=tf.nn.softmax)
    
    def call(self, x):
        x = self.d1(x)
        return x

In [119]:
@tf.function
def normalize(x, y):
    mean = tf.math.reduce_mean(x, axis=0)
    std = tf.math.reduce_std(x, axis=0)
    x = (x - mean) / std
    return x, y
dataset = tf.data.Dataset.from_tensor_slices( (data.data, data.target) )
dataset = dataset.shuffle(len(dataset), seed=100)
dataset = dataset.map(lambda a, b: normalize(a, b))

### SparseCategorical does not need one-hot-encoding for y labels
#dataset = dataset.map( lambda a, b: (a, tf.one_hot(b, 3)) )

train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

dataset = dataset.shuffle(len(dataset))
tr_ds = dataset.take(train_size)
te_ds = dataset.skip(train_size)
print("tr_ds size: ", len(tr_ds))
print("te_ds size: ", len(te_ds))
tr_ds = tr_ds.batch(32)

tr_ds size:  120
te_ds size:  30


In [120]:
# If the input is not probability, from_logits=True and vice versa.
# SparseCategorical accepts label, probability as input
# ex: y_true = [1, 2], y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy =tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)

model = MyModel()

# Use tf.function to make graphs out of your programs.
# This will help you create performant and portable models.
# tf.function works best with TF ops; Numpy and Python 
# are converted to constants.
@tf.function
def train_step(x, y):
    with tf.GradientTape() as tape:
        y_pred = model(x, training=True)
        loss = loss_object(y, y_pred)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(y, y_pred)

@tf.function
def test_step(x, y):
    y_pred = model(x, training=False)
    loss = loss_object(y, y_pred)
    test_loss(loss)
    test_accuracy(y, y_pred)



In [121]:
EPOCHS = 20
for epoch in range(EPOCHS):
    # Reset the metrics at the start of the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

    for tr_x, tr_y in tr_ds:
        train_step(tr_x, tr_y)

    for te_x, te_y in te_ds:
        test_step(te_x, te_y)  

    print(f'Epoch {epoch + 1}, ' 
          f'Loss: {train_loss.result():.4f}, '
          f'Accuracy: {train_accuracy.result() * 100:.2f},'
          f'Test Loss: {test_loss.result():.4f}, '
          f'Test Accuracy: {test_accuracy.result() * 100:.2f}')

Epoch 1, Loss: 0.4338, Accuracy: 77.50,Test Loss: 0.4105, Test Accuracy: 73.33
Epoch 2, Loss: 0.4019, Accuracy: 77.50,Test Loss: 0.3351, Test Accuracy: 90.00
Epoch 3, Loss: 0.3418, Accuracy: 87.50,Test Loss: 0.3460, Test Accuracy: 86.67
Epoch 4, Loss: 0.3340, Accuracy: 87.50,Test Loss: 0.2173, Test Accuracy: 100.00
Epoch 5, Loss: 0.3024, Accuracy: 88.33,Test Loss: 0.3291, Test Accuracy: 86.67
Epoch 6, Loss: 0.2716, Accuracy: 90.00,Test Loss: 0.3601, Test Accuracy: 83.33
Epoch 7, Loss: 0.2678, Accuracy: 90.83,Test Loss: 0.2582, Test Accuracy: 90.00
Epoch 8, Loss: 0.2558, Accuracy: 90.83,Test Loss: 0.2426, Test Accuracy: 90.00
Epoch 9, Loss: 0.2181, Accuracy: 93.33,Test Loss: 0.1753, Test Accuracy: 96.67
Epoch 10, Loss: 0.2220, Accuracy: 92.50,Test Loss: 0.2597, Test Accuracy: 86.67
Epoch 11, Loss: 0.2228, Accuracy: 92.50,Test Loss: 0.4323, Test Accuracy: 80.00
Epoch 12, Loss: 0.2048, Accuracy: 94.17,Test Loss: 0.2515, Test Accuracy: 90.00
Epoch 13, Loss: 0.1842, Accuracy: 93.33,Test Los

In [122]:
model.summary()
model.save_weights("my_checkpoint")
model.save("my_model")

Model: "my_model_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 my_dense_layer_22 (MyDenseL  multiple                 15        
 ayer)                                                           
                                                                 
Total params: 15
Trainable params: 15
Non-trainable params: 0
_________________________________________________________________
INFO:tensorflow:Assets written to: my_model\assets


In [123]:
new_model = tf.keras.models.load_model('my_model')

test_loss.reset_states()
test_accuracy.reset_states()
for te_x, te_y in te_ds:
        test_step(te_x, te_y) 

print(f'Test Loss: {test_loss.result():.4f}, '
      f'Test Accuracy: {test_accuracy.result() * 100:.2f}')

Test Loss: 0.1699, Test Accuracy: 93.33
