In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense


class Critic(tf.keras.Model):
    def __init__(self, layer=0):
        super(Critic,self).__init__()
        self.l1 = Dense(2, input_shape=(1,), kernel_initializer='random_uniform',
                bias_initializer='random_uniform')
        self.l2 = Dense(2, kernel_initializer='random_uniform',
                bias_initializer='random_uniform')
        self.l3 = Dense(1, kernel_initializer='random_uniform',
                bias_initializer='random_uniform')
        
    def call(self, input):
        state, action = input
        features = tf.concat([state, action], axis=1)
        feat = tf.nn.relu(self.l1(features))
        feat = tf.nn.relu(self.l2(feat))
        value = self.l3(feat)
        return tf.nn.tanh(value)*2 #Tanh image is [-1,1]
    

You can pass the data as numpy arrays, or just lists...

In [5]:
cc = Critic()

inps = [[np.array([1.]), np.array([1.])],[np.array([2.]), np.array([1.])]]
print(cc(inps))

inps = [[[1.], [3.22]],[[22.],[33.]]]
print(cc(inps))

W1002 17:40:36.665384 140376949892928 base_layer.py:1814] Layer critic_2 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



tf.Tensor(
[[-0.01764066]
 [-0.01764405]], shape=(2, 1), dtype=float32)
tf.Tensor(
[[-0.01757287]
 [-0.017393  ]], shape=(2, 1), dtype=float32)


In [14]:
def f():
    cc = Critic()
    inps = [[np.array([1.])],[np.array([1.])]]
    print(cc(inps))

for i in range(50):
    f()

W1002 17:42:57.859688 140376949892928 base_layer.py:1814] Layer critic_10 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

W1002 17:42:57.884035 140376949892928 base_layer.py:1814] Layer critic_11 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. I

tf.Tensor([[0.02506761]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.01804972]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.00223475]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.05138217]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.00934845]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.01893435]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.0135658]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.04010885]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.02387712]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.03265539]], shape=(1, 1), dtype=float32)


W1002 17:42:58.083360 140376949892928 base_layer.py:1814] Layer critic_21 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

W1002 17:42:58.108822 140376949892928 base_layer.py:1814] Layer critic_22 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. I

W1002 17:42:58.265718 140376949892928 base_layer.py:1814] Layer critic_32 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

W1002 17:42:58.279864 140376949892928 base_layer.py:1814] Layer critic_33 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. I

tf.Tensor([[-0.05081596]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.02025365]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.06782174]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.04772029]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.04366359]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.07683292]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.01974444]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.06457264]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.06838958]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.02366356]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.0581089]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.0031991]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.07574078]], shape=(1, 1), dtype=float32)


W1002 17:42:58.296494 140376949892928 base_layer.py:1814] Layer critic_34 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

W1002 17:42:58.311892 140376949892928 base_layer.py:1814] Layer critic_35 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. I

W1002 17:42:58.468213 140376949892928 base_layer.py:1814] Layer critic_45 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

W1002 17:42:58.487110 140376949892928 base_layer.py:1814] Layer critic_46 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. I

tf.Tensor([[-0.06018424]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.00846331]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.01847083]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.00791233]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.03425859]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.0434526]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.08995082]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.08627177]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.09693159]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.01525124]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.02961314]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.05606789]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.00173178]], shape=(1, 1), dtype=float32)


W1002 17:42:58.503176 140376949892928 base_layer.py:1814] Layer critic_47 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

W1002 17:42:58.519472 140376949892928 base_layer.py:1814] Layer critic_48 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. I

W1002 17:42:58.679008 140376949892928 base_layer.py:1814] Layer critic_58 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

W1002 17:42:58.697941 140376949892928 base_layer.py:1814] Layer critic_59 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. I

tf.Tensor([[0.02108723]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.0831684]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.04099162]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.09748317]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.07573962]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.0989245]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.0563133]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.05813502]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.08458967]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.03250144]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.06618686]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.04318689]], shape=(1, 1), dtype=float32)
tf.Tensor([[0.07936041]], shape=(1, 1), dtype=float32)
tf.Tensor([[-0.00523609]], shape=(1, 1), dtype=float32)


Let's take the derivatives of the critic. For this we define the loss and the optimizer...

In [203]:
loss = tf.keras.losses.MeanSquaredError()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.5)

In [204]:
@tf.function
def step_critic(labels, states):
    with tf.GradientTape() as tape:
        l = loss(labels, cc(states))
        l = tf.reduce_mean(l)
    g = tape.gradient(l, cc.trainable_variables)
    optimizer.apply_gradients(zip(g, cc.trainable_variables))
    return

In [205]:
labels = np.random.random_sample(500)
dummy_states = [[], []]
for i in range(len(labels)):
    dummy_states[0].append([np.random.random()])
    dummy_states[1].append([np.random.random()])

In [206]:
step_critic(labels, dummy_states)

In [207]:
for i in cc.trainable_variables:
    print(i.numpy())

[[-0.46065544 -0.01383403]
 [-0.50364909 -0.00971141]]
[-0.48026149 -0.00611758]
[[-0.48284157  0.00892967]
 [-0.04214531  0.00741531]]
[-0.48643569 -0.01482327]
[[ 0.47273369]
 [-0.04306903]]
[0.51803801]


Now we define the actor and do the same!

In [175]:

class NN_policy(tf.keras.Model):
    def __init__(self, layer=0):
        super(NN_policy,self).__init__()
        self.l1 = Dense(2, input_shape=(1,), kernel_initializer='random_uniform',
                bias_initializer='random_uniform')
        self.l2 = Dense(2, kernel_initializer='random_uniform',
                bias_initializer='random_uniform')
        self.l3 = Dense(1, kernel_initializer='random_uniform',
                bias_initializer='random_uniform')
        
    def call(self, input):
        feat = tf.nn.relu(self.l1(input))
        feat = tf.nn.relu(self.l2(feat))
        action = tf.nn.relu(self.l3(feat))
        return tf.nn.tanh(action)*2 #Tanh image is [-1,1]

The thing is that we don't intend (for the first displacement) to give any input. Hence, we set the state to 0.

Notice that in this case of simple inputs, one should add a new axis (somebody knows why in the last case not ?)

In [176]:
pol = NN_policy()
dummy_state = np.array([0.])[...,tf.newaxis]
pol(dummy_state)

<tf.Tensor: id=14579, shape=(1, 1), dtype=float64, numpy=array([[0.02764966]])>

In [177]:
for i in pol.trainable_variables:
    print(i.numpy())

[[0.03244116 0.04590941]]
[-0.04215503 -0.03693446]
[[-0.04217463  0.00400336]
 [-0.04444724  0.00205822]]
[ 0.0229516  -0.02662618]
[[-0.02949878]
 [-0.02165013]]
[0.01450275]


If you want to get the numpy value (just the number) of the output of your neural net:

In [178]:
pol(dummy_state).numpy().flatten()[0]

0.02764965815306522

Notice that we use flatten()[0], but if your output is an array of more numbers, then you would not want to take only the first element ;)

Now we consider taking the gradient of the loss. For this we define the loss (we will take the MeanSquareError, but notice you can define your own loss, maybe later I write this down and see if it works)

We also define an optimizer. When you call for apply_gradients, you update your weights

In [179]:
loss_mse = tf.keras.losses.MeanSquaredError()
optimizer = tf.keras.optimizers.Adam(learning_rate=.5)

Now we define a function, with the @tf.function decorator (that enhaces things, apparently), where we take the gradient of the network. We check a few things. Notice that the output of this neural network will be deterministic (in the sense that we always feed it with the same input state)

In [192]:
print(loss_mse(dummy_state, dummy_state))
print(loss_mse(dummy_state+1, dummy_state))

tf.Tensor(0.0, shape=(), dtype=float64)
tf.Tensor(1.0, shape=(), dtype=float64)


Now apply this to a batch of data. Indeed, it's "deterministic"

In [185]:
labels = np.random.random_sample(300)[...,tf.newaxis]
dummy_states = np.zeros(len(labels))[...,tf.newaxis]

In [191]:
pol(dummy_states)

<tf.Tensor: id=14762, shape=(300, 1), dtype=float64, numpy=
array([[0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.02764966],
       [0.027649

In [193]:
@tf.function
def step_actor(labels):
    #consider labels to be an array or numpy array
    dummy_states = np.zeros(len(labels))[...,tf.newaxis]
    with tf.GradientTape() as tape:
        l = loss_mse(labels, pol(dummy_states))
        l = tf.reduce_mean(l)
    grad = tape.gradient(l,pol.trainable_variables)
    optimizer.apply_gradients(zip(grad, pol.trainable_variables))
    return

In [194]:
step_actor(labels)

In [195]:
for i in pol.trainable_variables:
    print(i.numpy())

[[0.03244116 0.04590941]]
[-0.04215503 -0.03693446]
[[-0.04217463  0.00400336]
 [-0.04444724  0.00205822]]
[-0.47702054 -0.02662618]
[[ 0.47046542]
 [-0.02165013]]
[0.51450193]


Finally, I encounter a problem for the update of the actor, I have to define a different loss function iterating... so I use tf.map_fn

In [249]:
t1 = np.random.random_sample(10)[...,tf.newaxis]
t2 = np.random.random_sample(10)[...,tf.newaxis]

tf.multiply(t1,t2)

<tf.Tensor: id=22930, shape=(10, 1), dtype=float64, numpy=
array([[0.00315722],
       [0.13081384],
       [0.23449677],
       [0.04016552],
       [0.20321879],
       [0.31601222],
       [0.29857452],
       [0.14374718],
       [0.23518598],
       [0.02842308]])>

In [254]:
for i,j in zip(t1.flatten(), t2.flatten()):
    print(i*j)

0.0031572235122459633
0.13081383961044973
0.23449677398091753
0.04016551690820266
0.20321879104287496
0.31601221834416504
0.2985745231337023
0.14374717596680398
0.23518597632881416
0.028423077301905696
