In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Dense


class Actor(tf.keras.Model):
    """Three-layer MLP that maps a batch of states to bounded actions.

    Two ReLU hidden layers feed a linear output layer of ``action_dim``
    units; the result is squashed with ``tanh`` and multiplied by
    ``max_action``.

    Args:
        state_shape: Shape of a single state without the batch dimension,
            given as any sequence of ints (tuple or list).
        action_dim: Number of units in the output layer.
        max_action: Factor the ``tanh`` output is multiplied by.
        units: Sizes of the first and second hidden layers.
        name: Name passed to ``tf.keras.Model``.
    """

    def __init__(self, state_shape, action_dim, max_action, units=(400, 300), name="Actor"):
        super().__init__(name=name)

        self.l1 = Dense(units[0], name="L1")
        self.l2 = Dense(units[1], name="L2")
        self.l3 = Dense(action_dim, name="L3")

        self.max_action = max_action

        # Run one forward pass on an all-zero state so every layer creates its
        # weights right away. tuple() lets callers pass the shape as a list.
        dummy_state = np.zeros(shape=(1,) + tuple(state_shape), dtype=np.float32)
        with tf.device("/cpu:0"):
            self(tf.constant(dummy_state))

    def call(self, inputs):
        """Return ``max_action * tanh(L3(relu(L2(relu(L1(inputs))))))``."""
        features = tf.nn.relu(self.l1(inputs))
        features = tf.nn.relu(self.l2(features))
        features = self.l3(features)
        action = self.max_action * tf.nn.tanh(features)
        return action
    
class Critic(tf.keras.Model):
    """Three-layer MLP that scores (state, action) pairs with a single output.

    States and actions are concatenated along axis 1, passed through two ReLU
    hidden layers, and reduced to one unit by a linear output layer.

    Args:
        state_shape: Shape of a single state without the batch dimension,
            given as any sequence of ints (tuple or list). Because the dummy
            state is concatenated with a rank-2 dummy action along axis 1,
            this is expected to be one-dimensional, e.g. ``(state_dim,)``.
        action_dim: Size of a single action vector.
        units: Sizes of the first and second hidden layers.
        name: Name passed to ``tf.keras.Model``.
    """

    def __init__(self, state_shape, action_dim, units=(400, 300), name="Critic"):
        super().__init__(name=name)

        self.l1 = Dense(units[0], name="L1")
        self.l2 = Dense(units[1], name="L2")
        self.l3 = Dense(1, name="L3")

        # Run one forward pass on all-zero inputs so every layer creates its
        # weights right away. tuple() lets callers pass the shape as a list.
        with tf.device("/cpu:0"):
            dummy_state = tf.constant(
                np.zeros(shape=(1,) + tuple(state_shape), dtype=np.float32))
            dummy_action = tf.constant(
                np.zeros(shape=[1, action_dim], dtype=np.float32))
            self([dummy_state, dummy_action])

    def call(self, inputs):
        """Score a batch of state/action pairs.

        Args:
            inputs: A two-element sequence ``(states, actions)``.

        Returns:
            The output of the final single-unit dense layer.
        """
        states, actions = inputs
        features = tf.concat([states, actions], axis=1)
        features = tf.nn.relu(self.l1(features))
        features = tf.nn.relu(self.l2(features))
        features = self.l3(features)
        return features
    
def update_towards_net2(net1,net2,tau=.01):
    """Overwrite each trainable variable of ``net1`` with a blend of itself and ``net2``.

    For every pair of variables at the same position in
    ``net1.trainable_variables`` and ``net2.trainable_variables`` the
    ``net1`` variable is assigned ``tau * v1 + (1.0 - tau) * v2``.
    ``net2`` is left untouched. Pairing uses ``zip``, so surplus variables in
    the longer list are silently ignored.

    NOTE(review): ``tau`` weights the *current* ``net1`` value, so the default
    ``tau=.01`` moves ``net1`` 99% of the way to ``net2`` in one call. Many
    soft-update implementations use the opposite weighting -- confirm this is
    the intended meaning of ``tau``.

    Args:
        net1: Object exposing ``trainable_variables``; its variables are updated in place.
        net2: Object exposing ``trainable_variables``; read only.
        tau: Weight kept from ``net1``'s current value.

    Returns:
        None.
    """
    variable_pairs = zip(net1.trainable_variables, net2.trainable_variables)
    for updated_var, reference_var in variable_pairs:
        blended = tau*updated_var + (1.0 - tau)*reference_var
        updated_var.assign(blended)

In [3]:
# NOTE: despite its name, `critic` holds an Actor instance (not a Critic).
# The name is kept because other cells reference it.
critic = Actor(state_shape=(1,), action_dim=1, max_action=1)

In [10]:
# Evaluate the network on one input of shape (1, 1, 1) whose only entry is 2.0.
critic(np.full((1, 1, 1), 2.0))

<tf.Tensor: id=367, shape=(1, 1, 1), dtype=float32, numpy=array([[[0.01983018]]], dtype=float32)>

In [12]:
# Draw 40 uniform samples from [0, 1); each is wrapped in three nested lists,
# so the stacked array has shape (40, 1, 1, 1).
f = [[[[np.random.random()]]] for _ in range(40)]
actions = np.array(f)

In [16]:
# Run the network on the whole `actions` batch and print each output row as a
# flat 1-D array.
predictions = critic(actions).numpy()
for output_row in predictions:
    print(output_row.ravel())

[0.00121733]
[0.00135996]
[0.00985319]
[0.00122663]
[0.0044041]
[0.00521271]
[0.00491943]
[0.0078315]
[0.0001586]
[0.00145058]
[0.00882651]
[0.0026566]
[0.00119674]
[0.00089821]
[0.00391003]
[0.00539965]
[0.00601971]
[0.00272013]
[0.00940686]
[0.00375981]
[0.00777483]
[0.0069843]
[0.00609641]
[0.00193624]
[0.00027401]
[0.00942813]
[0.0011277]
[0.00096804]
[0.00887218]
[0.00391494]
[0.00735118]
[0.00219783]
[0.00622189]
[0.0089083]
[0.00790127]
[0.00743638]
[0.00200745]
[0.00923134]
[0.00386242]
[0.00958476]
