Нейронная сеть `Prompter` состоит из двух плотных слоев.

In [None]:
# class Prompter(tf.keras.Model):
#     def __init__(self, output_dim, **kwargs):
#         super().__init__(**kwargs)
#         self.hidden = tf.keras.layers.Dense(6, activation="relu",
#                                              kernel_initializer="he_normal")
#         self.out = tf.keras.layers.Dense(output_dim, activation="sigmoid",
#                                              kernel_initializer="he_normal")
#     def call(self, inputs):

In [44]:
import tensorflow as tf
import numpy as np

channel_width = 8
n_actions = 5

def get_prompter_nn(channel_width):
    prompter_nn = tf.keras.Sequential()
    prompter_nn.add(tf.keras.layers.InputLayer(input_shape=[4]))
    prompter_nn.add(tf.keras.layers.Dense(6, activation="relu"))
    prompter_nn.add(tf.keras.layers.Dense(channel_width, activation="sigmoid"))
    return prompter_nn

def get_prisoner_nn(channel_width, n_actions):
    prisoner_nn = tf.keras.Sequential()
    prisoner_nn.add(tf.keras.layers.InputLayer(input_shape=[4+channel_width]))
    prisoner_nn.add(tf.keras.layers.Dense(channel_width, activation="relu"))
    prisoner_nn.add(tf.keras.layers.Dense(n_actions, activation="sigmoid"))
    return prisoner_nn
 
class CombinedModel(tf.keras.Model):
    def __init__(self, channel_width, n_actions, **kwargs):
        super().__init__(**kwargs)
        self.prompter_nn = get_prompter_nn(channel_width)
        self.prisoner_nn = get_prisoner_nn(channel_width, n_actions)
        
    def call(self, prompter_input, additional_input):
        prompter_output = self.prompter_nn(prompter_input)
        message = tf.where(prompter_output >= 0.5, 1.0, 0.0)
        vector = tf.convert_to_tensor(additional_input)
        prisoner_input = tf.concat([message, vector], axis=1)
        return self.prisoner_nn(prisoner_input)

def get_grad(model, prompter_input, additional_input):
    with tf.GradientTape() as tape:
        output = model(prompter_input, additional_input)
        action = np.argmax(output)
        print(f"{action = }")
        loss = ([1.] - output[0, action]) ** 2
    grad = tape.gradient(loss, model.trainable_variables)
    return grad

In [45]:
model = CombinedModel(channel_width, n_actions)
prompter_input = np.array([[1.3, 1.1, -0.3, 4.2]], dtype=np.float32)
additional_input = np.array([[1.1, -2.4, 2.4, 1.0]], dtype=np.float32)
model.call(prompter_input, additional_input)

get_grad(model, prompter_input, additional_input)

action = 1


[None,
 None,
 None,
 None,
 <tf.Tensor: shape=(12, 8), dtype=float32, numpy=
 array([[-0.03261342,  0.02006644, -0.03862052,  0.        ,  0.        ,
          0.        ,  0.        ,  0.02764634],
        [-0.03261342,  0.02006644, -0.03862052,  0.        ,  0.        ,
          0.        ,  0.        ,  0.02764634],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ],
        [-0.03261342,  0.02006644, -0.03862052,  0.        ,  0.        ,
          0.        ,  0.        ,  0.02764634],
        [-0.03261342,  0.02006644, -0.03862052,  0.        ,  0.        ,
          0.        ,  0.        ,  0.02764634],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ,  0.        , 

In [6]:
prompter_nn = get_prompter_nn(8)
prisoner_nn = get_prisoner_nn(8, 4)
prisoner_nn.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 8)                 104       
                                                                 
 dense_9 (Dense)             (None, 4)                 36        
                                                                 
Total params: 140 (560.00 Byte)
Trainable params: 140 (560.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [59]:
@tf.custom_gradient
def sigmoid(x):
    def grad(dy):
        return -dy
    return tf.sigmoid(x), grad

value = tf.Variable([3.0, -1.0, 0.0])
print(sigmoid(value))

with tf.GradientTape() as tape:
    y = sigmoid(value)

g = tape.gradient(y, value)
g

tf.Tensor([0.95257413 0.26894143 0.5       ], shape=(3,), dtype=float32)


<tf.Tensor: shape=(3,), dtype=float32, numpy=array([-1., -1., -1.], dtype=float32)>

In [46]:
import tensorflow as tf

@tf.custom_gradient
def sign_ste(x):
    def grad(dy):
        return dy  # STE approximation, passing gradient as is
    return tf.sign(x), grad

x = tf.Variable([0.5, -1.3, 2.0], dtype=tf.float32)

with tf.GradientTape() as tape:
    y = sign_ste(x)
    loss = tf.reduce_sum(y)

grad = tape.gradient(loss, x)
print("Gradients:", grad.numpy())


Gradients: [1. 1. 1.]
