In [2]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
import numpy as np
# Switch TensorFlow to eager mode: the Critic methods below call `.numpy()` on
# model outputs and pass tensors to NumPy functions, which needs concrete values.
tf.compat.v1.enable_eager_execution()
# Queries whether eager mode is active; the returned flag is not used here.
tf.executing_eagerly()

class Critic(tf.keras.Model):
    """Recurrent Q-value critic: Masking -> LSTM -> Dense(50) -> Dense(50) -> Dense(1).

    The masking layer is declared with ``input_shape=(2, 2)``, i.e. inputs are
    batches of two-step sequences with two features per step. The helper
    methods below assume the layout ``[[beta, pad], [outcome, guess]]``
    (the original notes mention the Kennedy receiver).

    The LSTM returns the whole sequence, so a forward pass yields one sigmoid
    value per time step: output shape ``(batch, 2, 1)``. The value at the last
    time step is the one conditioned on ``(beta, outcome, guess)``.
    """

    def __init__(self, valreg=0.01, seed_val=0.1):
        """Build the layers.

        Args:
            valreg: strength of both the L1 kernel regulariser and the L2
                activity regulariser attached to every dense layer.
            seed_val: half-width of the uniform range ``[-seed_val, seed_val]``
                used to initialise dense kernels and biases (it is *not* a
                random seed, despite the name).
        """
        super(Critic, self).__init__()

        # Per the Keras docs, Masking skips a time step only when *all* of its
        # features equal mask_value, so a step like [beta, -4.] is not masked.
        self.mask = tf.keras.layers.Masking(mask_value=-4.,
                                            input_shape=(2, 2))
        self.lstm = tf.keras.layers.LSTM(250, return_sequences=True)

        self.l1 = self._dense(50, valreg, seed_val)
        self.l2 = self._dense(50, valreg, seed_val)
        self.l3 = self._dense(1, valreg, seed_val)

    @staticmethod
    def _dense(units, valreg, seed_val):
        """Return a Dense layer with the initialisers/regularisers shared by l1-l3."""
        return Dense(
            units,
            kernel_initializer=tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
            bias_initializer=tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
            kernel_regularizer=tf.keras.regularizers.l1(valreg),
            activity_regularizer=tf.keras.regularizers.l2(valreg))

    def update_target_parameters(self, primary_net, tau=0.01):
        """Soft-update this (target) network towards ``primary_net``.

        Every weight array becomes ``tau * primary + (1 - tau) * target``.

        Args:
            primary_net: model with the same weight layout as this one.
            tau: interpolation factor; 1 copies the primary weights outright.

        Raises:
            ValueError: if the two networks expose a different number of
                weight arrays (e.g. only one of them has been built).
        """
        prim_weights = primary_net.get_weights()
        targ_weights = self.get_weights()
        if len(prim_weights) != len(targ_weights):
            raise ValueError(
                "primary and target networks have different numbers of weight "
                "arrays: %d vs %d" % (len(prim_weights), len(targ_weights)))
        # Pair the arrays directly instead of indexing lists with tf.range tensors.
        self.set_weights([tau * prim + (1 - tau) * targ
                          for prim, targ in zip(prim_weights, targ_weights)])
        return

    def call(self, inputs):
        """Forward pass; returns sigmoid outputs of shape (batch, steps, 1)."""
        feat = self.mask(inputs)
        feat = self.lstm(feat)
        feat = tf.nn.relu(self.l1(feat))
        feat = tf.nn.relu(self.l2(feat))
        feat = tf.nn.sigmoid(self.l3(feat))
        return feat

    def get_self_greedy(self, b, ll):
        """Return a copy of the labels whose step-0 entry is max_guess Q(beta, n, guess).

        Each datapoint in ``b`` is ``[(beta, pad), (n, guess)]``. The network is
        evaluated on the batch as given and on a copy with the sign of the
        guess flipped; the larger of the two last-step predictions is written
        into ``labels[:, 0]``.

        Args:
            b: NumPy batch of sequences, shape (batch, 2, 2). Not modified.
            ll: NumPy label array whose ``[:, 0]`` slice holds one value per
                sample. Not modified.

        Returns:
            A copy of ``ll`` with ``[:, 0]`` replaced by the greedy values.
        """
        flipped = b.copy()
        labels = ll.copy()
        flipped[:, 1, 1] = -flipped[:, 1, 1]

        # Keep only the last time step: it is the output that has seen the
        # guess. Mixing in the step-0 output would let Q(beta) enter the max.
        q_given = self(b)[:, -1]          # (batch, 1)
        q_flipped = self(flipped)[:, -1]  # (batch, 1)
        best = tf.math.reduce_max(tf.concat([q_given, q_flipped], 1), axis=1)

        # Reshape to the target slice so a batch of one works as well
        # (squeeze + expand_dims fails on the resulting 0-d array).
        labels[:, 0] = best.numpy().reshape(labels[:, 0].shape)
        return labels

    def give_favourite_guess(self, sequence):
        """Return the guess (+1 or -1) the network prefers for ``sequence``.

        Args:
            sequence: array-like of shape (batch, 2, 2) laid out as
                ``[[beta, pad], [outcome, guess]]``. The guess entry is
                ignored and the input is not modified.

        Returns:
            ``1`` or ``-1`` (a Python int) for the first element of the batch.
        """
        plus = np.array(sequence, copy=True)
        minus = plus.copy()
        plus[:, 1, 1] = 1.
        minus[:, 1, 1] = -1.

        # Compare the last-step predictions only: column 0 is guess=+1 and
        # column 1 is guess=-1, so the preferred guess is (-1) ** argmax.
        pred_plus = self(plus)[:, -1]
        pred_minus = self(minus)[:, -1]
        both = tf.concat([pred_plus, pred_minus], 1)
        best_column = int(tf.argmax(both, axis=1).numpy()[0])
        guess = (-1) ** best_column
        return guess

In [8]:
# Instantiate the critic with its defaults (valreg=0.01, seed_val=0.1).
critic = Critic()

In [9]:
# One hand-made example in the layout the critic expects:
# step 0 = [beta, pad], step 1 = [n, guess].
beta, n, guess = 0.7, 0, -1
# Same value as the critic's Masking(mask_value=-4.) layer.
pad_value = -4.
sequential_info = [[beta, pad_value], [n, guess]]
# Batch holding just that one sequence.
batch = [sequential_info]


In [10]:
# Convert the nested-list batch to a float32 array of shape (1, 2, 2).
inps = np.asarray(batch, dtype=np.float32)

In [13]:
# Forward pass of the critic on the single-example batch.
b = critic(inps)

In [14]:
# Show the prediction: shape (1, 2, 1), i.e. one sigmoid value per time step.
b

<tf.Tensor: id=2003, shape=(1, 2, 1), dtype=float32, numpy=
array([[[0.49837634],
        [0.49858934]]], dtype=float32)>