In [1]:
import numpy as np


In [2]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
import numpy as np

class Critic(tf.keras.Model):
    """
    Recurrent critic network.

    Architecture (see `call`): Masking -> LSTM(500, return_sequences=True)
    -> Dense(250) -> Dense(100) -> Dense(100) (all ReLU) -> Dense(1) (sigmoid),
    so one value in (0, 1) is produced for every time step of the input.
    """
    def __init__(self,nature, valreg=0.01, seed_val=0.3, pad_value=-7., dolinar_layers=2, tau=0.01):
        '''
        nature: role label of this network; give_td_error_Kennedy_guess refuses
                to run unless it is "target"
        valreg: regularisation value (L1 on kernels, L2 on activations)
        seed_val: half-width of the uniform interval used to initialise the
                  Dense kernels and biases
        pad_value: value not considered by the lstm (handed to the Masking layer)
        dolinar_layers: number of photodetections
        tau: mixing coefficient used by update_target_parameters
        '''
        super(Critic,self).__init__()

        self.pad_value = pad_value
        self.nature = nature
        self.dolinar_layers = dolinar_layers
        # Steps are (beta1, pad), (n1, beta2), (n2, guess): in general there are (layers + 1) of them.
        self.mask = tf.keras.layers.Masking(mask_value=pad_value,
                                  input_shape=(self.dolinar_layers, 2))
        self.lstm = tf.keras.layers.LSTM(500, return_sequences=True)

        self.tau = tau
        self.l1 = self._regularised_dense(250, valreg, seed_val)
        self.l2 = self._regularised_dense(100, valreg, seed_val)
        self.l3 = self._regularised_dense(100, valreg, seed_val)
        self.l4 = self._regularised_dense(1, valreg, seed_val)

    @staticmethod
    def _regularised_dense(units, valreg, seed_val):
        """Dense layer with uniform(-seed_val, seed_val) init, L1 kernel and L2 activity regularisation."""
        return Dense(
            units,
            kernel_initializer=tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
            bias_initializer=tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
            kernel_regularizer=tf.keras.regularizers.l1(valreg),
            activity_regularizer=tf.keras.regularizers.l2(valreg),
        )

    def update_target_parameters(self,primary_net):
        """
        Soft update of this network's weights towards primary_net:
            w_self <- tau * w_primary + (1 - tau) * w_self

        Raises ValueError when the two networks do not expose the same number
        of weight arrays (e.g. one of them has not been called yet).
        """
        prim_weights = primary_net.get_weights()
        targ_weights = self.get_weights()
        if len(prim_weights) != len(targ_weights):
            raise ValueError(
                "Cannot blend weights: primary has {} arrays, target has {}".format(
                    len(prim_weights), len(targ_weights)))
        self.set_weights([self.tau * prim + (1 - self.tau) * targ
                          for prim, targ in zip(prim_weights, targ_weights)])
        return

    def call(self, inputs):
        """Forward pass; returns one sigmoid output per time step of `inputs`."""
        feat = self.mask(inputs)
        feat = self.lstm(feat)
        feat = tf.nn.relu(self.l1(feat))
        feat = tf.nn.relu(self.l2(feat))
        feat = tf.nn.relu(self.l3(feat))
        feat = tf.nn.sigmoid(self.l4(feat))
        return feat


    def process_sequence(self,sample_buffer):
        """
        sample_buffer: 2-D array with one experiment per row,

            [[a0, o1, a1, o2, a2, ..., oL, guess, ...],
             [same but other experiment]]

        i.e. 2L entries for the (beta, outcome) pairs plus the guess. The LAST
        column of each row is read as the reward (presumably it sits right
        after the guess -- confirm against the replay buffer).

        Returns (padded_data, rewards_obtained):
          padded_data: shape (N, L+1, 2), as accepted by an RNN:
                       [[a0, pad], [o1, a1], ..., [oL, guess]]
          rewards_obtained: shape (N, L+1), zeros except the last column,
                            which holds the reward.
        """
        batch_size = sample_buffer.shape[0]
        steps = self.dolinar_layers + 1
        padded_data = np.full((batch_size, steps, 2), self.pad_value, dtype=float)
        padded_data[:, 0, 0] = sample_buffer[:, 0]
        for k in range(1, steps):
            # Step k pairs outcome o_k (column 2k-1) with the action chosen
            # right after it (column 2k). The previous [k, k+1] pairing only
            # matched this layout for a single layer.
            padded_data[:, k] = sample_buffer[:, [2*k - 1, 2*k]]

        rewards_obtained = np.zeros((batch_size, steps))
        rewards_obtained[:, -1] = sample_buffer[:, -1]
        return padded_data, rewards_obtained


    def pad_single_sequence(self, seq):
        """
        input: [a0, o1, a1, o2, a2, ..., oL, aL]

        output: array of shape (1, L+1, 2): [[a0, pad], [o1, a1], ..., [oL, aL]]

        The result can be fed to the network to predict the greedy guess/action.
        """
        steps = self.dolinar_layers + 1
        padded_data = np.full((1, steps, 2), self.pad_value, dtype=float)
        padded_data[0, 0, 0] = seq[0]
        for k in range(1, steps):
            # (o_k, a_k) live at positions 2k-1 and 2k of the flat sequence.
            padded_data[0, k] = seq[2*k - 1:2*k + 1]
        return padded_data

    def give_td_error_Kennedy_guess(self,batched_input,sequential_rews_with_zeros):
        """
        Build the TD targets for DDPG from a processed batch.

        batched_input: array of shape (N, L+1, 2) whose actions (all but the
            first one) are the ones given by the target actor. To obtain it
            from a buffer sample, call the target actor's sequence method and
            then process_sequence of this critic.
        sequential_rews_with_zeros: array of shape (N, L+1), as returned by
            process_sequence.

        Neither argument is modified. Returns an array of shape (N, 1, L+1).
        Raises AttributeError when this network is not the target.
        """
        if self.nature != "target":
            raise AttributeError("I'm not the target!")
        b = batched_input.copy()
        ll = sequential_rews_with_zeros.copy()

        # The input is unchanged during the loop, so one forward pass is enough.
        preds1 = self(b)
        q_values = preds1.numpy()[:, :, 0]  # explicit axis: keeps the batch axis even when N == 1
        for k in range(0,self.dolinar_layers-1):
            ll[:,k] = q_values[:,k+1] + ll[:,k]

        # Evaluate the opposite guess as well: the guess is the last feature of the last step.
        b[:, -1, -1] = -b[:, -1, -1]
        preds2 = self(b)
        both = tf.concat([preds1,preds2],2)
        maxs = tf.math.reduce_max(both,axis=2).numpy()
        ll[:,-2] = maxs[:,-1] # This is the last before the guess, so the label is max_g Q(h_L, g)
        ll = np.expand_dims(ll,axis=1)
        return ll


    def give_favourite_guess(self,sequence_with_plus):
        """
        Return the guess (+1 or -1) to which the critic assigns the larger value.

        Important: the guess slot of the input must hold +1.
        sequence_with_plus: batch containing ONE sequence, e.g. for a single
            layer [[[beta, pad], [outcome, 1]]]. The guess slot is the last
            feature of the last step. The input array is not modified.
        """
        pred_1 = self(sequence_with_plus)
        # Work on a copy so the caller's array keeps its +1 guess.
        flipped = np.array(sequence_with_plus, copy=True)
        flipped[:, -1, -1] = -flipped[:, -1, -1]
        pred_2 = self(flipped)
        both = tf.concat([pred_1,pred_2],2)
        # Index 0 -> the +1 guess won, index 1 -> the -1 guess won (read at the last step).
        maxs = tf.argmax(both,axis=2).numpy()[0, -1]

        guess = (-1)**maxs
        return  guess




In [3]:
class Actor(tf.keras.Model):
    """
    Recurrent actor network.

    Architecture (see `call`): Masking -> LSTM(500, return_sequences=True)
    -> Dense(250) -> Dense(100) -> Dense(100) (all ReLU) -> Dense(1) (sigmoid).
    The "primary" variant is built with a stateful LSTM, the "target" variant
    with a stateless one.
    """
    def __init__(self, nature, valreg=0.01, seed_val=0.1, pad_value = -7.,
                 dolinar_layers=2,tau=0.01):
        """
        nature: "primary" or "target"; anything else raises ValueError
        valreg: regularisation value (L1 on kernels, L2 on activations)
        seed_val: half-width of the uniform interval used to initialise the
                  Dense kernels and biases
        pad_value: value handed to the Masking layer as mask_value
        dolinar_layers: number of layers (photodetections) in the receiver
        tau: mixing coefficient used by update_target_parameters
        """
        super(Actor,self).__init__()
        self.dolinar_layers = dolinar_layers
        self.pad_value = pad_value
        self.nature = nature
        self.tau = tau

        if nature == "primary":
            self.lstm = tf.keras.layers.LSTM(500, return_sequences=True, stateful=True)
            self.mask = tf.keras.layers.Masking(mask_value=pad_value,
                                  input_shape=(1,None,1), dynamic=True)
        elif nature == "target":
            self.lstm = tf.keras.layers.LSTM(500, return_sequences=True, stateful=False)
            # The whole sequence is fed at once.
            self.mask = tf.keras.layers.Masking(mask_value=pad_value,
                                  input_shape=(self.dolinar_layers, 1))
        else:
            # Without lstm/mask the model cannot be called, so fail here
            # instead of printing a message and returning a broken object.
            raise ValueError(
                "nature must be either 'primary' or 'target', got {!r}".format(nature))

        self.l1 = self._regularised_dense(250, valreg, seed_val)
        self.l2 = self._regularised_dense(100, valreg, seed_val)
        self.l3 = self._regularised_dense(100, valreg, seed_val)
        self.l4 = self._regularised_dense(1, valreg, seed_val)

    @staticmethod
    def _regularised_dense(units, valreg, seed_val):
        """Dense layer with uniform(-seed_val, seed_val) init, L1 kernel and L2 activity regularisation."""
        return Dense(
            units,
            kernel_initializer=tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
            bias_initializer=tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
            kernel_regularizer=tf.keras.regularizers.l1(valreg),
            activity_regularizer=tf.keras.regularizers.l2(valreg),
        )

    def update_target_parameters(self,primary_net):
        """
        Soft update of this network's weights towards primary_net:
            w_self <- tau * w_primary + (1 - tau) * w_self

        Raises ValueError when the two networks do not expose the same number
        of weight arrays (e.g. one of them has not been called yet).
        """
        prim_weights = primary_net.get_weights()
        targ_weights = self.get_weights()
        if len(prim_weights) != len(targ_weights):
            raise ValueError(
                "Cannot blend weights: primary has {} arrays, target has {}".format(
                    len(prim_weights), len(targ_weights)))
        self.set_weights([self.tau * prim + (1 - self.tau) * targ
                          for prim, targ in zip(prim_weights, targ_weights)])
        return

    def call(self, inputs):
        """Forward pass; returns one sigmoid output per time step of `inputs`."""
        feat = self.mask(inputs)
        feat = self.lstm(feat)
        feat = tf.nn.relu(self.l1(feat))
        feat = tf.nn.relu(self.l2(feat))
        feat = tf.nn.relu(self.l3(feat))
        feat = tf.nn.sigmoid(self.l4(feat))

        return feat

    def process_sequence_of_experiences(self, experiences):
        """
        Takes a batch of experience vectors

            (beta1, o1, beta2, o2, beta3, o3, ..., o_L, guess, ...)

        and returns a copy in which every action after the first is replaced
        by this network's output for the preceding outcome:

            (beta1, o1, beta2_net, o2, beta3_net, o3, ..., o_L, guess, ...)

        For the primary this should give again the actions that generated the
        experience (so that the weights enter the graph); for the target it
        gives the "opinion" on the actions it would have taken.

        The input array is not modified.
        """
        # NOTE(review): the primary actor's LSTM is built with stateful=True and
        # nothing here resets its state -- confirm whether callers are expected
        # to reset it between batches.
        export = experiences.copy()
        batch_size = experiences.shape[0]
        # From the first outcome to the last one that is followed by an action (not the guess).
        for index in range(1,2*self.dolinar_layers-1,2):
            outcomes = np.reshape(np.array(export[:,index]), (batch_size,1,1))
            export[:,index+1] = np.squeeze(self(outcomes))
        return export

    def __str__(self):
        """Use the Keras model name as the string representation."""
        return self.name

In [7]:
# Load the recorded experiences from disk (the file name suggests the 2-layer setup -- TODO confirm).
experinces = np.load("expe_2L.npy")

In [8]:
# Build a primary actor, look at its Masking layer config, then switch the LSTM's stateful flag off.
# NOTE(review): confirm that changing `stateful` after construction actually takes effect.
actor = Actor(nature="primary")
actor.mask.get_config()
actor.lstm.stateful=False

In [9]:
# Run the actor on column 1 of every experience, reshaped to (N, 1, 1).
actor(np.reshape(experinces[:,1], (len(experinces[:,1]), 1,1)))



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



<tf.Tensor: shape=(100, 1, 1), dtype=float32, numpy=
array([[[0.50762874]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.5076383 ]],

       [[0.5076383 ]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.5076383 ]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.5076383 ]],

       [[0.5076383 ]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.5076383 ]],

       [[0.5076383 ]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.5076383 ]],

       [[0.5076383 ]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.5076383 ]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.50762874]],

       [[0.

In [60]:
# Same call restricted to the first 4 experiences.
actor(np.reshape(experinces[:,1][:4], (len(experinces[:,1][:4]), 1,1)))

<tf.Tensor: shape=(4, 1, 1), dtype=float32, numpy=
array([[[0.505339]],

       [[0.505339]],

       [[0.505339]],

       [[0.505339]]], dtype=float32)>

In [44]:
# Inspect the configuration of the actor's Masking layer.
actor.mask.get_config()

{'name': 'masking_6',
 'trainable': True,
 'batch_input_shape': (None, None),
 'dtype': 'float32',
 'dynamic': True,
 'mask_value': -7.0}

In [36]:
# Ask for help on the Masking layer's `dynamic` attribute; in the recorded run it is a
# plain bool, so this only shows the built-in help for `bool`.
help(actor.mask.dynamic)

Help on bool object:

class bool(int)
 |  bool(x) -> bool
 |  
 |  Returns True when the argument x is true, False otherwise.
 |  The builtins True and False are the only two instances of the class bool.
 |  The class bool is a subclass of the class int, and cannot be subclassed.
 |  
 |  Method resolution order:
 |      bool
 |      int
 |      object
 |  
 |  Methods defined here:
 |  
 |  __and__(self, value, /)
 |      Return self&value.
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new object.  See help(type) for accurate signature.
 |  
 |  __or__(self, value, /)
 |      Return self|value.
 |  
 |  __rand__(self, value, /)
 |      Return value&self.
 |  
 |  __repr__(self, /)
 |      Return repr(self).
 |  
 |  __ror__(self, value, /)
 |      Return value|self.
 |  
 |  __rxor__(self, value, /)
 |      Return value^self.
 |  
 |  __str__(self, /)
 |      Return str(self).
 |  
 |  __xor__(self, value, /)
 |      Return self^value.
 |  
 |  -----

In [53]:
# Swap the actor's Masking layer for one declared with input_shape=(1, 1).
actor.mask = tf.keras.layers.Masking(mask_value=actor.pad_value,
                                  input_shape=(1,1), dynamic=True)

In [54]:
# Call the actor on a single zero input of shape (1, 1, 1); the recorded run raised a ValueError here.
actor(np.reshape(np.array([0.]), (1,1,1)))

ValueError: Tensor's shape (1, 100, 500) is not compatible with supplied shape [1, 1, 500]

In [11]:
# Peek at the first 5 experience rows.
experinces[:5]

array([[ 0.69102163,  0.        ,  0.83695079,  0.        , -1.        ,
         1.        ],
       [ 0.98145692,  0.        ,  0.04985608,  0.        , -1.        ,
         1.        ],
       [ 0.69588898,  0.        ,  0.30091034,  0.        ,  1.        ,
         0.        ],
       [ 0.65480695,  0.        ,  0.72952473,  1.        , -1.        ,
         0.        ],
       [ 0.07486903,  0.        ,  0.39324444,  1.        ,  1.        ,
         1.        ]])

In [7]:
# Switch the LSTM's stateful flag off.
# NOTE(review): confirm that changing `stateful` after construction actually takes effect.
actor.lstm.stateful = False

In [20]:
# Run the actor on column 1 of every experience, reshaped to (N, 1, 1).
actor(np.reshape(experinces[:,1], (len(experinces[:,1]), 1,1)))



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



<tf.Tensor: shape=(100, 1, 1), dtype=float32, numpy=
array([[[0.49875805]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49860242]],

       [[0.49860242]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49860242]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49860242]],

       [[0.49860242]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49860242]],

       [[0.49860242]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49860242]],

       [[0.49860242]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49860242]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.49875805]],

       [[0.

In [18]:
# Same call on the first 5 experiences only; the recorded run raised an
# InvalidArgumentError (incompatible shapes) here.
actor(np.reshape(experinces[:,1][:5], (len(experinces[:,1][:5]), 1,1)))

InvalidArgumentError: Incompatible shapes: [5,2000] vs. [100,2000] [Op:AddV2]

In [15]:
# Let the actor rewrite the later actions of every experience and show the first 5 rows.
actor.process_sequence_of_experiences(experinces)[:5]

array([[ 0.69102163,  0.        ,  0.49785164,  0.        , -1.        ,
         1.        ],
       [ 0.98145692,  0.        ,  0.49785164,  0.        , -1.        ,
         1.        ],
       [ 0.69588898,  0.        ,  0.49785164,  0.        ,  1.        ,
         0.        ],
       [ 0.65480695,  0.        ,  0.49785164,  1.        , -1.        ,
         0.        ],
       [ 0.07486903,  0.        ,  0.49785164,  1.        ,  1.        ,
         1.        ]])

In [23]:
# Check the shape of column 1 after reshaping to a hard-coded batch of 100.
np.reshape(experinces[:,1],(100,1,1)).shape

(100, 1, 1)

In [24]:
# Run the actor on column 1 of the experiences, reshaped to (100, 1, 1).
actor(np.reshape(experinces[:,1],(100,1,1)))

<tf.Tensor: shape=(100, 1, 1), dtype=float32, numpy=
array([[[0.49785164]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49676186]],

       [[0.49676186]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49676186]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49676186]],

       [[0.49676186]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49676186]],

       [[0.49676186]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49676186]],

       [[0.49676186]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49676186]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.49785164]],

       [[0.

In [26]:
# Inspect the configuration of the actor's Masking layer.
actor.mask.get_config()

{'name': 'masking_1',
 'trainable': True,
 'batch_input_shape': (None, 1, 1),
 'dtype': 'float32',
 'mask_value': -7.0}

In [None]:
# Run the actor on column 1 of the experiences, reshaped to (100, 1, 1).
actor(np.reshape(experinces[:,1],(100,1,1)))