In [50]:
from misc import *
import numpy as np
import cmath

def P(a,b,et,n):

    p0=np.exp(-abs((et*a)+b)**2)

    if n ==0:
        return p0
    else:
        return 1-(p0)
    
def outcomes_universe(L):
    """
    Takes L (# of photodetections in the experiment) and returns
    all possible outcomes in a matrix of 2**L rows by L columns,
    which are all possible sequence of outcomes you can ever get.
    """
    a = np.array([0,1])
    two_outcomes = np.array([[0,0],[0,1],[1,0],[1,1]]).astype(int)
    if L<2:
        return np.array([0,1]).astype(int)
    elif L==2:
        return two_outcomes
    else:
        x = insert(a,two_outcomes)
        for i in range(L-3):
            x = insert(a,x)
        return x.astype(int)

def make_attenuations(layers):
    if layers == 1:
        return [0]
    else:
        ats=[0]
        for i in range(layers-1):
            ats.append(np.arctan(1/np.cos(ats[i])))
        return np.flip(ats)

    
def prob_2L(actions_tree, at): #
    #at = make_attenuations(2)
    p=0
    for ot in outcomes_universe(2):
        p += P(actions_tree["2"][str(ot[:2])]*0.4, actions_tree["0"]["[]"], np.sin(at[0]), ot[0])*P(actions_tree["2"][str(ot[:2])]*0.4,
                                                                                                    actions_tree["1"][str(ot[:1])], np.cos(at[0]), ot[1])
    return p/2

In [49]:
at = make_attenuations(2)

In [7]:
b1, b2 = -.4, .5
guesses = {}
for n in outcomes_universe(2):
    guesses[str(n)] = np.random.choice([-1,1],1)[0]
seconds = {"[0]": b1, "[1]": b2}

In [75]:
class Actions():
    def __init__(self, number_layers=2):
        self.number_layers = number_layers
        actions = {}
        for layer in range(number_layers+1):
            actions[str(layer)] = {}

        for k in outcomes_universe(number_layers):
            for layer in range(number_layers+1):
                actions[str(layer)][str(k[:layer])] = 0
                
        self.actions = actions
        
    def random_tree(self):
        actions = self.actions.copy()
        for k in outcomes_universe(self.number_layers):
            for layer in range(self.number_layers+1):
                actions[str(layer)][str(k[:layer])] = np.random.random()
        return actions

In [76]:
tree = Actions()

In [77]:
actions

{'0': {'[]': 0.09536316655133259},
 '1': {'[0]': 0.9805776651934801, '[1]': 0.3924997874180116},
 '2': {'[0 0]': 0.9592507899371683,
  '[0 1]': 0.00824808420342682,
  '[1 0]': 0.7874987939807336,
  '[1 1]': 0.33579160486351634}}

In [78]:
tree.actions

{'0': {'[]': 0},
 '1': {'[0]': 0, '[1]': 0},
 '2': {'[0 0]': 0, '[0 1]': 0, '[1 0]': 0, '[1 1]': 0}}

In [79]:
prob_2L(tree.actions, at)

0.5

In [56]:
actions

{'0': {'[]': [0.032962484545468684,
   0.6863144184684542,
   0.7392691047949408,
   0.6578099746897761]},
 '1': {'[0]': [0.5110449637454709, 0.002096637870309781],
  '[1]': [0.3504688649073524, 0.08617097607361901]},
 '2': {'[0 0]': [0.24777983301229067],
  '[0 1]': [0.8778730352077802],
  '[1 0]': [0.2266544423264727],
  '[1 1]': [0.8117419775022194]}}

In [10]:
critic = Critic()
inps = np.random.randn(1,8,2)

In [11]:
critic(inps)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



<tf.Tensor: shape=(1, 8, 1), dtype=float32, numpy=
array([[[0.6358925 ],
        [0.63168585],
        [0.6304481 ],
        [0.64474213],
        [0.6384484 ],
        [0.62755436],
        [0.6376362 ],
        [0.63068944]]], dtype=float32)>

In [177]:

##### ACTOR CLASSS ####
class Actor(tf.keras.Model):
    #input_dim: 1 if layer=0, 3 if layer= 2, for the Kennedy receiver ##
    def __init__(self, input_dim=1, valreg=0.01, seed_val=0.1, pad_value = -7.,
                 dolinar_layers=2, nature="primary"):
        super(Actor,self).__init__()
        self.dolinar_layers = dolinar_layers
        self.pad_value = pad_value
        self.nature = nature
        self.mask = tf.keras.layers.Masking(mask_value=pad_value,
                                  input_shape=(1, 1))
        if nature == "primary":
            self.lstm = tf.keras.layers.LSTM(500, return_sequences=True, stateful=True)
        elif nature == "target":
            self.lstm = tf.keras.layers.LSTM(500, return_sequences=True, stateful=False)
        else:
            print("Hey! the character is either primary or target")
        self.l1 = Dense(250,kernel_initializer=tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
        bias_initializer = tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
        kernel_regularizer=tf.keras.regularizers.l1(valreg),
    activity_regularizer=tf.keras.regularizers.l2(valreg))

        self.l2 = Dense(100, kernel_regularizer=tf.keras.regularizers.l1(valreg),
    activity_regularizer=tf.keras.regularizers.l2(valreg),
    kernel_initializer=tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
    bias_initializer = tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val))

        self.l3 = Dense(100, kernel_regularizer=tf.keras.regularizers.l1(valreg),
    activity_regularizer=tf.keras.regularizers.l2(valreg),
    kernel_initializer=tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
    bias_initializer = tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val))

        self.l4 = Dense(1, kernel_regularizer=tf.keras.regularizers.l1(valreg),
    activity_regularizer=tf.keras.regularizers.l2(valreg),
    kernel_initializer=tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
    bias_initializer = tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val))



    def update_target_parameters(self,primary_net, tau=0.01):
        #### only
        # for i,j in zip(self.get_weights(), primary_net.get_weights()):
        #     tf.assign(i, tau*j + (i-tau)*i )
        prim_weights = primary_net.get_weights()
        targ_weights = self.get_weights()
        weights = []
        for i in tf.range(len(prim_weights)):
            weights.append(tau * prim_weights[i] + (1 - tau) * targ_weights[i])
        self.set_weights(weights)
        return

    def call(self, inputs):
        feat = self.mask(inputs)
        feat= self.lstm(feat)
        # feat = tf.nn.dropout(feat, rate=0.01)
        feat = tf.nn.relu(self.l1(feat))
        # feat = tf.nn.dropout(feat, rate=0.01)
        feat = tf.nn.relu(self.l2(feat))
        feat = tf.nn.relu(self.l3(feat))
        feat = tf.nn.sigmoid(self.l4(feat))

        return feat

    def targeted_sequence(self, experiences_vector):

        #This function takes a vector of experiences:
        #vector = (\beta1, o1, \beta2, o2, \beta3, o3,...,o_L, guess)
        #and retrieves
        #(\beta1, o1, \beta2_target, o2, \beta3_target, o3, \beta4_target,... ,o_L, guess)
        #actor_target.lstm.reset_states() NO! 
        if self.nature != "target":
            raise AttributeError("check the lstm memory of actor target, stateful == True ?")
            return
        export = experiences_vector.copy()
        for index in range(1,2*self.dolinar_layers-1,2): # I consider from first outcome to last one (but guess)
            export[:,index+1] = np.squeeze(self(np.reshape(np.array(export[:,index]),
                                                                 (experiences_vector.shape[0],1,1))))
        return export

    def __str__(self):
        return self.name


In [114]:
actor = Actor()

In [81]:
inp = np.array([actor.pad_value])
inp = np.reshape(inp, (1,1,1))

In [82]:
for k in range(3):
    print(actor(inp))

tf.Tensor([[[0.47883806]]], shape=(1, 1, 1), dtype=float32)
tf.Tensor([[[0.47883806]]], shape=(1, 1, 1), dtype=float32)
tf.Tensor([[[0.47883806]]], shape=(1, 1, 1), dtype=float32)


In [29]:
for k in range(3):
    print(actor(inp + np.reshape(np.array(-1.),(1,1,1))))

tf.Tensor([[[0.4708976]]], shape=(1, 1, 1), dtype=float32)
tf.Tensor([[[0.469261]]], shape=(1, 1, 1), dtype=float32)
tf.Tensor([[[0.46807468]]], shape=(1, 1, 1), dtype=float32)


In [30]:
actor.lstm.states

[<tf.Variable 'actor_3/lstm_4/Variable:0' shape=(1, 500) dtype=float32, numpy=
 array([[-1.14967205e-01,  2.07302809e-01, -1.50902703e-01,
         -1.80431888e-01, -1.65166214e-01,  2.91105248e-02,
         -1.59782320e-01, -2.47169985e-04,  2.26949424e-01,
          1.36741713e-01, -1.16445042e-01,  2.86793765e-02,
          5.73190413e-02,  8.74600261e-02,  8.25699866e-02,
         -5.34274727e-02,  1.87864751e-01, -5.02619073e-02,
          2.43400067e-01, -2.86729224e-02, -1.66121349e-01,
          6.70780241e-02,  3.10184322e-02, -8.43979195e-02,
         -1.92845851e-01, -1.89841986e-01, -2.72434056e-01,
         -6.83001801e-02, -9.96095240e-02, -1.26970783e-01,
          7.12318197e-02, -1.94747880e-01, -2.19760448e-01,
          1.58407703e-01, -2.05468416e-01,  1.73184738e-01,
          3.02557703e-02, -3.75555293e-03,  1.28477858e-03,
          1.15694858e-01, -5.42933568e-02, -1.26794139e-02,
          1.94115683e-01,  1.01883449e-01,  8.05953890e-02,
          1.72015861e

In [43]:
actions = {}
for layer in range(3):
    actions[str(layer)] = {}

for k in outcomes_universe(2):
    for layer in range(3):
        actions[str(layer)][str(k[:layer])] = []

In [84]:
prob_2L(tree.random_tree(),at)

0.4354973512141521

In [None]:
actor(np.array())

In [100]:

class Critic(tf.keras.Model):
    #input_dim: 1 if layer=0, 3 if layer= 2, for the Kennedy receiver ##
    def __init__(self, valreg=0.01, seed_val=0.3, pad_value=-7., dolinar_layers=2):
        '''
        dolinar_layers= number of photodetections
        pad_value: value not considered by the lstm
        valreg: regularisation value
        seed_val: interval of random parameter inizialitaion.
        '''
        super(Critic,self).__init__()

        self.pad_value = pad_value
        self.dolinar_layers = dolinar_layers
        self.mask = tf.keras.layers.Masking(mask_value=pad_value,
                                  input_shape=(3, 2)) #(beta1, pad), (n1, beta2), (n2, guess). In general i will have (layer+1)
        self.lstm = tf.keras.layers.LSTM(500, return_sequences=True)

        self.l1 = Dense(250,kernel_initializer=tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
        bias_initializer = tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
        kernel_regularizer=tf.keras.regularizers.l1(valreg),
    activity_regularizer=tf.keras.regularizers.l2(valreg))

        self.l2 = Dense(100, kernel_regularizer=tf.keras.regularizers.l1(valreg),
    activity_regularizer=tf.keras.regularizers.l2(valreg),
    kernel_initializer=tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
    bias_initializer = tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val))

        self.l3 = Dense(100, kernel_regularizer=tf.keras.regularizers.l1(valreg),
    activity_regularizer=tf.keras.regularizers.l2(valreg),
    kernel_initializer=tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
    bias_initializer = tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val))

        self.l4 = Dense(1, kernel_regularizer=tf.keras.regularizers.l1(valreg),
    activity_regularizer=tf.keras.regularizers.l2(valreg),
    kernel_initializer=tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val),
    bias_initializer = tf.random_uniform_initializer(minval=-seed_val, maxval=seed_val))



    def update_target_parameters(self,primary_net, tau=0.01):
        #### only
        # for i,j in zip(self.get_weights(), primary_net.get_weights()):
        #     tf.assign(i, tau*j + (i-tau)*i )
        prim_weights = primary_net.get_weights()
        targ_weights = self.get_weights()
        weights = []
        for i in tf.range(len(prim_weights)):
            weights.append(tau * prim_weights[i] + (1 - tau) * targ_weights[i])
        self.set_weights(weights)
        return

    def call(self, inputs):
        feat = self.mask(inputs)
        feat= self.lstm(feat)
        # feat = tf.nn.dropout(feat, rate=0.01)
        feat = tf.nn.relu(self.l1(feat))
        # feat = tf.nn.dropout(feat, rate=0.01)
        feat = tf.nn.relu(self.l2(feat))
        feat = tf.nn.relu(self.l3(feat))
        feat = tf.nn.sigmoid(self.l4(feat))
        return feat


    def process_sequence(self,sample_buffer):
        """" 
        sample_buffer: array of shape (N,2*self.layers +1), N>1
        
        gets data obtained from N experiments: data.shape = (N, 2L+1),
        where +1 accounts for the guess and 2L for (beta, outcome).

        [[a0, o1, a1, o2, a2, o3, a4]
         [same but other experiment]
        ]

        and returns an array of shape (experiments, self.layers, 2 ), as accepted by an RNN
        """
        batch_size = sample_buffer.shape[0]
        data = sample_buffer[:,0:(self.dolinar_layers+1+1)]
        padded_data = np.ones((batch_size,self.dolinar_layers+1, 2))*self.pad_value
        padded_data[:,0][:,0] = data[:,0]
        for k in range(1,LAYERS+1):
            padded_data[:,k] = data[:,[k,k+1]]

        rewards_obtained = np.zeros((batch_size, self.dolinar_layers+1))
        rewards_obtained[:,-1] = sample_buffer[:,-1]
        return padded_data, rewards_obtained


    def pad_single_sequence(self, seq, LAYERS=1):
        """"
        input: [a0, o1, a1, o2, a2, o3, a4]

        output: [[a0, pad], [o1, a1], [...]]

        the cool thing is that then you can put this to predict the greedy guess/action.
        """
        padded_data = np.ones((1,LAYERS+1, 2))*self.pad_value
        padded_data[0][0][0] = seq[0]
        #padded_data[0][0] = data[0]
        for k in range(1,LAYERS+1):
            padded_data[0][k] = seq[k:(k+2)]
        return padded_data

    def give_td_error_Kennedy_guess(self,batched_input,sequential_rews_with_zeros):
        # this function takes as input the actions as given by the target actor (but the first one!)
        #and outpus the correspoindg TD-errors for DDPG! To obtain them from sample of buffer
        #you call the method targeted_sequence from the actor_target and then the process_sequence
        #of this critic network.
        if self.nature != "target":
            raise AttributeError("I'm not the target!")
            return
        b = batched_input.copy()
        ll = sequential_rews_with_zeros.copy()
        for k in range(0,self.dolinar_layers-1):
            print(k)
            ll[:,k] = np.squeeze(self(b))[:,k+1] + ll[:,k]

        preds1 = self(b)
        b[:,-1][:,-1] = -b[:,1][:,1]
        preds2 = self(b)
        both = tf.concat([preds1,preds2],2)
        maxs = np.squeeze(tf.math.reduce_max(both,axis=2).numpy())
        ll[:,-2] = maxs[:,1] # This is the last befre the guess.. so the label is max_g Q(h-L, g)
        ll = np.expand_dims(ll,axis=1)
        return ll


    def give_favourite_guess(self,sequence_with_plus):
        """"
            important !! the 1!
        sequence should be [[beta, pad], [outcome, 1]] """
        pred_1 = self(sequence_with_plus)
        sequence_with_plus[:,1][:,1] = -sequence_with_plus[:,1][:,1]
        pred_2 = self(sequence_with_plus)
        both = tf.concat([pred_1,pred_2],2)
        maxs = np.squeeze(tf.argmax(both,axis=2).numpy())[1]

        guess = (-1)**maxs
        return  guess



In [101]:
critic = Critic()

In [88]:
inps = np.random.randn(1,3,2)

In [102]:
critic(inps)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



<tf.Tensor: shape=(1, 3, 1), dtype=float32, numpy=
array([[[0.51881504],
        [0.5235726 ],
        [0.5387318 ]]], dtype=float32)>

In [103]:
sample_buffer = np.array([[1,2,3,4,5,6],[7,8,9,10,11,12]])
critic.process_sequence(sample_buffer)

(array([[[ 1., -7.],
         [ 2.,  3.],
         [ 3.,  4.]],
 
        [[ 7., -7.],
         [ 8.,  9.],
         [ 9., 10.]]]),
 array([[ 0.,  0.,  6.],
        [ 0.,  0., 12.]]))

In [104]:
def give_td_error_Kennedy_guess(critic,batched_input,sequential_rews_with_zeros):
    b = batched_input.copy()
    ll = sequential_rews_with_zeros.copy()
    preds1 = critic(b)
    b[:,1][:,1] = -b[:,1][:,1]
    preds2 = critic(b)
    both = tf.concat([preds1,preds2],2)
    maxs = np.squeeze(tf.math.reduce_max(both,axis=2).numpy())
    ll[:,0] = maxs[:,1] + ll[:,0]
    ll = np.expand_dims(ll,axis=1)
    return ll

In [108]:
bbd, rrws = critic.process_sequence(sample_buffer)

In [111]:
critic(bbd)

<tf.Tensor: shape=(2, 3, 1), dtype=float32, numpy=
array([[[0.62175477],
        [0.59714985],
        [0.6186216 ]],

       [[0.53024596],
        [0.75999266],
        [0.8607184 ]]], dtype=float32)>

In [178]:
actor_target = Actor(nature="target")

In [116]:
inps_actor = np.random.randn(1,actor_target.dolinar_layers,1)

In [118]:
outputs_target = actor_target(inps_actor) #(\beta1, \beta2)

In [129]:
actor_target(np.reshape(np.array(2.),(1,1,1)))

<tf.Tensor: shape=(1, 1, 1), dtype=float32, numpy=array([[[0.4731382]]], dtype=float32)>

In [175]:
np.array([[1.,2.,3.,4.,5.], [1.,2.,3.,4.,5.]]).shape

(2, 5)

In [180]:
targeted_sequence = actor_target.targeted_sequence(np.array([[1.,2.,3.,4.,5.], [1.,2.,3.,4.,5.]]))

In [182]:
bbs, rrs = critic.process_sequence(targeted_sequence)

In [220]:
def give_td_error_Kennedy_guess(self,batched_input,sequential_rews_with_zeros):
    # this function takes as input the actions as given by the target actor (but the first one!)
    #and outpus the correspoindg TD-errors for DDPG! 
    if self.nature != "target":
        raise AttributeError("I'm not the target!")
        return
    b = batched_input.copy()
    ll = sequential_rews_with_zeros.copy()
    for k in range(0,self.dolinar_layers-1):
        print(k)
        ll[:,k] = np.squeeze(self(b))[:,k+1] + ll[:,k]
    
    preds1 = self(b)
    b[:,-1][:,-1] = -b[:,1][:,1]
    preds2 = self(b)
    both = tf.concat([preds1,preds2],2)
    maxs = np.squeeze(tf.math.reduce_max(both,axis=2).numpy())
    ll[:,-2] = maxs[:,1] # This is the last befre the guess.. so the label is max_g Q(h-L, g)
    ll = np.expand_dims(ll,axis=1)
    return ll

In [221]:
give_td_error_Kennedy_guess(critic, bbs, rrs)

0


array([[[0.60952461, 0.60952461, 5.        ]],

       [[0.60952461, 0.60952461, 5.        ]]])

In [208]:
critic(bbs)

<tf.Tensor: shape=(2, 3, 1), dtype=float32, numpy=
array([[[0.62175477],
        [0.6095246 ],
        [0.5792907 ]],

       [[0.62175477],
        [0.6095246 ],
        [0.5792907 ]]], dtype=float32)>

In [188]:
bbs[:,-1][:,-1]

array([4., 4.])