In [1]:
import numpy as np
import basics
import misc
import tensorflow as tf
from tensorflow.keras.layers import Dense
import random

basic = basics.Basics()
basic.define_actions()
actions = basic.actions
ats = misc.make_attenuations(layers=2)

class Memory():
    def __init__(self, max_memory):
        self._max_memory = max_memory
        self._samples = []
    def add_sample(self, sample):
        self._samples.append(sample)
        if len(self._samples) > self._max_memory:
            self._samples.pop(0)
    def sample(self, no_samples):
        if no_samples > len(self._samples):
            return random.sample(self._samples, len(self._samples))
        else:
            return random.sample(self._samples, no_samples)
    @property
    def num_samples(self):
        return len(self._samples)



cardinality_betas = len(basic.actions[0])

class QN_l1(tf.keras.Model):
    def __init__(self):
        super(QN_l1,self).__init__()
        self.l1 = Dense(30, input_shape=(0,), kernel_initializer='random_uniform',
                bias_initializer='random_uniform')
        self.l2 = Dense(35, kernel_initializer='random_uniform',
                bias_initializer='random_uniform')

        # self.l21 = Dense(90, kernel_initializer='random_uniform',
        #         bias_initializer='random_uniform')
        self.l3 = Dense(cardinality_betas, kernel_initializer='random_uniform',
                bias_initializer='random_uniform')

    def call(self, input):
        feat = tf.nn.relu(self.l1(input))
        feat = tf.nn.relu(self.l2(feat))
        # feat = tf.nn.relu(self.l21(feat))
        value = self.l3(feat)
        return value



class QN_l2(tf.keras.Model):
    def __init__(self):
        super(QN_l2,self).__init__()
        self.l1 = Dense(30, input_shape=(1,2), kernel_initializer='random_uniform',
                bias_initializer='random_uniform')
        self.l2 = Dense(35, kernel_initializer='random_uniform',
                bias_initializer='random_uniform')

        # self.l21 = Dense(90, kernel_initializer='random_uniform',
        #         bias_initializer='random_uniform')
        self.l3 = Dense(cardinality_betas, kernel_initializer='random_uniform',
                bias_initializer='random_uniform')

    def call(self, input):
        feat = tf.nn.relu(self.l1(input))
        feat = tf.nn.relu(self.l2(feat))
        # feat = tf.nn.relu(self.l21(feat))
        value = self.l3(feat)
        return value

class QN_guess(tf.keras.Model):
    def __init__(self):
        super(QN_guess,self).__init__()
        self.l1 = Dense(30, input_shape=(1,4), kernel_initializer='random_uniform',
                bias_initializer='random_uniform')
        self.l2 = Dense(35, kernel_initializer='random_uniform',
                bias_initializer='random_uniform')

        # self.l21 = Dense(90, kernel_initializer='random_uniform',
        #         bias_initializer='random_uniform')
        self.l3 = Dense(2, kernel_initializer='random_uniform',
                bias_initializer='random_uniform')

    def call(self, input):
        feat = tf.nn.relu(self.l1(input))
        feat = tf.nn.relu(self.l2(feat))
        # feat = tf.nn.relu(self.l21(feat))
        value = self.l3(feat)
        return value


#### define the networks #####

qn_l1_prim = QN_l1()
qn_l1_targ = QN_l1()

qn_l2_prim = QN_l2()
qn_l2_targ = QN_l2()

qn_guess_prim = QN_guess()
qn_guess_targ = QN_guess()



In [6]:

def give_first_beta(epsilon):
    if np.random.random() < epsilon:
        label = np.random.choice(np.arange(len(basic.actions[0])))
        return label, basic.actions[0][label]
    else:
        input = np.expand_dims(np.array([]), axis=0)
        q1s = qn_l1_prim(input)
        q1s = q1s.numpy()
        label = np.argmax(q1s)
        beta1 = basic.actions[0][label]
        return label, beta1

def give_second_beta(new_state, epsilon):
    if np.random.random() < epsilon:
        label = np.random.choice(np.arange(len(basic.actions[1])))
        return label, basic.actions[1][label]
    else:
        input = np.expand_dims(np.array(new_state), axis=0)
        q2s = qn_l2_prim(input)
        q2s = q2s.numpy()
        label = np.argmax(q2s)
        beta2 = basic.actions[1][label]
        return label, beta2


def give_guess(new_state, epsilon):
    if np.random.random() < epsilon:
        guess = np.random.choice(basic.possible_phases,1)[0]
        return guess
    else:
        input = np.expand_dims(np.array(new_state), axis=0)
        qguess = qn_guess_prim(input)
        guess = qguess.numpy()
        label = np.argmax(guess)
        guess = basic.possible_phases[label]
        return guess



In [7]:

buffer = Memory(10**4)

alpha = .56
states_wasted = 10**3

def main():
    for episode in range(states_wasted):
        epsilon = np.exp(-0.001*episode)
        phase = np.random.choice([-1,1],1)[0]
        labelbeta1, beta1 = give_first_beta(epsilon)
        p0 = np.exp(-(beta1-(phase*np.cos(ats[0])*alpha))**2)
        outcome1 = np.random.choice([0,1],1,p=[p0,1-p0])[0]
        new_state = [outcome1, beta1]
        labelbeta2, beta2 = give_second_beta(new_state,epsilon)
        p1 = np.exp(-(beta2-(phase*np.sin(ats[0])*alpha))**2)
        outcome2 = np.random.choice([0,1],1,p=[p1,1-p1])[0]
        new_state = [outcome1, outcome2, beta1, beta2]
        guess = give_guess(new_state,epsilon)
        if guess == phase:
            reward = 1
        else:
            reward = 0
        buffer.add_sample((outcome1, outcome2, beta1, beta2, labelbeta1, labelbeta2, guess, reward))

main()

W1103 12:09:31.059237 140411243505472 base_layer.py:1865] Layer qn_l2 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



In [9]:
batch = buffer.sample(300)
batch

[(0, 0, -2.220446049250313e-16, 0.5999999999999996, 10, 8, 1, 1),
 (1, 1, -0.9, -0.40000000000000013, 1, 3, 1, 1),
 (1, 0, -0.40000000000000013, -0.8, 6, 1, 1, 1),
 (0, 0, -2.220446049250313e-16, 0.7999999999999996, 10, 9, 1, 1),
 (0, 1, -0.20000000000000018, 0.5999999999999996, 8, 8, 1, 0),
 (0, 1, -0.30000000000000016, -1.0, 7, 0, 1, 1),
 (0, 0, -2.220446049250313e-16, -0.40000000000000013, 10, 3, -1, 0),
 (1, 1, -0.9, 0.5999999999999996, 1, 8, 1, 0),
 (0, 0, -1.0, 0.5999999999999996, 0, 8, 1, 0),
 (1, 0, -0.5000000000000001, -0.6000000000000001, 5, 2, 1, 1),
 (0, 0, -0.6000000000000001, -0.40000000000000013, 4, 3, -1, 1),
 (0, 0, -0.20000000000000018, 0.5999999999999996, 8, 8, 1, 1),
 (0, 0, -1.0, 0.19999999999999973, 0, 6, 1, 1),
 (1, 0, -0.9, -0.6000000000000001, 1, 2, 1, 0),
 (0, 1, -0.40000000000000013, 0.5999999999999996, 6, 8, -1, 1),
 (0, 0, -2.220446049250313e-16, 0.5999999999999996, 10, 8, -1, 0),
 (0, 1, -2.220446049250313e-16, -1.0, 10, 0, 1, 1),
 (0, 0, -2.22044604925031

Updates: $$Q_{l=1}[s = \phi, a = \beta_1] <- Q^{target} [ n_1, \beta_1; \tilde{\beta}_2] \\ \text{For the $\beta_1$ selected, otherwise no update (not a detail! see later that the update for those betas is exactly the same, so the loss is only the

line above for the single beta selected at each episode in the batch...}$$ where $$ \tilde{\beta}_2 = \underset{\beta_2}{\text{argmax}} Q_{l=2}[ s=(n_1, \beta_1), a = \beta_2 ] $$

In [122]:
states_l2 = np.array([[ v[0], v[2]] for v in batch ] )
labels_1 = np.array([v[5] for v in batch])

layer1p1_normal = qn_l2_prim(np.expand_dims(states_l2, axis=0))
layer1p1_normal = np.squeeze(layer1p1_normal.numpy())

In [123]:
optimals_beta2_prim_labels = np.argmax(layer1p1_normal,axis=1)
optimals_beta2_prim_labels

array([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
       9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9])

In [138]:
target_qlp1 = qn_l1_targ(np.expand_dims(np.array([[] for i in range(len(batch))]), axis=0))
target_qlp1 = np.squeeze(target_qlp1, axis=0)
targs = target_qlp1.copy()

Now i check that only update the ones that experience ...


In [139]:
targs[np.arange(300), labels_1] = np.squeeze(qn_l2_targ(np.expand_dims(states_l2, axis=0)).numpy())[np.arange(300),optimals_beta2_prim_labels]

In [145]:
target_qlp1 = qn_l1_targ(np.expand_dims(np.array([[] for i in range(len(batch))]), axis=0))
target_qlp1 = np.squeeze(target_qlp1, axis=0)
dif = targs - target_qlp1

In [146]:
dif[0]

array([0.        , 0.        , 0.        , 0.        , 0.00307485,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        ], dtype=float32)