In [26]:
import numpy as np
import basics
import misc
import tensorflow as tf
from tensorflow.keras.layers import Dense
import random

basic = basics.Basics(resolution=.25)
basic.define_actions()
actions = basic.actions
ats = misc.make_attenuations(layers=2)

class Memory():
    def __init__(self, max_memory):
        self._max_memory = max_memory
        self._samples = []
    def add_sample(self, sample):
        self._samples.append(sample)
        if len(self._samples) > self._max_memory:
            self._samples.pop(0)
    def sample(self, no_samples):
        if no_samples > len(self._samples):
            return random.sample(self._samples, len(self._samples))
        else:
            return random.sample(self._samples, no_samples)
    @property
    def num_samples(self):
        return len(self._samples)



cardinality_betas = len(basic.actions[0])

class QN_l1(tf.keras.Model):
    def __init__(self):
        super(QN_l1,self).__init__()
        self.l1 = Dense(30, input_shape=(0,), kernel_initializer=tf.keras.initializers.RandomNormal(mean=0.0, stddev=1.0, seed=None),
                bias_initializer=tf.keras.initializers.RandomNormal(mean=0.0, stddev=1.0, seed=None))
        self.l2 = Dense(35, kernel_initializer=tf.keras.initializers.RandomNormal(mean=0.0, stddev=1.0, seed=None),
                bias_initializer=tf.keras.initializers.RandomNormal(mean=0.0, stddev=1.0, seed=None))

        # self.l21 = Dense(90, kernel_initializer='random_uniform',
        #         bias_initializer='random_uniform')
        self.l3 = Dense(cardinality_betas, kernel_initializer='random_uniform',
                bias_initializer='random_uniform')

    def call(self, input):
        feat = tf.nn.relu(self.l1(input))
        feat = tf.nn.relu(self.l2(feat))
        # feat = tf.nn.relu(self.l21(feat))
        value = self.l3(feat)
        return value



class QN_l2(tf.keras.Model):
    def __init__(self):
        super(QN_l2,self).__init__()
        self.l1 = Dense(30, input_shape=(1,2), kernel_initializer=tf.keras.initializers.RandomNormal(mean=0.0, stddev=1.0, seed=None),
                bias_initializer=tf.keras.initializers.RandomNormal(mean=0.0, stddev=1.0, seed=None))
        self.l2 = Dense(35, kernel_initializer=tf.keras.initializers.RandomNormal(mean=0.0, stddev=1.0, seed=None),
                bias_initializer=tf.keras.initializers.RandomNormal(mean=0.0, stddev=1.0, seed=None))

        # self.l21 = Dense(90, kernel_initializer='random_uniform',
        #         bias_initializer='random_uniform')
        self.l3 = Dense(cardinality_betas, kernel_initializer='random_uniform',
                bias_initializer='random_uniform')

    def call(self, input):
        feat = tf.nn.relu(self.l1(input))
        feat = tf.nn.relu(self.l2(feat))
        # feat = tf.nn.relu(self.l21(feat))
        value = self.l3(feat)
        return value

class QN_guess(tf.keras.Model):
    def __init__(self):
        super(QN_guess,self).__init__()
        self.l1 = Dense(30, input_shape=(1,4), kernel_initializer='random_uniform',
                bias_initializer='random_uniform')
        self.l2 = Dense(35, kernel_initializer='random_uniform',
                bias_initializer='random_uniform')

        # self.l21 = Dense(90, kernel_initializer='random_uniform',
        #         bias_initializer='random_uniform')
        self.l3 = Dense(2, kernel_initializer='random_uniform',
                bias_initializer='random_uniform')

    def call(self, input):
        feat = tf.nn.relu(self.l1(input))
        feat = tf.nn.relu(self.l2(feat))
        # feat = tf.nn.relu(self.l21(feat))
        value = self.l3(feat)
        return value


#### define the networks #####

qn_l1_prim = QN_l1()
qn_l1_targ = QN_l1()

qn_l2_prim = QN_l2()
qn_l2_targ = QN_l2()

qn_guess_prim = QN_guess()
qn_guess_targ = QN_guess()



In [21]:

def give_first_beta(epsilon):
    if np.random.random() < epsilon:
        label = np.random.choice(np.arange(len(basic.actions[0])))
        return label, basic.actions[0][label]
    else:
        input = np.expand_dims(np.array([]), axis=0)
        q1s = qn_l1_prim(input)
        q1s = q1s.numpy()
        label = np.argmax(q1s)
        beta1 = basic.actions[0][label]
        return label, beta1

def give_second_beta(new_state, epsilon):
    if np.random.random() < epsilon:
        label = np.random.choice(np.arange(len(basic.actions[1])))
        return label, basic.actions[1][label]
    else:
        input = np.expand_dims(np.array(new_state), axis=0)
        q2s = qn_l2_prim(input)
        q2s = q2s.numpy()
        label = np.argmax(q2s)
        beta2 = basic.actions[1][label]
        return label, beta2


def give_guess(new_state, epsilon):
    if np.random.random() < epsilon:
        guess = np.random.choice(basic.possible_phases,1)[0]
        return guess
    else:
        input = np.expand_dims(np.array(new_state), axis=0)
        qguess = qn_guess_prim(input)
        guess = qguess.numpy()
        label = np.argmax(guess)
        guess = basic.possible_phases[label]
        return guess



In [27]:

buffer = Memory(10**4)

alpha = .56
states_wasted = 10**3

def main():
    for episode in range(states_wasted):
        epsilon = np.exp(-0.001*episode)
        phase = np.random.choice([-1,1],1)[0]
        labelbeta1, beta1 = give_first_beta(epsilon)
        p0 = np.exp(-(beta1-(phase*np.cos(ats[0])*alpha))**2)
        outcome1 = np.random.choice([0,1],1,p=[p0,1-p0])[0]
        new_state = [outcome1, beta1]
        labelbeta2, beta2 = give_second_beta(new_state,epsilon)
        p1 = np.exp(-(beta2-(phase*np.sin(ats[0])*alpha))**2)
        outcome2 = np.random.choice([0,1],1,p=[p1,1-p1])[0]
        new_state = [outcome1, outcome2, beta1, beta2]
        guess = give_guess(new_state,epsilon)
        if guess == phase:
            reward = 1
        else:
            reward = 0
        buffer.add_sample((outcome1, outcome2, beta1, beta2, labelbeta1, labelbeta2, guess, reward))

main()

W1103 12:20:49.810138 140411243505472 base_layer.py:1865] Layer qn_l1_6 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

W1103 12:20:49.838775 140411243505472 base_layer.py:1865] Layer qn_l2_6 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If yo

In [28]:
batch = buffer.sample(300)
batch

[(0, 0, 0.0, -0.5, 4, 1, 1, 0),
 (0, 0, -0.5, 0.5, 2, 3, 1, 0),
 (0, 1, -0.5, 1.0, 2, 4, 1, 1),
 (0, 0, -0.25, 0.5, 3, 3, -1, 0),
 (0, 0, -0.25, 1.0, 3, 4, 1, 0),
 (0, 0, -0.5, 0.5, 2, 3, 1, 0),
 (0, 0, -0.5, 0.5, 2, 3, -1, 1),
 (0, 0, -0.5, 0.5, 2, 3, -1, 0),
 (0, 0, -0.5, 0.0, 2, 2, -1, 1),
 (0, 0, 0.0, -0.5, 4, 1, 1, 0),
 (0, 0, -0.5, 0.5, 2, 3, 1, 0),
 (0, 1, -0.5, -1.0, 2, 0, -1, 0),
 (1, 0, -0.75, 0.0, 1, 2, 1, 1),
 (0, 1, -0.5, 0.5, 2, 3, 1, 0),
 (1, 0, -0.25, -0.5, 3, 1, -1, 0),
 (0, 1, -0.25, 0.0, 3, 2, -1, 1),
 (1, 0, -0.75, 0.5, 1, 3, 1, 1),
 (0, 1, -0.5, 1.0, 2, 4, 1, 0),
 (0, 0, 0.0, -0.5, 4, 1, -1, 1),
 (1, 1, -1.0, 0.5, 0, 3, -1, 1),
 (0, 0, -0.5, 0.5, 2, 3, -1, 1),
 (1, 0, -0.5, 0.5, 2, 3, -1, 0),
 (0, 1, -0.75, -1.0, 1, 0, 1, 1),
 (0, 0, -0.5, -1.0, 2, 0, -1, 1),
 (1, 0, -0.25, 0.5, 3, 3, -1, 0),
 (1, 1, 0.0, 1.0, 4, 4, -1, 1),
 (0, 0, -0.25, 0.5, 3, 3, -1, 1),
 (1, 0, -1.0, 0.0, 0, 2, -1, 0),
 (0, 0, -0.5, 0.5, 2, 3, -1, 0),
 (0, 0, 0.0, 0.5, 4, 3, -1, 1),
 (0, 1, -1.

Updates: $$Q_{l=1}[s = \phi, a = \beta_1] <- Q^{target} [ n_1, \beta_1; \tilde{\beta}_2] \\ \text{For the $\beta_1$ selected, otherwise no update (not a detail! see later that the update for those betas is exactly the same, so the loss is only the

line above for the single beta selected at each episode in the batch...}$$ where $$ \tilde{\beta}_2 = \underset{\beta_2}{\text{argmax}} Q_{l=2}[ s=(n_1, \beta_1), a = \beta_2 ] $$

In [10]:
#buffer.add_sample((outcome1, outcome2, beta1, beta2, labelbeta1, labelbeta2, guess, reward))


In [29]:
states_l2 = np.array([[ v[0], v[2]] for v in batch ] )
labels_beta1 = np.array([v[4] for v in batch])

layer1p1_normal = qn_l2_prim(np.expand_dims(states_l2, axis=0))
layer1p1_normal = np.squeeze(layer1p1_normal.numpy())

In [32]:
layer1p1_normal

array([[ 0.33491045, -0.07160825,  0.76262116,  0.65361667, -0.07509165],
       [ 0.2502162 , -0.4565762 , -0.02420005,  0.6332496 , -0.5558549 ],
       [ 0.2502162 , -0.4565762 , -0.02420005,  0.6332496 , -0.5558549 ],
       ...,
       [ 0.12944512, -0.40295878,  0.83297133,  1.3347999 ,  0.45993352],
       [ 0.30730686, -0.66711074, -0.01373112,  1.2221037 , -0.13811909],
       [ 0.12944512, -0.40295878,  0.83297133,  1.3347999 ,  0.45993352]],
      dtype=float32)

In [33]:
layer1p1_normal.shape

(300, 5)

In [34]:
cardinality_betas

5

In [40]:
lbls=[]
for i in range(300):
    lbls.append(np.argmax(layer1p1_normal[i]))

In [41]:
lbls

[2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 3,
 3,
 3,
 2,
 3,
 2,
 3,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 3,
 3,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 3,
 3,
 3,
 3,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 3,
 3,
 3,
 3,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,


In [35]:
optimals_beta2_prim_labels = np.argmax(layer1p1_normal,axis=1)
optimals_beta2_prim_labels

array([2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 2, 3,
       2, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 2, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3,
       3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 2,
       2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 2, 3, 3, 3, 2, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 3, 3, 3])

In [42]:
optimals_beta2_prim_labels - np.array(lbls)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [43]:
target_qlp1 = qn_l1_targ(np.expand_dims(np.array([[] for i in range(len(batch))]), axis=0))
target_qlp1 = np.squeeze(target_qlp1, axis=0)
targs = target_qlp1.copy()

W1103 12:23:57.597498 140411243505472 base_layer.py:1865] Layer qn_l1_7 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



Now i check that only update the ones that experience ...


In [45]:
targs[np.arange(300), labels_beta1] = np.squeeze(qn_l2_targ(np.expand_dims(states_l2, axis=0)).numpy())[np.arange(300),optimals_beta2_prim_labels]

In [47]:
target_qlp1 = qn_l1_targ(np.expand_dims(np.array([[] for i in range(len(batch))]), axis=0))
target_qlp1 = np.squeeze(target_qlp1, axis=0)
dif = targs - target_qlp1

In [50]:
for i in range(len(dif)):
    print(dif[i])

[0.        0.        0.        0.        0.6818714]
[0.        0.        0.7985648 0.        0.       ]
[0.        0.        0.7985648 0.        0.       ]
[0.         0.         0.         0.41202188 0.        ]
[0.         0.         0.         0.41202188 0.        ]
[0.        0.        0.7985648 0.        0.       ]
[0.        0.        0.7985648 0.        0.       ]
[0.        0.        0.7985648 0.        0.       ]
[0.        0.        0.7985648 0.        0.       ]
[0.        0.        0.        0.        0.6818714]
[0.        0.        0.7985648 0.        0.       ]
[0.        0.        0.7985648 0.        0.       ]
[0.        1.8967357 0.        0.        0.       ]
[0.        0.        0.7985648 0.        0.       ]
[0.        0.        0.        0.8028864 0.       ]
[0.         0.         0.         0.41202188 0.        ]
[0.        1.8967357 0.        0.        0.       ]
[0.        0.        0.7985648 0.        0.       ]
[0.        0.        0.        0.        0.681871

In [56]:
loss_sum =tf.keras.losses.MSE(targs, target_qlp1)
tf.reduce_mean(loss_sum)

<tf.Tensor: shape=(), dtype=float32, numpy=0.18601634>

In [66]:
optimizer_ql1 = tf.keras.optimizers.Adam(lr=0.001)

with tf.device("/cpu:0"):
    with tf.GradientTape() as tape:
        tape.watch(qn_l1_prim.trainable_variables)
        
        pred_prim = qn_l1_prim(np.expand_dims(np.array([[] for i in range(len(batch))]), axis=0))
        
        loss_sum =tf.keras.losses.MSE(pred_prim, targs)
        loss = tf.reduce_mean(loss_sum)

        grads = tape.gradient(loss, qn_l1_prim.trainable_variables)
        optimizer_ql1.apply_gradients(zip(grads, qn_l1_prim.trainable_variables))