## Debugging / development of GAE
I used this script to help me debug GAE, loss function, and other issues I was having with `ac_implementation2b_gae`!
Last updated: 8/18/2020

In [1]:
import tensorflow as tf

### Fixing concat shapes

In [51]:
a = tf.convert_to_tensor([[0.444]])
b = tf.convert_to_tensor([1.0, 2.0, 3.0])

In [53]:
a2 = tf.reshape(a, [1])

In [58]:
tf.concat([b, a2], axis=0)

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([1.   , 2.   , 3.   , 0.444], dtype=float32)>

In [69]:
a3 = tf.convert_to_tensor([[0.1], [0.2], [0.4]])
a3.shape

TensorShape([3, 1])

### Fixing GAE / Advantage shapes

In [41]:
critic_values = tf.convert_to_tensor([[1.], [2.], [3.]])
rewards = [1, 2]

In [42]:
deltas = rewards + 0.99 * tf.squeeze(critic_values)[1:] - tf.squeeze(critic_values)[:-1]
deltas

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([1.98     , 2.9700003], dtype=float32)>

### Making sure GAE is calculating correctly

In [43]:
import scipy.signal
def discount_cumsum(x, discount):
    # source from OpenAI spinning up & rllab
    return scipy.signal.lfilter([1], [1, float(-discount)], x[::-1], axis=0)[::-1]

In [44]:
discount_cumsum(deltas, 0.99 * 0.95)

array([4.77328527, 2.97000027])

In [84]:
discount_cumsum(rewards, 0.99)

array([2.98, 2.  ])

### Create GAE manually

In [128]:
deltas = tf.convert_to_tensor([5544, 556, -204.00, 11, -.01,1, 0], dtype=tf.float32)

deltas

In [130]:
discount_cumsum(deltas, 0.99)

array([ 5.90611427e+03,  3.65771993e+02, -1.92149502e+02,  1.19702000e+01,
        9.80000000e-01,  1.00000000e+00,  0.00000000e+00])

In [131]:
advantages = []
total_d = 0
for d in deltas[::-1]:
    total_d = d + 0.99  * total_d
    advantages.append(total_d)

In [134]:
advantages.reverse()
advantages = tf.convert_to_tensor(advantages)
advantages

<tf.Tensor: shape=(7,), dtype=float32, numpy=
array([ 0.0000000e+00,  1.0000000e+00,  9.8000002e-01,  1.1970200e+01,
       -1.9214951e+02,  3.6577197e+02,  5.9061143e+03], dtype=float32)>

### Debugging actor loss

In [10]:
A = [1.1, 2.2, 3.3]
action_prob_trajectory = tf.convert_to_tensor([0.11, 0.22, 0.33])

In [70]:
tf.math.log(action_prob_trajectory)

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([-2.207275 , -1.5141277, -1.1086626], dtype=float32)>

### Debugging total loss

In [17]:
a_loss = tf.convert_to_tensor([0.11, 0.22, 0.33])
b_loss = tf.convert_to_tensor([0.11, 0.22, 0.33])

tf.reduce_mean(a_loss + 0.5 * b_loss)

<tf.Tensor: shape=(), dtype=float32, numpy=0.33>

In [21]:
tf.expand_dims([3.50], 0)

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[3.5]], dtype=float32)>