In [3]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.patches as mpatches

In [4]:
def tf_reset():
    try:
        sess.close()
    except:
        pass
    tf.reset_default_graph()
    return tf.Session()

### build network

In [13]:
def build_mlp(input_placeholder, output_size, scope, n_layers, size, activation=tf.tanh, output_activation=None):
    """
        Builds a feedforward neural network
        
        arguments:
            input_placeholder: placeholder variable for the state (batch_size, input_size)
            output_size: size of the output layer
            scope: variable scope of the network
            n_layers: number of hidden layers
            size: dimension of the hidden layer
            activation: activation of the hidden layers
            output_activation: activation of the ouput layers

        returns:
            output placeholder of the network (the result of a forward pass) 

        Hint: use tf.layers.dense    
    """
    # YOUR CODE HERE
    fwd = input_placeholder
    for _ in range(n_layers):
        fwd = tf.layers.dense(fwd, size, activation=activation)
    output_placeholder = tf.layers.dense(fwd, output_size, activation=output_activation)
    return output_placeholder

In [15]:
sess = tf_reset()
input_ph = tf.placeholder(dtype=tf.float32, shape = [None, 2])
output_ph = build_mlp(input_ph, 3, None, 2, 5, tf.nn.relu, tf.nn.softmax)

sess.run(tf.global_variables_initializer())
output_run = sess.run(output_ph, feed_dict={input_ph: np.array([[2,3], [4,5]])})

In [16]:
print(output_run)

[[0.17747754 0.5960405  0.22648197]
 [0.07801642 0.7966605  0.12532304]]


### matrix broadcasting

In [89]:
sess = tf_reset()
b = tf.constant([[1,1,1], [1,1,1]], dtype = tf.float32)
a = tf.constant([[2],[3]], dtype = tf.float32)
c = b * a
print(sess.run(c))

[[2. 2. 2.]
 [3. 3. 3.]]


### random sampling

In [33]:
sess = tf_reset()
# logits = tf.constant([[0,0,10],[10,0,0]], dtype = tf.float32)
logits = tf.constant([[0,0,10]], dtype = tf.float32)
samples = tf.random.categorical(logits, 1)
# samples = tf.reshape(samples, [1,-1])
print(sess.run(samples))

[[2]]


In [28]:
import tensorflow_probability as tfp
labels = tf.constant([0, 0], dtype = tf.int32)
dist = tfp.distributions.Categorical(logits)
print(sess.run(dist.log_prob(labels)))
print(sess.run(dist.prob(labels)))

[-1.0000091e+01 -9.0833353e-05]
[4.5395816e-05 9.9990916e-01]


In [117]:
1.0 / (np.exp(10)+2)

4.539580782951091e-05

In [57]:
sess = tf_reset()
mu = tf.constant([[0, 0, 0], [1 ,2 ,3 ]], dtype = tf.float32)
std = tf.constant([.1, .1, 100], dtype = tf.float32)
samples = tf.random.normal(tf.shape(mu))
samples = mu + std * samples
print(sess.run(samples))

[[-1.0513288e-01 -1.9531267e-02  6.2492096e+01]
 [ 9.6234667e-01  1.8024181e+00  2.0686584e+01]]


In [62]:
import tensorflow_probability as tfp
dist = tfp.distributions.MultivariateNormalDiag(loc = mu, scale_diag = std)
log_prob = dist.log_prob(mu)
print(sess.run(log_prob))
mult = log_prob * tf.constant([1,2], dtype = tf.float32)
print(sess.run(mult))
mult1 = tf.multiply(log_prob, tf.constant([1,2], dtype = tf.float32))
print(sess.run(mult1))

[-2.7568154 -2.7568154]
[-2.7568154 -5.513631 ]
[-2.7568154 -5.513631 ]


In [55]:
sess = tf_reset()
sy_logstd = tf.get_variable(name='log_std', shape=[5],
                initializer=tf.constant_initializer(1.0))
sess.run(tf.global_variables_initializer())
print(sess.run(sy_logstd))

[1. 1. 1. 1. 1.]


In [52]:
dist = tfp.distributions.Normal(0, 1)
print(sess.run(dist.log_prob(0))*3)

-2.756815552711487


### reshape

In [65]:
xx = tf.reshape(tf.constant([[3]], dtype=tf.float32), [-1])
print(sess.run(xx))

[3.]


### numpy

In [176]:
from itertools import accumulate
import operator
ar = np.array([2,3,4, 5])
list(accumulate(ar[::-1], lambda bal, x: bal * 0.5 + x))[::-1]

[5.125, 6.25, 6.5, 5]

In [166]:
np.array(list(accumulate(np.ones(10), lambda res, _: res * 0.5)))

array([1.        , 0.5       , 0.25      , 0.125     , 0.0625    ,
       0.03125   , 0.015625  , 0.0078125 , 0.00390625, 0.00195312])

In [175]:
from functools import reduce
reduce(lambda bal, x: bal * 0.5 + x, ar[::-1])

5.125

### Open AI gym

In [192]:
import gym
env = gym.make('CartPole-v0')
env.reset()
for _ in range(1000):
    env.render()
    env.step(env.action_space.sample()) # take a random action
env.close()

  result = entry_point.load(False)


[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.[0m


KeyboardInterrupt: 

In [69]:
import gym
env = gym.make('CartPole-v0')
print(env.action_space.sample())
print(env.reset())

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
1
[-0.03064322  0.03213029 -0.00987202 -0.03919015]


In [72]:
env.step(1)[0]

array([-0.01699948,  0.61802413, -0.02997516, -0.92909315])

In [71]:
env.step(1)

(array([-0.02545277,  0.42266437, -0.01735525, -0.63099546]), 1.0, False, {})

In [73]:
env = gym.make('LunarLanderContinuous-v2')

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


In [79]:
env.action_space.sample()

array([0.05778984, 0.13608912], dtype=float32)

In [80]:
env.action_space.contains(np.array([ 1.3631266 , -0.2566176]))

False

In [85]:
env.action_space.contains(np.array([0.9, -0.9]))

True

In [82]:
env.action_space

Box(2,)