# Softmax

In [3]:
import numpy as np
from collections import deque

In [4]:
# if using the softmax action selection policy
# as found on https://nolanbconaway.github.io/blog/2017/softmax-numpy
def softmax(X, theta=1.0, axis=None):
    """
    Compute the softmax of each element along an axis of X.

    Parameters
    ----------
    X: ND-Array. Probably should be floats.
    theta (optional): float parameter, used as a multiplier
        prior to exponentiation. Default = 1.0
    axis (optional): axis to compute values along. Default is the
        first non-singleton axis.

    Returns an array the same size as X. The result will sum to 1
    along the specified axis.
    """

    # make X at least 2d
    y = np.atleast_2d(X)

    # find axis
    if axis is None:
        axis = next(j[0] for j in enumerate(y.shape) if j[1] > 1)

    # multiply y against the theta parameter,
    y = y * float(theta)

    # subtract the max for numerical stability
    y = y - np.expand_dims(np.max(y, axis=axis), axis)

    # exponentiate y
    y = np.exp(y)

    # take the sum along the specified axis
    ax_sum = np.expand_dims(np.sum(y, axis=axis), axis)

    # finally: divide elementwise
    p = y / ax_sum

    # flatten if X was 1D
    if len(X.shape) == 1: p = p.flatten()

    return p


In [5]:
list(map(lambda x: round(x, 4), softmax(X=np.array([-10, -5, 0, 5, 10]))))

[0.0, 0.0, 0.0, 0.0067000000000000002, 0.99329999999999996]

In [6]:
list(map(lambda x: round(x, 30), softmax(X=np.array([-10, -5, 0, 5, 10]), theta=10.0)))

[0.0, 0.0, 0.0, 1.9287498500000003e-22, 1.0]

In [7]:
list(map(lambda x: round(x, 4), softmax(X=np.array([-10, -5, 0, 5, 10]), theta=0.1)))

[0.058000000000000003,
 0.095600000000000004,
 0.15770000000000001,
 0.26000000000000001,
 0.42870000000000003]

## Softmax in action

In [8]:
# Q-probabilities
Q_probs = list(map(lambda x: round(x, 4), softmax(X=np.array([-10, -5, 0, 5, 10]), theta=0.1)))
Q_probs

[0.058000000000000003,
 0.095600000000000004,
 0.15770000000000001,
 0.26000000000000001,
 0.42870000000000003]

In [9]:
action_value = np.random.choice(Q_probs, p=Q_probs)
action_value

0.26000000000000001

In [10]:
action = np.argmax(Q_probs == action_value)
action

3

In [11]:
Q_probs == action_value

array([False, False, False,  True, False], dtype=bool)

In [13]:
lst = deque(maxlen=4)

In [15]:
lst.append(0)
lst.append(1)
lst.append(2)
lst.append(3)
lst

deque([0, 1, 2, 3])

In [16]:
lst.append(4)
lst

deque([1, 2, 3, 4])

# Generators

In [27]:
def gen():
    print('first_honour')
    for i in range(3):
        yield i
        print('second_honour')
    print('last_honour')
        
generate = gen()
next(generate)

first_honour


0

In [28]:
next(generate)

second_honour


1

In [29]:
next(generate)

second_honour


2

In [30]:
next(generate)

second_honour
last_honour


StopIteration: 