In [1]:
import gym
import pandas as pd
import numpy as np
from gym import envs
import random
import scipy.stats


In [5]:

class LinearSoftmaxAgent(object):
    """Act with softmax policy. Features are encoded as
    phi(s, a) is a 1-hot vector of states."""
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.states = []
        self.actions = []
        self.probs = []
        self.rewards = []
        self.theta = np.random.random(state_size * action_size)
        self.alpha = .01
        self.gamma = .99

    def store(self, state, action, prob, reward):
        self.states.append(state)
        self.actions.append(action)
        self.probs.append(prob)
        self.rewards.append(reward)

    def _phi(self, s, a):
        encoded = np.zeros([self.action_size, self.state_size])
        encoded[a] = s
        return encoded.flatten()

    def _softmax(self, s, a):
        return np.exp(self.theta.dot(self._phi(s, a)) / 100)

    def pi(self, s):
        """\pi(a | s)"""
        weights = np.empty(self.action_size)
        for a in range(self.action_size):
            weights[a] = self._softmax(s, a)
        return weights / np.sum(weights)

    def act(self, state):
        probs = self.pi(state)
        a = random.choices(range(0, self.action_size), weights=probs)
        a = a[0]
        pi = probs[a]
        return (a, pi)

    def _gradient(self, s, a):
        expected = 0
        probs = self.pi(s)
        for b in range(0, self.action_size):
            expected += probs[b] * self._phi(s, b)
        return self._phi(s, a) - expected

    def _R(self, t):
        """Reward function."""
        total = 0
        for tau in range(t, len(self.rewards)):
            total += self.gamma**(tau - t) * self.rewards[tau]
        return total

    def train(self):
        self.rewards -= np.mean(self.rewards)
        self.rewards /= np.std(self.rewards)
        for t in range(len(self.states)):
            s = self.states[t]
            a = self.actions[t]
            r = self._R(t)
            grad = self._gradient(s, a)
            self.theta = self.theta + self.alpha * r * grad
        # print(self.theta)
        self.states = []
        self.actions = []
        self.probs = []
        self.rewards = []

    def getName(self):
        return 'LinearSoftmaxAgent'

    def save(self):
        pass

In [6]:
import gym

SAVE_FREQUENCY = 10
env = gym.make('CartPole-v1')
state = env.reset()
score = 0
episode = 0
prev_frame = None
state_size = 4
action_size = env.action_space.n
g = LinearSoftmaxAgent(state_size, action_size)


MAX_EPISODES = 10000
while episode < MAX_EPISODES:  # episode loop
    # env.render()
    action, prob = g.act(state)
    state, reward, done, info = env.step(action)  # take a random action
    if done:
        reward = -10
    score += reward
    g.store(state, action, prob, reward)

    if done:
        episode += 1
        g.train()
        print('Episode: {} Score: {}'.format(episode, score))
        score = 0
        state = env.reset()
        if episode % SAVE_FREQUENCY == 0:
            g.save()

Episode: 1 Score: 15.0
Episode: 2 Score: 27.0
Episode: 3 Score: 1.0
Episode: 4 Score: -1.0
Episode: 5 Score: 18.0
Episode: 6 Score: 5.0
Episode: 7 Score: 50.0
Episode: 8 Score: 14.0
Episode: 9 Score: 0.0
Episode: 10 Score: 30.0
Episode: 11 Score: 18.0
Episode: 12 Score: 4.0
Episode: 13 Score: 9.0
Episode: 14 Score: 7.0
Episode: 15 Score: 9.0
Episode: 16 Score: 13.0
Episode: 17 Score: 0.0
Episode: 18 Score: 19.0
Episode: 19 Score: 6.0
Episode: 20 Score: 6.0
Episode: 21 Score: 2.0
Episode: 22 Score: 3.0
Episode: 23 Score: 27.0
Episode: 24 Score: 3.0
Episode: 25 Score: 13.0
Episode: 26 Score: 23.0
Episode: 27 Score: 2.0
Episode: 28 Score: -2.0
Episode: 29 Score: 20.0
Episode: 30 Score: 81.0
Episode: 31 Score: 8.0
Episode: 32 Score: 15.0
Episode: 33 Score: 16.0
Episode: 34 Score: 14.0
Episode: 35 Score: 8.0
Episode: 36 Score: 7.0
Episode: 37 Score: 10.0
Episode: 38 Score: 11.0
Episode: 39 Score: 10.0
Episode: 40 Score: 6.0
Episode: 41 Score: 5.0
Episode: 42 Score: 32.0
Episode: 43 Score: 3

Episode: 370 Score: 88.0
Episode: 371 Score: 19.0
Episode: 372 Score: 79.0
Episode: 373 Score: 9.0
Episode: 374 Score: 8.0
Episode: 375 Score: 28.0
Episode: 376 Score: 39.0
Episode: 377 Score: 53.0
Episode: 378 Score: 51.0
Episode: 379 Score: 39.0
Episode: 380 Score: 11.0
Episode: 381 Score: 20.0
Episode: 382 Score: 81.0
Episode: 383 Score: 24.0
Episode: 384 Score: 23.0
Episode: 385 Score: 21.0
Episode: 386 Score: 42.0
Episode: 387 Score: 45.0
Episode: 388 Score: 35.0
Episode: 389 Score: 30.0
Episode: 390 Score: 19.0
Episode: 391 Score: 36.0
Episode: 392 Score: 55.0
Episode: 393 Score: 45.0
Episode: 394 Score: 57.0
Episode: 395 Score: 16.0
Episode: 396 Score: 83.0
Episode: 397 Score: 17.0
Episode: 398 Score: 14.0
Episode: 399 Score: 26.0
Episode: 400 Score: 18.0
Episode: 401 Score: 32.0
Episode: 402 Score: 9.0
Episode: 403 Score: 33.0
Episode: 404 Score: 4.0
Episode: 405 Score: 12.0
Episode: 406 Score: 34.0
Episode: 407 Score: 60.0
Episode: 408 Score: 12.0
Episode: 409 Score: 6.0
Episo

Episode: 713 Score: 65.0
Episode: 714 Score: 48.0
Episode: 715 Score: 30.0
Episode: 716 Score: 33.0
Episode: 717 Score: 12.0
Episode: 718 Score: 34.0
Episode: 719 Score: 39.0
Episode: 720 Score: 26.0
Episode: 721 Score: 62.0
Episode: 722 Score: 15.0
Episode: 723 Score: 60.0
Episode: 724 Score: 17.0
Episode: 725 Score: 23.0
Episode: 726 Score: 28.0
Episode: 727 Score: 21.0
Episode: 728 Score: 56.0
Episode: 729 Score: 18.0
Episode: 730 Score: 10.0
Episode: 731 Score: 57.0
Episode: 732 Score: 68.0
Episode: 733 Score: 34.0
Episode: 734 Score: 29.0
Episode: 735 Score: 44.0
Episode: 736 Score: 28.0
Episode: 737 Score: 24.0
Episode: 738 Score: 38.0
Episode: 739 Score: 32.0
Episode: 740 Score: 22.0
Episode: 741 Score: 50.0
Episode: 742 Score: 9.0
Episode: 743 Score: 26.0
Episode: 744 Score: 31.0
Episode: 745 Score: 37.0
Episode: 746 Score: 29.0
Episode: 747 Score: 48.0
Episode: 748 Score: 63.0
Episode: 749 Score: 86.0
Episode: 750 Score: 21.0
Episode: 751 Score: 34.0
Episode: 752 Score: 19.0
E

Episode: 1045 Score: 85.0
Episode: 1046 Score: 67.0
Episode: 1047 Score: 55.0
Episode: 1048 Score: 35.0
Episode: 1049 Score: 63.0
Episode: 1050 Score: 190.0
Episode: 1051 Score: 49.0
Episode: 1052 Score: 56.0
Episode: 1053 Score: 23.0
Episode: 1054 Score: 53.0
Episode: 1055 Score: 31.0
Episode: 1056 Score: 47.0
Episode: 1057 Score: 28.0
Episode: 1058 Score: 20.0
Episode: 1059 Score: 81.0
Episode: 1060 Score: 85.0
Episode: 1061 Score: 58.0
Episode: 1062 Score: 54.0
Episode: 1063 Score: 62.0
Episode: 1064 Score: 35.0
Episode: 1065 Score: 52.0
Episode: 1066 Score: 47.0
Episode: 1067 Score: 74.0
Episode: 1068 Score: 40.0
Episode: 1069 Score: 31.0
Episode: 1070 Score: 25.0
Episode: 1071 Score: 67.0
Episode: 1072 Score: 31.0
Episode: 1073 Score: 42.0
Episode: 1074 Score: 26.0
Episode: 1075 Score: 95.0
Episode: 1076 Score: 53.0
Episode: 1077 Score: 62.0
Episode: 1078 Score: 101.0
Episode: 1079 Score: 70.0
Episode: 1080 Score: 41.0
Episode: 1081 Score: 45.0
Episode: 1082 Score: 56.0
Episode: 1

Episode: 1362 Score: 108.0
Episode: 1363 Score: 60.0
Episode: 1364 Score: 32.0
Episode: 1365 Score: 38.0
Episode: 1366 Score: 71.0
Episode: 1367 Score: 26.0
Episode: 1368 Score: 85.0
Episode: 1369 Score: 145.0
Episode: 1370 Score: 49.0
Episode: 1371 Score: 69.0
Episode: 1372 Score: 55.0
Episode: 1373 Score: 33.0
Episode: 1374 Score: 31.0
Episode: 1375 Score: 128.0
Episode: 1376 Score: 49.0
Episode: 1377 Score: 129.0
Episode: 1378 Score: 52.0
Episode: 1379 Score: 52.0
Episode: 1380 Score: 44.0
Episode: 1381 Score: 35.0
Episode: 1382 Score: 47.0
Episode: 1383 Score: 99.0
Episode: 1384 Score: 23.0
Episode: 1385 Score: 38.0
Episode: 1386 Score: 44.0
Episode: 1387 Score: 80.0
Episode: 1388 Score: 48.0
Episode: 1389 Score: 33.0
Episode: 1390 Score: 49.0
Episode: 1391 Score: 36.0
Episode: 1392 Score: 46.0
Episode: 1393 Score: 59.0
Episode: 1394 Score: 107.0
Episode: 1395 Score: 42.0
Episode: 1396 Score: 29.0
Episode: 1397 Score: 74.0
Episode: 1398 Score: 48.0
Episode: 1399 Score: 42.0
Episode

Episode: 1686 Score: 55.0
Episode: 1687 Score: 70.0
Episode: 1688 Score: 38.0
Episode: 1689 Score: 56.0
Episode: 1690 Score: 111.0
Episode: 1691 Score: 115.0
Episode: 1692 Score: 49.0
Episode: 1693 Score: 60.0
Episode: 1694 Score: 41.0
Episode: 1695 Score: 52.0
Episode: 1696 Score: 71.0
Episode: 1697 Score: 75.0
Episode: 1698 Score: 68.0
Episode: 1699 Score: 49.0
Episode: 1700 Score: 62.0
Episode: 1701 Score: 72.0
Episode: 1702 Score: 75.0
Episode: 1703 Score: 49.0
Episode: 1704 Score: 63.0
Episode: 1705 Score: 40.0
Episode: 1706 Score: 37.0
Episode: 1707 Score: 34.0
Episode: 1708 Score: 89.0
Episode: 1709 Score: 34.0
Episode: 1710 Score: 45.0
Episode: 1711 Score: 79.0
Episode: 1712 Score: 39.0
Episode: 1713 Score: 121.0
Episode: 1714 Score: 37.0
Episode: 1715 Score: 69.0
Episode: 1716 Score: 39.0
Episode: 1717 Score: 37.0
Episode: 1718 Score: 54.0
Episode: 1719 Score: 48.0
Episode: 1720 Score: 57.0
Episode: 1721 Score: 71.0
Episode: 1722 Score: 58.0
Episode: 1723 Score: 47.0
Episode: 

Episode: 2013 Score: 52.0
Episode: 2014 Score: 49.0
Episode: 2015 Score: 65.0
Episode: 2016 Score: 50.0
Episode: 2017 Score: 61.0
Episode: 2018 Score: 58.0
Episode: 2019 Score: 68.0
Episode: 2020 Score: 74.0
Episode: 2021 Score: 239.0
Episode: 2022 Score: 79.0
Episode: 2023 Score: 37.0
Episode: 2024 Score: 52.0
Episode: 2025 Score: 67.0
Episode: 2026 Score: 55.0
Episode: 2027 Score: 69.0
Episode: 2028 Score: 47.0
Episode: 2029 Score: 39.0
Episode: 2030 Score: 34.0
Episode: 2031 Score: 75.0
Episode: 2032 Score: 41.0
Episode: 2033 Score: 41.0
Episode: 2034 Score: 73.0
Episode: 2035 Score: 134.0
Episode: 2036 Score: 24.0
Episode: 2037 Score: 66.0
Episode: 2038 Score: 52.0
Episode: 2039 Score: 84.0
Episode: 2040 Score: 45.0
Episode: 2041 Score: 59.0
Episode: 2042 Score: 49.0
Episode: 2043 Score: 87.0
Episode: 2044 Score: 135.0
Episode: 2045 Score: 47.0
Episode: 2046 Score: 57.0
Episode: 2047 Score: 80.0
Episode: 2048 Score: 55.0
Episode: 2049 Score: 65.0
Episode: 2050 Score: 96.0
Episode: 

Episode: 2348 Score: 99.0
Episode: 2349 Score: 63.0
Episode: 2350 Score: 44.0
Episode: 2351 Score: 106.0
Episode: 2352 Score: 58.0
Episode: 2353 Score: 38.0
Episode: 2354 Score: 110.0
Episode: 2355 Score: 77.0
Episode: 2356 Score: 56.0
Episode: 2357 Score: 81.0
Episode: 2358 Score: 39.0
Episode: 2359 Score: 69.0
Episode: 2360 Score: 48.0
Episode: 2361 Score: 56.0
Episode: 2362 Score: 77.0
Episode: 2363 Score: 29.0
Episode: 2364 Score: 72.0
Episode: 2365 Score: 57.0
Episode: 2366 Score: 38.0
Episode: 2367 Score: 74.0
Episode: 2368 Score: 35.0
Episode: 2369 Score: 52.0
Episode: 2370 Score: 70.0
Episode: 2371 Score: 46.0
Episode: 2372 Score: 51.0
Episode: 2373 Score: 40.0
Episode: 2374 Score: 60.0
Episode: 2375 Score: 88.0
Episode: 2376 Score: 77.0
Episode: 2377 Score: 61.0
Episode: 2378 Score: 104.0
Episode: 2379 Score: 42.0
Episode: 2380 Score: 90.0
Episode: 2381 Score: 30.0
Episode: 2382 Score: 51.0
Episode: 2383 Score: 102.0
Episode: 2384 Score: 55.0
Episode: 2385 Score: 63.0
Episode:

Episode: 2682 Score: 39.0
Episode: 2683 Score: 41.0
Episode: 2684 Score: 113.0
Episode: 2685 Score: 47.0
Episode: 2686 Score: 41.0
Episode: 2687 Score: 35.0
Episode: 2688 Score: 101.0
Episode: 2689 Score: 76.0
Episode: 2690 Score: 35.0
Episode: 2691 Score: 75.0
Episode: 2692 Score: 38.0
Episode: 2693 Score: 32.0
Episode: 2694 Score: 207.0
Episode: 2695 Score: 50.0
Episode: 2696 Score: 43.0
Episode: 2697 Score: 95.0
Episode: 2698 Score: 33.0
Episode: 2699 Score: 59.0
Episode: 2700 Score: 64.0
Episode: 2701 Score: 42.0
Episode: 2702 Score: 71.0
Episode: 2703 Score: 93.0
Episode: 2704 Score: 51.0
Episode: 2705 Score: 72.0
Episode: 2706 Score: 94.0
Episode: 2707 Score: 59.0
Episode: 2708 Score: 36.0
Episode: 2709 Score: 47.0
Episode: 2710 Score: 91.0
Episode: 2711 Score: 85.0
Episode: 2712 Score: 42.0
Episode: 2713 Score: 80.0
Episode: 2714 Score: 91.0
Episode: 2715 Score: 57.0
Episode: 2716 Score: 78.0
Episode: 2717 Score: 45.0
Episode: 2718 Score: 41.0
Episode: 2719 Score: 34.0
Episode: 

Episode: 3003 Score: 87.0
Episode: 3004 Score: 55.0
Episode: 3005 Score: 68.0
Episode: 3006 Score: 186.0
Episode: 3007 Score: 42.0
Episode: 3008 Score: 62.0
Episode: 3009 Score: 103.0
Episode: 3010 Score: 68.0
Episode: 3011 Score: 46.0
Episode: 3012 Score: 25.0
Episode: 3013 Score: 43.0
Episode: 3014 Score: 49.0
Episode: 3015 Score: 40.0
Episode: 3016 Score: 36.0
Episode: 3017 Score: 55.0
Episode: 3018 Score: 117.0
Episode: 3019 Score: 68.0
Episode: 3020 Score: 44.0
Episode: 3021 Score: 54.0
Episode: 3022 Score: 45.0
Episode: 3023 Score: 69.0
Episode: 3024 Score: 70.0
Episode: 3025 Score: 56.0
Episode: 3026 Score: 47.0
Episode: 3027 Score: 51.0
Episode: 3028 Score: 56.0
Episode: 3029 Score: 85.0
Episode: 3030 Score: 43.0
Episode: 3031 Score: 91.0
Episode: 3032 Score: 64.0
Episode: 3033 Score: 69.0
Episode: 3034 Score: 75.0
Episode: 3035 Score: 47.0
Episode: 3036 Score: 46.0
Episode: 3037 Score: 85.0
Episode: 3038 Score: 66.0
Episode: 3039 Score: 32.0
Episode: 3040 Score: 37.0
Episode: 

Episode: 3336 Score: 97.0
Episode: 3337 Score: 81.0
Episode: 3338 Score: 59.0
Episode: 3339 Score: 59.0
Episode: 3340 Score: 60.0
Episode: 3341 Score: 55.0
Episode: 3342 Score: 35.0
Episode: 3343 Score: 107.0
Episode: 3344 Score: 29.0
Episode: 3345 Score: 96.0
Episode: 3346 Score: 106.0
Episode: 3347 Score: 119.0
Episode: 3348 Score: 74.0
Episode: 3349 Score: 110.0
Episode: 3350 Score: 54.0
Episode: 3351 Score: 53.0
Episode: 3352 Score: 58.0
Episode: 3353 Score: 38.0
Episode: 3354 Score: 41.0
Episode: 3355 Score: 43.0
Episode: 3356 Score: 91.0
Episode: 3357 Score: 64.0
Episode: 3358 Score: 63.0
Episode: 3359 Score: 146.0
Episode: 3360 Score: 105.0
Episode: 3361 Score: 66.0
Episode: 3362 Score: 46.0
Episode: 3363 Score: 87.0
Episode: 3364 Score: 94.0
Episode: 3365 Score: 68.0
Episode: 3366 Score: 39.0
Episode: 3367 Score: 63.0
Episode: 3368 Score: 29.0
Episode: 3369 Score: 111.0
Episode: 3370 Score: 72.0
Episode: 3371 Score: 57.0
Episode: 3372 Score: 110.0
Episode: 3373 Score: 57.0
Epis

Episode: 3670 Score: 57.0
Episode: 3671 Score: 50.0
Episode: 3672 Score: 49.0
Episode: 3673 Score: 45.0
Episode: 3674 Score: 44.0
Episode: 3675 Score: 71.0
Episode: 3676 Score: 32.0
Episode: 3677 Score: 79.0
Episode: 3678 Score: 51.0
Episode: 3679 Score: 47.0
Episode: 3680 Score: 57.0
Episode: 3681 Score: 42.0
Episode: 3682 Score: 56.0
Episode: 3683 Score: 41.0
Episode: 3684 Score: 49.0
Episode: 3685 Score: 57.0
Episode: 3686 Score: 89.0
Episode: 3687 Score: 70.0
Episode: 3688 Score: 75.0
Episode: 3689 Score: 42.0
Episode: 3690 Score: 102.0
Episode: 3691 Score: 66.0
Episode: 3692 Score: 29.0
Episode: 3693 Score: 35.0
Episode: 3694 Score: 94.0
Episode: 3695 Score: 60.0
Episode: 3696 Score: 57.0
Episode: 3697 Score: 59.0
Episode: 3698 Score: 77.0
Episode: 3699 Score: 161.0
Episode: 3700 Score: 38.0
Episode: 3701 Score: 77.0
Episode: 3702 Score: 49.0
Episode: 3703 Score: 43.0
Episode: 3704 Score: 41.0
Episode: 3705 Score: 84.0
Episode: 3706 Score: 44.0
Episode: 3707 Score: 49.0
Episode: 3

Episode: 3991 Score: 92.0
Episode: 3992 Score: 51.0
Episode: 3993 Score: 50.0
Episode: 3994 Score: 60.0
Episode: 3995 Score: 31.0
Episode: 3996 Score: 98.0
Episode: 3997 Score: 61.0
Episode: 3998 Score: 61.0
Episode: 3999 Score: 55.0
Episode: 4000 Score: 96.0
Episode: 4001 Score: 73.0
Episode: 4002 Score: 69.0
Episode: 4003 Score: 45.0
Episode: 4004 Score: 62.0
Episode: 4005 Score: 54.0
Episode: 4006 Score: 86.0
Episode: 4007 Score: 129.0
Episode: 4008 Score: 43.0
Episode: 4009 Score: 43.0
Episode: 4010 Score: 39.0
Episode: 4011 Score: 68.0
Episode: 4012 Score: 45.0
Episode: 4013 Score: 93.0
Episode: 4014 Score: 60.0
Episode: 4015 Score: 77.0
Episode: 4016 Score: 125.0
Episode: 4017 Score: 70.0
Episode: 4018 Score: 71.0
Episode: 4019 Score: 141.0
Episode: 4020 Score: 50.0
Episode: 4021 Score: 45.0
Episode: 4022 Score: 45.0
Episode: 4023 Score: 109.0
Episode: 4024 Score: 73.0
Episode: 4025 Score: 87.0
Episode: 4026 Score: 104.0
Episode: 4027 Score: 47.0
Episode: 4028 Score: 80.0
Episode

Episode: 4316 Score: 161.0
Episode: 4317 Score: 52.0
Episode: 4318 Score: 34.0
Episode: 4319 Score: 50.0
Episode: 4320 Score: 75.0
Episode: 4321 Score: 50.0
Episode: 4322 Score: 124.0
Episode: 4323 Score: 60.0
Episode: 4324 Score: 83.0
Episode: 4325 Score: 76.0
Episode: 4326 Score: 64.0
Episode: 4327 Score: 59.0
Episode: 4328 Score: 52.0
Episode: 4329 Score: 72.0
Episode: 4330 Score: 50.0
Episode: 4331 Score: 76.0
Episode: 4332 Score: 53.0
Episode: 4333 Score: 97.0
Episode: 4334 Score: 75.0
Episode: 4335 Score: 41.0
Episode: 4336 Score: 65.0
Episode: 4337 Score: 55.0
Episode: 4338 Score: 46.0
Episode: 4339 Score: 53.0
Episode: 4340 Score: 56.0
Episode: 4341 Score: 97.0
Episode: 4342 Score: 40.0
Episode: 4343 Score: 33.0
Episode: 4344 Score: 55.0
Episode: 4345 Score: 53.0
Episode: 4346 Score: 107.0
Episode: 4347 Score: 53.0
Episode: 4348 Score: 110.0
Episode: 4349 Score: 31.0
Episode: 4350 Score: 182.0
Episode: 4351 Score: 43.0
Episode: 4352 Score: 50.0
Episode: 4353 Score: 43.0
Episode

Episode: 4643 Score: 79.0
Episode: 4644 Score: 110.0
Episode: 4645 Score: 67.0
Episode: 4646 Score: 74.0
Episode: 4647 Score: 46.0
Episode: 4648 Score: 66.0
Episode: 4649 Score: 89.0
Episode: 4650 Score: 47.0
Episode: 4651 Score: 67.0
Episode: 4652 Score: 146.0
Episode: 4653 Score: 44.0
Episode: 4654 Score: 46.0
Episode: 4655 Score: 84.0
Episode: 4656 Score: 53.0
Episode: 4657 Score: 72.0
Episode: 4658 Score: 188.0
Episode: 4659 Score: 59.0
Episode: 4660 Score: 68.0
Episode: 4661 Score: 51.0
Episode: 4662 Score: 53.0
Episode: 4663 Score: 59.0
Episode: 4664 Score: 56.0
Episode: 4665 Score: 42.0
Episode: 4666 Score: 77.0
Episode: 4667 Score: 30.0
Episode: 4668 Score: 51.0
Episode: 4669 Score: 84.0
Episode: 4670 Score: 39.0
Episode: 4671 Score: 191.0
Episode: 4672 Score: 66.0
Episode: 4673 Score: 57.0
Episode: 4674 Score: 32.0
Episode: 4675 Score: 45.0
Episode: 4676 Score: 51.0
Episode: 4677 Score: 36.0
Episode: 4678 Score: 114.0
Episode: 4679 Score: 33.0
Episode: 4680 Score: 147.0
Episod

Episode: 4974 Score: 135.0
Episode: 4975 Score: 97.0
Episode: 4976 Score: 62.0
Episode: 4977 Score: 33.0
Episode: 4978 Score: 54.0
Episode: 4979 Score: 49.0
Episode: 4980 Score: 119.0
Episode: 4981 Score: 59.0
Episode: 4982 Score: 76.0
Episode: 4983 Score: 28.0
Episode: 4984 Score: 38.0
Episode: 4985 Score: 41.0
Episode: 4986 Score: 34.0
Episode: 4987 Score: 54.0
Episode: 4988 Score: 84.0
Episode: 4989 Score: 43.0
Episode: 4990 Score: 183.0
Episode: 4991 Score: 61.0
Episode: 4992 Score: 37.0
Episode: 4993 Score: 56.0
Episode: 4994 Score: 90.0
Episode: 4995 Score: 132.0
Episode: 4996 Score: 55.0
Episode: 4997 Score: 38.0
Episode: 4998 Score: 155.0
Episode: 4999 Score: 36.0
Episode: 5000 Score: 115.0
Episode: 5001 Score: 64.0
Episode: 5002 Score: 85.0
Episode: 5003 Score: 32.0
Episode: 5004 Score: 50.0
Episode: 5005 Score: 48.0
Episode: 5006 Score: 79.0
Episode: 5007 Score: 48.0
Episode: 5008 Score: 41.0
Episode: 5009 Score: 34.0
Episode: 5010 Score: 73.0
Episode: 5011 Score: 93.0
Episod

Episode: 5302 Score: 324.0
Episode: 5303 Score: 55.0
Episode: 5304 Score: 39.0
Episode: 5305 Score: 33.0
Episode: 5306 Score: 69.0
Episode: 5307 Score: 59.0
Episode: 5308 Score: 55.0
Episode: 5309 Score: 49.0
Episode: 5310 Score: 117.0
Episode: 5311 Score: 60.0
Episode: 5312 Score: 109.0
Episode: 5313 Score: 44.0
Episode: 5314 Score: 55.0
Episode: 5315 Score: 114.0
Episode: 5316 Score: 61.0
Episode: 5317 Score: 121.0
Episode: 5318 Score: 147.0
Episode: 5319 Score: 67.0
Episode: 5320 Score: 45.0
Episode: 5321 Score: 39.0
Episode: 5322 Score: 57.0
Episode: 5323 Score: 48.0
Episode: 5324 Score: 107.0
Episode: 5325 Score: 75.0
Episode: 5326 Score: 63.0
Episode: 5327 Score: 53.0
Episode: 5328 Score: 121.0
Episode: 5329 Score: 48.0
Episode: 5330 Score: 49.0
Episode: 5331 Score: 38.0
Episode: 5332 Score: 60.0
Episode: 5333 Score: 40.0
Episode: 5334 Score: 47.0
Episode: 5335 Score: 55.0
Episode: 5336 Score: 108.0
Episode: 5337 Score: 57.0
Episode: 5338 Score: 39.0
Episode: 5339 Score: 40.0
Epi

Episode: 5633 Score: 69.0
Episode: 5634 Score: 39.0
Episode: 5635 Score: 131.0
Episode: 5636 Score: 100.0
Episode: 5637 Score: 78.0
Episode: 5638 Score: 58.0
Episode: 5639 Score: 52.0
Episode: 5640 Score: 98.0
Episode: 5641 Score: 56.0
Episode: 5642 Score: 51.0
Episode: 5643 Score: 84.0
Episode: 5644 Score: 65.0
Episode: 5645 Score: 56.0
Episode: 5646 Score: 100.0
Episode: 5647 Score: 33.0
Episode: 5648 Score: 63.0
Episode: 5649 Score: 39.0
Episode: 5650 Score: 31.0
Episode: 5651 Score: 36.0
Episode: 5652 Score: 111.0
Episode: 5653 Score: 33.0
Episode: 5654 Score: 74.0
Episode: 5655 Score: 111.0
Episode: 5656 Score: 47.0
Episode: 5657 Score: 45.0
Episode: 5658 Score: 42.0
Episode: 5659 Score: 53.0
Episode: 5660 Score: 50.0
Episode: 5661 Score: 219.0
Episode: 5662 Score: 76.0
Episode: 5663 Score: 46.0
Episode: 5664 Score: 134.0
Episode: 5665 Score: 45.0
Episode: 5666 Score: 78.0
Episode: 5667 Score: 39.0
Episode: 5668 Score: 51.0
Episode: 5669 Score: 66.0
Episode: 5670 Score: 74.0
Episo

Episode: 5961 Score: 71.0
Episode: 5962 Score: 48.0
Episode: 5963 Score: 110.0
Episode: 5964 Score: 73.0
Episode: 5965 Score: 52.0
Episode: 5966 Score: 91.0
Episode: 5967 Score: 91.0
Episode: 5968 Score: 41.0
Episode: 5969 Score: 84.0
Episode: 5970 Score: 80.0
Episode: 5971 Score: 59.0
Episode: 5972 Score: 211.0
Episode: 5973 Score: 72.0
Episode: 5974 Score: 43.0
Episode: 5975 Score: 55.0
Episode: 5976 Score: 88.0
Episode: 5977 Score: 105.0
Episode: 5978 Score: 55.0
Episode: 5979 Score: 83.0
Episode: 5980 Score: 55.0
Episode: 5981 Score: 62.0
Episode: 5982 Score: 55.0
Episode: 5983 Score: 45.0
Episode: 5984 Score: 35.0
Episode: 5985 Score: 43.0
Episode: 5986 Score: 109.0
Episode: 5987 Score: 40.0
Episode: 5988 Score: 55.0
Episode: 5989 Score: 49.0
Episode: 5990 Score: 61.0
Episode: 5991 Score: 49.0
Episode: 5992 Score: 39.0
Episode: 5993 Score: 105.0
Episode: 5994 Score: 107.0
Episode: 5995 Score: 51.0
Episode: 5996 Score: 44.0
Episode: 5997 Score: 215.0
Episode: 5998 Score: 93.0
Episo

Episode: 6292 Score: 137.0
Episode: 6293 Score: 79.0
Episode: 6294 Score: 65.0
Episode: 6295 Score: 200.0
Episode: 6296 Score: 36.0
Episode: 6297 Score: 86.0
Episode: 6298 Score: 51.0
Episode: 6299 Score: 47.0
Episode: 6300 Score: 106.0
Episode: 6301 Score: 77.0
Episode: 6302 Score: 45.0
Episode: 6303 Score: 42.0
Episode: 6304 Score: 82.0
Episode: 6305 Score: 69.0
Episode: 6306 Score: 96.0
Episode: 6307 Score: 135.0
Episode: 6308 Score: 46.0
Episode: 6309 Score: 139.0
Episode: 6310 Score: 100.0
Episode: 6311 Score: 61.0
Episode: 6312 Score: 73.0
Episode: 6313 Score: 40.0
Episode: 6314 Score: 55.0
Episode: 6315 Score: 44.0
Episode: 6316 Score: 44.0
Episode: 6317 Score: 209.0
Episode: 6318 Score: 38.0
Episode: 6319 Score: 49.0
Episode: 6320 Score: 45.0
Episode: 6321 Score: 65.0
Episode: 6322 Score: 98.0
Episode: 6323 Score: 52.0
Episode: 6324 Score: 38.0
Episode: 6325 Score: 52.0
Episode: 6326 Score: 59.0
Episode: 6327 Score: 87.0
Episode: 6328 Score: 69.0
Episode: 6329 Score: 43.0
Episo

Episode: 6607 Score: 49.0
Episode: 6608 Score: 236.0
Episode: 6609 Score: 41.0
Episode: 6610 Score: 39.0
Episode: 6611 Score: 177.0
Episode: 6612 Score: 43.0
Episode: 6613 Score: 68.0
Episode: 6614 Score: 61.0
Episode: 6615 Score: 83.0
Episode: 6616 Score: 84.0
Episode: 6617 Score: 119.0
Episode: 6618 Score: 43.0
Episode: 6619 Score: 50.0
Episode: 6620 Score: 56.0
Episode: 6621 Score: 54.0
Episode: 6622 Score: 47.0
Episode: 6623 Score: 154.0
Episode: 6624 Score: 70.0
Episode: 6625 Score: 55.0
Episode: 6626 Score: 78.0
Episode: 6627 Score: 62.0
Episode: 6628 Score: 81.0
Episode: 6629 Score: 35.0
Episode: 6630 Score: 102.0
Episode: 6631 Score: 32.0
Episode: 6632 Score: 107.0
Episode: 6633 Score: 71.0
Episode: 6634 Score: 79.0
Episode: 6635 Score: 106.0
Episode: 6636 Score: 81.0
Episode: 6637 Score: 55.0
Episode: 6638 Score: 49.0
Episode: 6639 Score: 99.0
Episode: 6640 Score: 75.0
Episode: 6641 Score: 105.0
Episode: 6642 Score: 177.0
Episode: 6643 Score: 64.0
Episode: 6644 Score: 161.0
Ep

Episode: 6927 Score: 144.0
Episode: 6928 Score: 41.0
Episode: 6929 Score: 63.0
Episode: 6930 Score: 123.0
Episode: 6931 Score: 51.0
Episode: 6932 Score: 35.0
Episode: 6933 Score: 48.0
Episode: 6934 Score: 137.0
Episode: 6935 Score: 42.0
Episode: 6936 Score: 100.0
Episode: 6937 Score: 56.0
Episode: 6938 Score: 54.0
Episode: 6939 Score: 96.0
Episode: 6940 Score: 74.0
Episode: 6941 Score: 64.0
Episode: 6942 Score: 43.0
Episode: 6943 Score: 129.0
Episode: 6944 Score: 60.0
Episode: 6945 Score: 85.0
Episode: 6946 Score: 44.0
Episode: 6947 Score: 82.0
Episode: 6948 Score: 186.0
Episode: 6949 Score: 167.0
Episode: 6950 Score: 74.0
Episode: 6951 Score: 43.0
Episode: 6952 Score: 82.0
Episode: 6953 Score: 55.0
Episode: 6954 Score: 127.0
Episode: 6955 Score: 43.0
Episode: 6956 Score: 242.0
Episode: 6957 Score: 111.0
Episode: 6958 Score: 115.0
Episode: 6959 Score: 98.0
Episode: 6960 Score: 66.0
Episode: 6961 Score: 41.0
Episode: 6962 Score: 73.0
Episode: 6963 Score: 59.0
Episode: 6964 Score: 98.0
E

Episode: 7243 Score: 54.0
Episode: 7244 Score: 307.0
Episode: 7245 Score: 221.0
Episode: 7246 Score: 111.0
Episode: 7247 Score: 127.0
Episode: 7248 Score: 50.0
Episode: 7249 Score: 53.0
Episode: 7250 Score: 70.0
Episode: 7251 Score: 91.0
Episode: 7252 Score: 43.0
Episode: 7253 Score: 97.0
Episode: 7254 Score: 51.0
Episode: 7255 Score: 34.0
Episode: 7256 Score: 81.0
Episode: 7257 Score: 32.0
Episode: 7258 Score: 115.0
Episode: 7259 Score: 79.0
Episode: 7260 Score: 67.0
Episode: 7261 Score: 83.0
Episode: 7262 Score: 55.0
Episode: 7263 Score: 41.0
Episode: 7264 Score: 157.0
Episode: 7265 Score: 87.0
Episode: 7266 Score: 48.0
Episode: 7267 Score: 44.0
Episode: 7268 Score: 34.0
Episode: 7269 Score: 41.0
Episode: 7270 Score: 74.0
Episode: 7271 Score: 175.0
Episode: 7272 Score: 125.0
Episode: 7273 Score: 50.0
Episode: 7274 Score: 89.0
Episode: 7275 Score: 223.0
Episode: 7276 Score: 71.0
Episode: 7277 Score: 174.0
Episode: 7278 Score: 79.0
Episode: 7279 Score: 52.0
Episode: 7280 Score: 119.0
E

Episode: 7575 Score: 123.0
Episode: 7576 Score: 104.0
Episode: 7577 Score: 37.0
Episode: 7578 Score: 80.0
Episode: 7579 Score: 103.0
Episode: 7580 Score: 148.0
Episode: 7581 Score: 206.0
Episode: 7582 Score: 101.0
Episode: 7583 Score: 77.0
Episode: 7584 Score: 39.0
Episode: 7585 Score: 85.0
Episode: 7586 Score: 111.0
Episode: 7587 Score: 63.0
Episode: 7588 Score: 46.0
Episode: 7589 Score: 73.0
Episode: 7590 Score: 40.0
Episode: 7591 Score: 44.0
Episode: 7592 Score: 143.0
Episode: 7593 Score: 37.0
Episode: 7594 Score: 178.0
Episode: 7595 Score: 43.0
Episode: 7596 Score: 148.0
Episode: 7597 Score: 55.0
Episode: 7598 Score: 55.0
Episode: 7599 Score: 106.0
Episode: 7600 Score: 83.0
Episode: 7601 Score: 57.0
Episode: 7602 Score: 68.0
Episode: 7603 Score: 46.0
Episode: 7604 Score: 85.0
Episode: 7605 Score: 71.0
Episode: 7606 Score: 47.0
Episode: 7607 Score: 87.0
Episode: 7608 Score: 95.0
Episode: 7609 Score: 42.0
Episode: 7610 Score: 130.0
Episode: 7611 Score: 186.0
Episode: 7612 Score: 42.0

Episode: 7890 Score: 165.0
Episode: 7891 Score: 41.0
Episode: 7892 Score: 133.0
Episode: 7893 Score: 110.0
Episode: 7894 Score: 167.0
Episode: 7895 Score: 102.0
Episode: 7896 Score: 77.0
Episode: 7897 Score: 43.0
Episode: 7898 Score: 67.0
Episode: 7899 Score: 74.0
Episode: 7900 Score: 36.0
Episode: 7901 Score: 72.0
Episode: 7902 Score: 143.0
Episode: 7903 Score: 101.0
Episode: 7904 Score: 49.0
Episode: 7905 Score: 105.0
Episode: 7906 Score: 57.0
Episode: 7907 Score: 38.0
Episode: 7908 Score: 67.0
Episode: 7909 Score: 51.0
Episode: 7910 Score: 67.0
Episode: 7911 Score: 71.0
Episode: 7912 Score: 87.0
Episode: 7913 Score: 157.0
Episode: 7914 Score: 49.0
Episode: 7915 Score: 119.0
Episode: 7916 Score: 54.0
Episode: 7917 Score: 49.0
Episode: 7918 Score: 45.0
Episode: 7919 Score: 43.0
Episode: 7920 Score: 56.0
Episode: 7921 Score: 91.0
Episode: 7922 Score: 55.0
Episode: 7923 Score: 36.0
Episode: 7924 Score: 45.0
Episode: 7925 Score: 39.0
Episode: 7926 Score: 44.0
Episode: 7927 Score: 43.0
Ep

Episode: 8226 Score: 35.0
Episode: 8227 Score: 194.0
Episode: 8228 Score: 59.0
Episode: 8229 Score: 91.0
Episode: 8230 Score: 150.0
Episode: 8231 Score: 63.0
Episode: 8232 Score: 122.0
Episode: 8233 Score: 67.0
Episode: 8234 Score: 125.0
Episode: 8235 Score: 58.0
Episode: 8236 Score: 85.0
Episode: 8237 Score: 65.0
Episode: 8238 Score: 55.0
Episode: 8239 Score: 57.0
Episode: 8240 Score: 53.0
Episode: 8241 Score: 54.0
Episode: 8242 Score: 52.0
Episode: 8243 Score: 70.0
Episode: 8244 Score: 103.0
Episode: 8245 Score: 133.0
Episode: 8246 Score: 45.0
Episode: 8247 Score: 64.0
Episode: 8248 Score: 42.0
Episode: 8249 Score: 116.0
Episode: 8250 Score: 73.0
Episode: 8251 Score: 64.0
Episode: 8252 Score: 106.0
Episode: 8253 Score: 84.0
Episode: 8254 Score: 71.0
Episode: 8255 Score: 131.0
Episode: 8256 Score: 89.0
Episode: 8257 Score: 199.0
Episode: 8258 Score: 36.0
Episode: 8259 Score: 51.0
Episode: 8260 Score: 55.0
Episode: 8261 Score: 104.0
Episode: 8262 Score: 43.0
Episode: 8263 Score: 95.0
E

Episode: 8560 Score: 57.0
Episode: 8561 Score: 80.0
Episode: 8562 Score: 51.0
Episode: 8563 Score: 47.0
Episode: 8564 Score: 50.0
Episode: 8565 Score: 42.0
Episode: 8566 Score: 47.0
Episode: 8567 Score: 106.0
Episode: 8568 Score: 58.0
Episode: 8569 Score: 77.0
Episode: 8570 Score: 73.0
Episode: 8571 Score: 109.0
Episode: 8572 Score: 61.0
Episode: 8573 Score: 215.0
Episode: 8574 Score: 71.0
Episode: 8575 Score: 35.0
Episode: 8576 Score: 32.0
Episode: 8577 Score: 43.0
Episode: 8578 Score: 38.0
Episode: 8579 Score: 99.0
Episode: 8580 Score: 61.0
Episode: 8581 Score: 175.0
Episode: 8582 Score: 137.0
Episode: 8583 Score: 45.0
Episode: 8584 Score: 49.0
Episode: 8585 Score: 95.0
Episode: 8586 Score: 68.0
Episode: 8587 Score: 41.0
Episode: 8588 Score: 55.0
Episode: 8589 Score: 58.0
Episode: 8590 Score: 29.0
Episode: 8591 Score: 53.0
Episode: 8592 Score: 135.0
Episode: 8593 Score: 75.0
Episode: 8594 Score: 73.0
Episode: 8595 Score: 51.0
Episode: 8596 Score: 66.0
Episode: 8597 Score: 105.0
Episo

Episode: 8874 Score: 133.0
Episode: 8875 Score: 76.0
Episode: 8876 Score: 69.0
Episode: 8877 Score: 154.0
Episode: 8878 Score: 80.0
Episode: 8879 Score: 167.0
Episode: 8880 Score: 189.0
Episode: 8881 Score: 73.0
Episode: 8882 Score: 101.0
Episode: 8883 Score: 72.0
Episode: 8884 Score: 103.0
Episode: 8885 Score: 44.0
Episode: 8886 Score: 51.0
Episode: 8887 Score: 66.0
Episode: 8888 Score: 55.0
Episode: 8889 Score: 91.0
Episode: 8890 Score: 85.0
Episode: 8891 Score: 34.0
Episode: 8892 Score: 61.0
Episode: 8893 Score: 88.0
Episode: 8894 Score: 62.0
Episode: 8895 Score: 41.0
Episode: 8896 Score: 67.0
Episode: 8897 Score: 192.0
Episode: 8898 Score: 107.0
Episode: 8899 Score: 51.0
Episode: 8900 Score: 42.0
Episode: 8901 Score: 66.0
Episode: 8902 Score: 43.0
Episode: 8903 Score: 188.0
Episode: 8904 Score: 76.0
Episode: 8905 Score: 81.0
Episode: 8906 Score: 153.0
Episode: 8907 Score: 46.0
Episode: 8908 Score: 61.0
Episode: 8909 Score: 91.0
Episode: 8910 Score: 53.0
Episode: 8911 Score: 78.0
Ep

Episode: 9196 Score: 103.0
Episode: 9197 Score: 42.0
Episode: 9198 Score: 113.0
Episode: 9199 Score: 57.0
Episode: 9200 Score: 69.0
Episode: 9201 Score: 39.0
Episode: 9202 Score: 87.0
Episode: 9203 Score: 53.0
Episode: 9204 Score: 89.0
Episode: 9205 Score: 157.0
Episode: 9206 Score: 39.0
Episode: 9207 Score: 61.0
Episode: 9208 Score: 85.0
Episode: 9209 Score: 65.0
Episode: 9210 Score: 65.0
Episode: 9211 Score: 46.0
Episode: 9212 Score: 164.0
Episode: 9213 Score: 42.0
Episode: 9214 Score: 150.0
Episode: 9215 Score: 102.0
Episode: 9216 Score: 39.0
Episode: 9217 Score: 53.0
Episode: 9218 Score: 63.0
Episode: 9219 Score: 91.0
Episode: 9220 Score: 114.0
Episode: 9221 Score: 76.0
Episode: 9222 Score: 108.0
Episode: 9223 Score: 71.0
Episode: 9224 Score: 39.0
Episode: 9225 Score: 48.0
Episode: 9226 Score: 82.0
Episode: 9227 Score: 59.0
Episode: 9228 Score: 59.0
Episode: 9229 Score: 56.0
Episode: 9230 Score: 39.0
Episode: 9231 Score: 72.0
Episode: 9232 Score: 77.0
Episode: 9233 Score: 249.0
Epi

Episode: 9514 Score: 183.0
Episode: 9515 Score: 121.0
Episode: 9516 Score: 51.0
Episode: 9517 Score: 101.0
Episode: 9518 Score: 80.0
Episode: 9519 Score: 93.0
Episode: 9520 Score: 54.0
Episode: 9521 Score: 154.0
Episode: 9522 Score: 48.0
Episode: 9523 Score: 54.0
Episode: 9524 Score: 50.0
Episode: 9525 Score: 85.0
Episode: 9526 Score: 71.0
Episode: 9527 Score: 83.0
Episode: 9528 Score: 65.0
Episode: 9529 Score: 45.0
Episode: 9530 Score: 129.0
Episode: 9531 Score: 49.0
Episode: 9532 Score: 143.0
Episode: 9533 Score: 39.0
Episode: 9534 Score: 77.0
Episode: 9535 Score: 67.0
Episode: 9536 Score: 38.0
Episode: 9537 Score: 90.0
Episode: 9538 Score: 120.0
Episode: 9539 Score: 56.0
Episode: 9540 Score: 72.0
Episode: 9541 Score: 45.0
Episode: 9542 Score: 40.0
Episode: 9543 Score: 71.0
Episode: 9544 Score: 36.0
Episode: 9545 Score: 48.0
Episode: 9546 Score: 37.0
Episode: 9547 Score: 36.0
Episode: 9548 Score: 73.0
Episode: 9549 Score: 100.0
Episode: 9550 Score: 43.0
Episode: 9551 Score: 58.0
Epis

Episode: 9847 Score: 81.0
Episode: 9848 Score: 29.0
Episode: 9849 Score: 59.0
Episode: 9850 Score: 166.0
Episode: 9851 Score: 63.0
Episode: 9852 Score: 67.0
Episode: 9853 Score: 95.0
Episode: 9854 Score: 204.0
Episode: 9855 Score: 103.0
Episode: 9856 Score: 69.0
Episode: 9857 Score: 46.0
Episode: 9858 Score: 95.0
Episode: 9859 Score: 167.0
Episode: 9860 Score: 59.0
Episode: 9861 Score: 40.0
Episode: 9862 Score: 267.0
Episode: 9863 Score: 59.0
Episode: 9864 Score: 40.0
Episode: 9865 Score: 84.0
Episode: 9866 Score: 60.0
Episode: 9867 Score: 85.0
Episode: 9868 Score: 35.0
Episode: 9869 Score: 88.0
Episode: 9870 Score: 59.0
Episode: 9871 Score: 51.0
Episode: 9872 Score: 87.0
Episode: 9873 Score: 43.0
Episode: 9874 Score: 71.0
Episode: 9875 Score: 39.0
Episode: 9876 Score: 56.0
Episode: 9877 Score: 45.0
Episode: 9878 Score: 55.0
Episode: 9879 Score: 83.0
Episode: 9880 Score: 111.0
Episode: 9881 Score: 100.0
Episode: 9882 Score: 59.0
Episode: 9883 Score: 46.0
Episode: 9884 Score: 41.0
Episo

In [7]:
state = env.reset()
state_size = 4
action_size = env.action_space.n
theta = np.random.random(state_size * action_size)

def _phi(s, a):
    encoded = np.zeros([action_size, state_size])
    encoded[a] = s
    return encoded.flatten()

def _softmax(s, a):
    return np.exp(theta.dot(_phi(s, a)) / 100)

def pi(s):
    """\pi(a | s)"""
    weights = np.empty(action_size)
    for a in range(action_size):
        weights[a] = _softmax(s, a)
    return weights / np.sum(weights)


def act(state):
    probs = pi(state)
    a = random.choices(range(0, action_size), weights=probs)
    a = a[0]
    pi = probs[a]
    return (a, pi)

In [8]:
theta

array([0.67059612, 0.25784005, 0.77806322, 0.75089143, 0.90915224,
       0.39152139, 0.26376316, 0.11291722])

In [9]:
action_size

2

In [10]:
np.zeros([action_size, state_size])

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [14]:
np.empty(action_size)

array([4.10666840e-06, 9.99995893e-01])

In [16]:
probs = pi(state)
a = random.choices(range(0, action_size), weights=probs)

In [17]:
probs

array([0.5000036, 0.4999964])

In [18]:
a

[0]