# https://medium.com/@thechrisyoon/deriving-policy-gradients-and-implementing-reinforce-f887949bd63

In [2]:
import sys
import torch  
import gym
import numpy as np  
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
import matplotlib.pyplot as plt

# Constants
GAMMA = 0.9

# policy network

class PolicyNetwork(nn.Module):
    def __init__(self, num_inputs, num_actions, hidden_size, learning_rate=3e-4):
        super(PolicyNetwork, self).__init__()

        self.num_actions = num_actions
        self.linear1 = nn.Linear(num_inputs, hidden_size)
        self.linear2 = nn.Linear(hidden_size, num_actions)
        self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)

    def forward(self, state):
        x = F.relu(self.linear1(state))
        x = F.softmax(self.linear2(x), dim=1)
        return x 
    
    def get_action(self, state):
        state = torch.from_numpy(state).float().unsqueeze(0)
        probs = self.forward(Variable(state))
        highest_prob_action = np.random.choice(self.num_actions, p=np.squeeze(probs.detach().numpy()))
        log_prob = torch.log(probs.squeeze(0)[highest_prob_action])
        return highest_prob_action, log_prob


In [3]:
def update_policy(policy_network, rewards, log_probs):
    discounted_rewards = []

    for t in range(len(rewards)):
        Gt = 0 
        pw = 0
        for r in rewards[t:]:
            Gt = Gt + GAMMA**pw * r
            pw = pw + 1
        discounted_rewards.append(Gt)
        
    discounted_rewards = torch.tensor(discounted_rewards)
    discounted_rewards = (discounted_rewards - discounted_rewards.mean()) / (discounted_rewards.std() + 1e-9) # normalize discounted rewards

    policy_gradient = []
    for log_prob, Gt in zip(log_probs, discounted_rewards):
        policy_gradient.append(-log_prob * Gt)
    
    policy_network.optimizer.zero_grad()
    policy_gradient = torch.stack(policy_gradient).sum()
    policy_gradient.backward()
    policy_network.optimizer.step()

In [4]:
def main():
    env = gym.make('CartPole-v0')
    policy_net = PolicyNetwork(env.observation_space.shape[0], env.action_space.n, 128)
    
    max_episode_num = 5000
    max_steps = 10000
    numsteps = []
    avg_numsteps = []
    all_rewards = []

    for episode in range(max_episode_num):
        state = env.reset()
        log_probs = []
        rewards = []

        for steps in range(max_steps):
            env.render()
            action, log_prob = policy_net.get_action(state)
            new_state, reward, done, _ = env.step(action)
            log_probs.append(log_prob)
            rewards.append(reward)

            if done:
                update_policy(policy_net, rewards, log_probs)
                numsteps.append(steps)
                avg_numsteps.append(np.mean(numsteps[-10:]))
                all_rewards.append(np.sum(rewards))
                if episode % 1 == 0:
                    sys.stdout.write("episode: {}, total reward: {}, average_reward: {}, length: {}\n".format(episode, np.round(np.sum(rewards), decimals = 3),  np.round(np.mean(all_rewards[-10:]), decimals = 3), steps))
                break
            
            state = new_state
        
    plt.plot(numsteps)
    plt.plot(avg_numsteps)
    plt.xlabel('Episode')
    plt.show()

In [None]:
main()

episode: 0, total reward: 15.0, average_reward: 15.0, length: 14
episode: 1, total reward: 11.0, average_reward: 13.0, length: 10
episode: 2, total reward: 35.0, average_reward: 20.333, length: 34
episode: 3, total reward: 11.0, average_reward: 18.0, length: 10
episode: 4, total reward: 21.0, average_reward: 18.6, length: 20
episode: 5, total reward: 22.0, average_reward: 19.167, length: 21
episode: 6, total reward: 11.0, average_reward: 18.0, length: 10
episode: 7, total reward: 24.0, average_reward: 18.75, length: 23
episode: 8, total reward: 19.0, average_reward: 18.778, length: 18
episode: 9, total reward: 17.0, average_reward: 18.6, length: 16
episode: 10, total reward: 43.0, average_reward: 21.4, length: 42
episode: 11, total reward: 17.0, average_reward: 22.0, length: 16
episode: 12, total reward: 17.0, average_reward: 20.2, length: 16
episode: 13, total reward: 16.0, average_reward: 20.7, length: 15
episode: 14, total reward: 10.0, average_reward: 19.6, length: 9
episode: 15, t

episode: 125, total reward: 21.0, average_reward: 20.2, length: 20
episode: 126, total reward: 12.0, average_reward: 20.3, length: 11
episode: 127, total reward: 25.0, average_reward: 19.1, length: 24
episode: 128, total reward: 34.0, average_reward: 21.1, length: 33
episode: 129, total reward: 14.0, average_reward: 21.3, length: 13
episode: 130, total reward: 37.0, average_reward: 23.2, length: 36
episode: 131, total reward: 18.0, average_reward: 23.1, length: 17
episode: 132, total reward: 14.0, average_reward: 21.9, length: 13
episode: 133, total reward: 40.0, average_reward: 22.5, length: 39
episode: 134, total reward: 19.0, average_reward: 23.4, length: 18
episode: 135, total reward: 52.0, average_reward: 26.5, length: 51
episode: 136, total reward: 41.0, average_reward: 29.4, length: 40
episode: 137, total reward: 14.0, average_reward: 28.3, length: 13
episode: 138, total reward: 43.0, average_reward: 29.2, length: 42
episode: 139, total reward: 29.0, average_reward: 30.7, length

episode: 248, total reward: 16.0, average_reward: 26.7, length: 15
episode: 249, total reward: 34.0, average_reward: 27.4, length: 33
episode: 250, total reward: 83.0, average_reward: 30.6, length: 82
episode: 251, total reward: 12.0, average_reward: 30.6, length: 11
episode: 252, total reward: 19.0, average_reward: 31.3, length: 18
episode: 253, total reward: 48.0, average_reward: 35.0, length: 47
episode: 254, total reward: 28.0, average_reward: 29.3, length: 27
episode: 255, total reward: 26.0, average_reward: 29.8, length: 25
episode: 256, total reward: 36.0, average_reward: 32.2, length: 35
episode: 257, total reward: 34.0, average_reward: 33.6, length: 33
episode: 258, total reward: 28.0, average_reward: 34.8, length: 27
episode: 259, total reward: 59.0, average_reward: 37.3, length: 58
episode: 260, total reward: 33.0, average_reward: 32.3, length: 32
episode: 261, total reward: 44.0, average_reward: 35.5, length: 43
episode: 262, total reward: 16.0, average_reward: 35.2, length

episode: 371, total reward: 19.0, average_reward: 42.0, length: 18
episode: 372, total reward: 62.0, average_reward: 43.1, length: 61
episode: 373, total reward: 35.0, average_reward: 41.0, length: 34
episode: 374, total reward: 16.0, average_reward: 38.1, length: 15
episode: 375, total reward: 67.0, average_reward: 42.0, length: 66
episode: 376, total reward: 26.0, average_reward: 42.6, length: 25
episode: 377, total reward: 62.0, average_reward: 38.0, length: 61
episode: 378, total reward: 56.0, average_reward: 42.4, length: 55
episode: 379, total reward: 18.0, average_reward: 41.8, length: 17
episode: 380, total reward: 26.0, average_reward: 38.7, length: 25
episode: 381, total reward: 62.0, average_reward: 43.0, length: 61
episode: 382, total reward: 40.0, average_reward: 40.8, length: 39
episode: 383, total reward: 55.0, average_reward: 42.8, length: 54
episode: 384, total reward: 24.0, average_reward: 43.6, length: 23
episode: 385, total reward: 56.0, average_reward: 42.5, length

episode: 494, total reward: 61.0, average_reward: 66.1, length: 60
episode: 495, total reward: 51.0, average_reward: 62.8, length: 50
episode: 496, total reward: 57.0, average_reward: 65.3, length: 56
episode: 497, total reward: 136.0, average_reward: 74.8, length: 135
episode: 498, total reward: 22.0, average_reward: 72.2, length: 21
episode: 499, total reward: 42.0, average_reward: 65.2, length: 41
episode: 500, total reward: 79.0, average_reward: 69.8, length: 78
episode: 501, total reward: 51.0, average_reward: 63.3, length: 50
episode: 502, total reward: 38.0, average_reward: 65.4, length: 37
episode: 503, total reward: 33.0, average_reward: 57.0, length: 32
episode: 504, total reward: 83.0, average_reward: 59.2, length: 82
episode: 505, total reward: 39.0, average_reward: 58.0, length: 38
episode: 506, total reward: 32.0, average_reward: 55.5, length: 31
episode: 507, total reward: 43.0, average_reward: 46.2, length: 42
episode: 508, total reward: 49.0, average_reward: 48.9, leng

episode: 616, total reward: 200.0, average_reward: 93.8, length: 199
episode: 617, total reward: 129.0, average_reward: 98.0, length: 128
episode: 618, total reward: 56.0, average_reward: 95.5, length: 55
episode: 619, total reward: 200.0, average_reward: 107.0, length: 199
episode: 620, total reward: 200.0, average_reward: 119.9, length: 199
episode: 621, total reward: 99.0, average_reward: 119.1, length: 98
episode: 622, total reward: 141.0, average_reward: 119.1, length: 140
episode: 623, total reward: 200.0, average_reward: 135.4, length: 199
episode: 624, total reward: 200.0, average_reward: 152.4, length: 199
episode: 625, total reward: 109.0, average_reward: 153.4, length: 108
episode: 626, total reward: 197.0, average_reward: 153.1, length: 196
episode: 627, total reward: 70.0, average_reward: 147.2, length: 69
episode: 628, total reward: 41.0, average_reward: 145.7, length: 40
episode: 629, total reward: 98.0, average_reward: 135.5, length: 97
episode: 630, total reward: 45.0,

episode: 735, total reward: 150.0, average_reward: 134.5, length: 149
episode: 736, total reward: 53.0, average_reward: 119.8, length: 52
episode: 737, total reward: 200.0, average_reward: 127.6, length: 199
episode: 738, total reward: 49.0, average_reward: 121.2, length: 48
episode: 739, total reward: 185.0, average_reward: 132.5, length: 184
episode: 740, total reward: 156.0, average_reward: 139.5, length: 155
episode: 741, total reward: 113.0, average_reward: 137.2, length: 112
episode: 742, total reward: 60.0, average_reward: 123.2, length: 59
episode: 743, total reward: 172.0, average_reward: 129.7, length: 171
episode: 744, total reward: 200.0, average_reward: 133.8, length: 199
episode: 745, total reward: 166.0, average_reward: 135.4, length: 165
episode: 746, total reward: 200.0, average_reward: 150.1, length: 199
episode: 747, total reward: 148.0, average_reward: 144.9, length: 147
episode: 748, total reward: 200.0, average_reward: 160.0, length: 199
episode: 749, total reward

episode: 853, total reward: 200.0, average_reward: 172.2, length: 199
episode: 854, total reward: 200.0, average_reward: 172.2, length: 199
episode: 855, total reward: 142.0, average_reward: 171.3, length: 141
episode: 856, total reward: 78.0, average_reward: 168.2, length: 77
episode: 857, total reward: 200.0, average_reward: 169.5, length: 199
episode: 858, total reward: 153.0, average_reward: 167.0, length: 152
episode: 859, total reward: 171.0, average_reward: 170.0, length: 170
episode: 860, total reward: 200.0, average_reward: 174.4, length: 199
episode: 861, total reward: 159.0, average_reward: 170.3, length: 158
episode: 862, total reward: 200.0, average_reward: 170.3, length: 199
episode: 863, total reward: 194.0, average_reward: 169.7, length: 193
episode: 864, total reward: 127.0, average_reward: 162.4, length: 126
episode: 865, total reward: 200.0, average_reward: 168.2, length: 199
episode: 866, total reward: 200.0, average_reward: 180.4, length: 199
episode: 867, total re

episode: 971, total reward: 189.0, average_reward: 174.2, length: 188
episode: 972, total reward: 131.0, average_reward: 170.2, length: 130
episode: 973, total reward: 191.0, average_reward: 169.3, length: 190
episode: 974, total reward: 185.0, average_reward: 167.8, length: 184
episode: 975, total reward: 200.0, average_reward: 177.9, length: 199
episode: 976, total reward: 158.0, average_reward: 182.2, length: 157
episode: 977, total reward: 149.0, average_reward: 177.1, length: 148
episode: 978, total reward: 200.0, average_reward: 179.1, length: 199
episode: 979, total reward: 140.0, average_reward: 174.3, length: 139
episode: 980, total reward: 200.0, average_reward: 174.3, length: 199
episode: 981, total reward: 200.0, average_reward: 175.4, length: 199
episode: 982, total reward: 200.0, average_reward: 182.3, length: 199
episode: 983, total reward: 183.0, average_reward: 181.5, length: 182
episode: 984, total reward: 156.0, average_reward: 178.6, length: 155
episode: 985, total 

episode: 1088, total reward: 200.0, average_reward: 172.1, length: 199
episode: 1089, total reward: 200.0, average_reward: 172.1, length: 199
episode: 1090, total reward: 190.0, average_reward: 180.4, length: 189
episode: 1091, total reward: 118.0, average_reward: 172.2, length: 117
episode: 1092, total reward: 200.0, average_reward: 176.5, length: 199
episode: 1093, total reward: 200.0, average_reward: 181.0, length: 199
episode: 1094, total reward: 182.0, average_reward: 179.2, length: 181
episode: 1095, total reward: 129.0, average_reward: 172.1, length: 128
episode: 1096, total reward: 200.0, average_reward: 174.8, length: 199
episode: 1097, total reward: 200.0, average_reward: 181.9, length: 199
episode: 1098, total reward: 197.0, average_reward: 181.6, length: 196
episode: 1099, total reward: 134.0, average_reward: 175.0, length: 133
episode: 1100, total reward: 200.0, average_reward: 176.0, length: 199
episode: 1101, total reward: 200.0, average_reward: 184.2, length: 199
episod

episode: 1204, total reward: 200.0, average_reward: 184.8, length: 199
episode: 1205, total reward: 200.0, average_reward: 184.8, length: 199
episode: 1206, total reward: 65.0, average_reward: 171.3, length: 64
episode: 1207, total reward: 200.0, average_reward: 171.3, length: 199
episode: 1208, total reward: 200.0, average_reward: 175.5, length: 199
episode: 1209, total reward: 188.0, average_reward: 174.3, length: 187
episode: 1210, total reward: 200.0, average_reward: 183.2, length: 199
episode: 1211, total reward: 200.0, average_reward: 183.6, length: 199
episode: 1212, total reward: 155.0, average_reward: 179.3, length: 154
episode: 1213, total reward: 194.0, average_reward: 180.2, length: 193
episode: 1214, total reward: 200.0, average_reward: 180.2, length: 199
episode: 1215, total reward: 200.0, average_reward: 180.2, length: 199
episode: 1216, total reward: 167.0, average_reward: 190.4, length: 166
episode: 1217, total reward: 200.0, average_reward: 190.4, length: 199
episode:

episode: 1320, total reward: 200.0, average_reward: 189.6, length: 199
episode: 1321, total reward: 200.0, average_reward: 189.6, length: 199
episode: 1322, total reward: 200.0, average_reward: 197.5, length: 199
episode: 1323, total reward: 159.0, average_reward: 195.6, length: 158
episode: 1324, total reward: 200.0, average_reward: 195.6, length: 199
episode: 1325, total reward: 113.0, average_reward: 186.9, length: 112
episode: 1326, total reward: 142.0, average_reward: 181.4, length: 141
episode: 1327, total reward: 200.0, average_reward: 181.4, length: 199
episode: 1328, total reward: 200.0, average_reward: 181.4, length: 199
episode: 1329, total reward: 200.0, average_reward: 181.4, length: 199
episode: 1330, total reward: 120.0, average_reward: 173.4, length: 119
episode: 1331, total reward: 200.0, average_reward: 173.4, length: 199
episode: 1332, total reward: 112.0, average_reward: 164.6, length: 111
episode: 1333, total reward: 171.0, average_reward: 165.8, length: 170
episod

episode: 1436, total reward: 200.0, average_reward: 179.5, length: 199
episode: 1437, total reward: 200.0, average_reward: 179.5, length: 199
episode: 1438, total reward: 200.0, average_reward: 179.5, length: 199
episode: 1439, total reward: 200.0, average_reward: 179.5, length: 199
episode: 1440, total reward: 200.0, average_reward: 187.4, length: 199
episode: 1441, total reward: 200.0, average_reward: 194.6, length: 199
episode: 1442, total reward: 200.0, average_reward: 199.8, length: 199
episode: 1443, total reward: 200.0, average_reward: 200.0, length: 199
episode: 1444, total reward: 200.0, average_reward: 200.0, length: 199
episode: 1445, total reward: 200.0, average_reward: 200.0, length: 199
episode: 1446, total reward: 121.0, average_reward: 192.1, length: 120
episode: 1447, total reward: 173.0, average_reward: 189.4, length: 172
episode: 1448, total reward: 200.0, average_reward: 189.4, length: 199
episode: 1449, total reward: 200.0, average_reward: 189.4, length: 199
episod

episode: 1552, total reward: 200.0, average_reward: 189.8, length: 199
episode: 1553, total reward: 200.0, average_reward: 189.8, length: 199
episode: 1554, total reward: 200.0, average_reward: 189.8, length: 199
episode: 1555, total reward: 170.0, average_reward: 186.8, length: 169
episode: 1556, total reward: 119.0, average_reward: 178.7, length: 118
episode: 1557, total reward: 194.0, average_reward: 181.2, length: 193
episode: 1558, total reward: 200.0, average_reward: 188.3, length: 199
episode: 1559, total reward: 200.0, average_reward: 188.3, length: 199
episode: 1560, total reward: 200.0, average_reward: 188.3, length: 199
episode: 1561, total reward: 181.0, average_reward: 186.4, length: 180
episode: 1562, total reward: 148.0, average_reward: 181.2, length: 147
episode: 1563, total reward: 186.0, average_reward: 179.8, length: 185
episode: 1564, total reward: 200.0, average_reward: 179.8, length: 199
episode: 1565, total reward: 200.0, average_reward: 182.8, length: 199
episod

episode: 1668, total reward: 200.0, average_reward: 190.9, length: 199
episode: 1669, total reward: 144.0, average_reward: 185.3, length: 143
episode: 1670, total reward: 200.0, average_reward: 185.3, length: 199
episode: 1671, total reward: 178.0, average_reward: 183.1, length: 177
episode: 1672, total reward: 200.0, average_reward: 192.2, length: 199
episode: 1673, total reward: 200.0, average_reward: 192.2, length: 199
episode: 1674, total reward: 200.0, average_reward: 192.2, length: 199
episode: 1675, total reward: 200.0, average_reward: 192.2, length: 199
episode: 1676, total reward: 200.0, average_reward: 192.2, length: 199
episode: 1677, total reward: 200.0, average_reward: 192.2, length: 199
episode: 1678, total reward: 200.0, average_reward: 192.2, length: 199
episode: 1679, total reward: 200.0, average_reward: 197.8, length: 199
episode: 1680, total reward: 200.0, average_reward: 197.8, length: 199
episode: 1681, total reward: 200.0, average_reward: 200.0, length: 199
episod

episode: 1784, total reward: 200.0, average_reward: 182.1, length: 199
episode: 1785, total reward: 200.0, average_reward: 182.1, length: 199
episode: 1786, total reward: 200.0, average_reward: 185.2, length: 199
episode: 1787, total reward: 200.0, average_reward: 185.2, length: 199
episode: 1788, total reward: 200.0, average_reward: 190.6, length: 199
episode: 1789, total reward: 200.0, average_reward: 190.6, length: 199
episode: 1790, total reward: 200.0, average_reward: 190.6, length: 199
episode: 1791, total reward: 196.0, average_reward: 191.8, length: 195
episode: 1792, total reward: 200.0, average_reward: 197.3, length: 199
episode: 1793, total reward: 200.0, average_reward: 199.6, length: 199
episode: 1794, total reward: 200.0, average_reward: 199.6, length: 199
episode: 1795, total reward: 200.0, average_reward: 199.6, length: 199
episode: 1796, total reward: 200.0, average_reward: 199.6, length: 199
episode: 1797, total reward: 200.0, average_reward: 199.6, length: 199
episod

episode: 1900, total reward: 129.0, average_reward: 186.9, length: 128
episode: 1901, total reward: 200.0, average_reward: 186.9, length: 199
episode: 1902, total reward: 200.0, average_reward: 186.9, length: 199
episode: 1903, total reward: 200.0, average_reward: 186.9, length: 199
episode: 1904, total reward: 200.0, average_reward: 186.9, length: 199
episode: 1905, total reward: 200.0, average_reward: 186.9, length: 199
episode: 1906, total reward: 200.0, average_reward: 188.8, length: 199
episode: 1907, total reward: 200.0, average_reward: 188.8, length: 199
episode: 1908, total reward: 150.0, average_reward: 187.9, length: 149
episode: 1909, total reward: 200.0, average_reward: 187.9, length: 199
episode: 1910, total reward: 200.0, average_reward: 195.0, length: 199
episode: 1911, total reward: 200.0, average_reward: 195.0, length: 199
episode: 1912, total reward: 200.0, average_reward: 195.0, length: 199
episode: 1913, total reward: 200.0, average_reward: 195.0, length: 199
episod

episode: 2016, total reward: 170.0, average_reward: 181.6, length: 169
episode: 2017, total reward: 200.0, average_reward: 188.4, length: 199
episode: 2018, total reward: 171.0, average_reward: 185.5, length: 170
episode: 2019, total reward: 134.0, average_reward: 180.5, length: 133
episode: 2020, total reward: 139.0, average_reward: 174.4, length: 138
episode: 2021, total reward: 198.0, average_reward: 174.2, length: 197
episode: 2022, total reward: 124.0, average_reward: 167.7, length: 123
episode: 2023, total reward: 200.0, average_reward: 167.7, length: 199
episode: 2024, total reward: 186.0, average_reward: 172.2, length: 185
episode: 2025, total reward: 200.0, average_reward: 172.2, length: 199
episode: 2026, total reward: 200.0, average_reward: 175.2, length: 199
episode: 2027, total reward: 200.0, average_reward: 175.2, length: 199
episode: 2028, total reward: 194.0, average_reward: 177.5, length: 193
episode: 2029, total reward: 186.0, average_reward: 182.7, length: 185
episod

episode: 2132, total reward: 200.0, average_reward: 190.8, length: 199
episode: 2133, total reward: 164.0, average_reward: 188.8, length: 163
episode: 2134, total reward: 200.0, average_reward: 188.8, length: 199
episode: 2135, total reward: 200.0, average_reward: 188.8, length: 199
episode: 2136, total reward: 200.0, average_reward: 189.1, length: 199
episode: 2137, total reward: 200.0, average_reward: 189.1, length: 199
episode: 2138, total reward: 199.0, average_reward: 190.1, length: 198
episode: 2139, total reward: 141.0, average_reward: 190.4, length: 140
episode: 2140, total reward: 200.0, average_reward: 190.4, length: 199
episode: 2141, total reward: 200.0, average_reward: 190.4, length: 199
episode: 2142, total reward: 156.0, average_reward: 186.0, length: 155
episode: 2143, total reward: 200.0, average_reward: 189.6, length: 199
episode: 2144, total reward: 200.0, average_reward: 189.6, length: 199
episode: 2145, total reward: 200.0, average_reward: 189.6, length: 199
episod

episode: 2248, total reward: 200.0, average_reward: 189.5, length: 199
episode: 2249, total reward: 200.0, average_reward: 189.5, length: 199
episode: 2250, total reward: 200.0, average_reward: 192.2, length: 199
episode: 2251, total reward: 200.0, average_reward: 192.2, length: 199
episode: 2252, total reward: 200.0, average_reward: 195.4, length: 199
episode: 2253, total reward: 179.0, average_reward: 193.8, length: 178
episode: 2254, total reward: 200.0, average_reward: 193.8, length: 199
episode: 2255, total reward: 191.0, average_reward: 192.9, length: 190
episode: 2256, total reward: 90.0, average_reward: 181.9, length: 89
episode: 2257, total reward: 200.0, average_reward: 186.0, length: 199
episode: 2258, total reward: 200.0, average_reward: 186.0, length: 199
episode: 2259, total reward: 200.0, average_reward: 186.0, length: 199
episode: 2260, total reward: 200.0, average_reward: 186.0, length: 199
episode: 2261, total reward: 200.0, average_reward: 186.0, length: 199
episode:

episode: 2364, total reward: 195.0, average_reward: 192.8, length: 194
episode: 2365, total reward: 200.0, average_reward: 192.8, length: 199
episode: 2366, total reward: 155.0, average_reward: 188.6, length: 154
episode: 2367, total reward: 200.0, average_reward: 188.6, length: 199
episode: 2368, total reward: 200.0, average_reward: 188.6, length: 199
episode: 2369, total reward: 200.0, average_reward: 188.6, length: 199
episode: 2370, total reward: 138.0, average_reward: 182.4, length: 137
episode: 2371, total reward: 159.0, average_reward: 183.8, length: 158
episode: 2372, total reward: 125.0, average_reward: 177.2, length: 124
episode: 2373, total reward: 148.0, average_reward: 172.0, length: 147
episode: 2374, total reward: 200.0, average_reward: 172.5, length: 199
episode: 2375, total reward: 200.0, average_reward: 172.5, length: 199
episode: 2376, total reward: 200.0, average_reward: 177.0, length: 199
episode: 2377, total reward: 200.0, average_reward: 177.0, length: 199
episod

episode: 2480, total reward: 195.0, average_reward: 197.5, length: 194
episode: 2481, total reward: 200.0, average_reward: 198.2, length: 199
episode: 2482, total reward: 135.0, average_reward: 191.7, length: 134
episode: 2483, total reward: 200.0, average_reward: 191.7, length: 199
episode: 2484, total reward: 200.0, average_reward: 191.7, length: 199
episode: 2485, total reward: 200.0, average_reward: 193.0, length: 199
episode: 2486, total reward: 200.0, average_reward: 193.0, length: 199
episode: 2487, total reward: 159.0, average_reward: 188.9, length: 158
episode: 2488, total reward: 200.0, average_reward: 188.9, length: 199
episode: 2489, total reward: 200.0, average_reward: 188.9, length: 199
episode: 2490, total reward: 200.0, average_reward: 189.4, length: 199
episode: 2491, total reward: 200.0, average_reward: 189.4, length: 199
episode: 2492, total reward: 200.0, average_reward: 195.9, length: 199
episode: 2493, total reward: 200.0, average_reward: 195.9, length: 199
episod

episode: 2596, total reward: 200.0, average_reward: 198.3, length: 199
episode: 2597, total reward: 200.0, average_reward: 198.3, length: 199
episode: 2598, total reward: 200.0, average_reward: 198.3, length: 199
episode: 2599, total reward: 200.0, average_reward: 198.3, length: 199
episode: 2600, total reward: 200.0, average_reward: 198.3, length: 199
episode: 2601, total reward: 200.0, average_reward: 198.3, length: 199
episode: 2602, total reward: 200.0, average_reward: 198.3, length: 199
episode: 2603, total reward: 200.0, average_reward: 200.0, length: 199
episode: 2604, total reward: 200.0, average_reward: 200.0, length: 199
episode: 2605, total reward: 200.0, average_reward: 200.0, length: 199
episode: 2606, total reward: 200.0, average_reward: 200.0, length: 199
episode: 2607, total reward: 200.0, average_reward: 200.0, length: 199
episode: 2608, total reward: 125.0, average_reward: 192.5, length: 124
episode: 2609, total reward: 200.0, average_reward: 192.5, length: 199
episod

episode: 2712, total reward: 182.0, average_reward: 192.9, length: 181
episode: 2713, total reward: 173.0, average_reward: 190.2, length: 172
episode: 2714, total reward: 190.0, average_reward: 189.2, length: 189
episode: 2715, total reward: 200.0, average_reward: 189.2, length: 199
episode: 2716, total reward: 200.0, average_reward: 189.2, length: 199
episode: 2717, total reward: 200.0, average_reward: 189.4, length: 199
episode: 2718, total reward: 176.0, average_reward: 192.1, length: 175
episode: 2719, total reward: 200.0, average_reward: 192.1, length: 199
episode: 2720, total reward: 200.0, average_reward: 192.1, length: 199
episode: 2721, total reward: 200.0, average_reward: 192.1, length: 199
episode: 2722, total reward: 193.0, average_reward: 193.2, length: 192
episode: 2723, total reward: 200.0, average_reward: 195.9, length: 199
episode: 2724, total reward: 200.0, average_reward: 196.9, length: 199
episode: 2725, total reward: 200.0, average_reward: 196.9, length: 199
episod

episode: 2828, total reward: 200.0, average_reward: 184.1, length: 199
episode: 2829, total reward: 120.0, average_reward: 176.2, length: 119
episode: 2830, total reward: 200.0, average_reward: 176.9, length: 199
episode: 2831, total reward: 103.0, average_reward: 167.2, length: 102
episode: 2832, total reward: 143.0, average_reward: 169.8, length: 142
episode: 2833, total reward: 151.0, average_reward: 164.9, length: 150
episode: 2834, total reward: 181.0, average_reward: 163.8, length: 180
episode: 2835, total reward: 175.0, average_reward: 163.4, length: 174
episode: 2836, total reward: 138.0, average_reward: 161.1, length: 137
episode: 2837, total reward: 120.0, average_reward: 153.1, length: 119
episode: 2838, total reward: 160.0, average_reward: 149.1, length: 159
episode: 2839, total reward: 130.0, average_reward: 150.1, length: 129
episode: 2840, total reward: 200.0, average_reward: 150.1, length: 199
episode: 2841, total reward: 117.0, average_reward: 151.5, length: 116
episod

episode: 2944, total reward: 153.0, average_reward: 186.4, length: 152
episode: 2945, total reward: 200.0, average_reward: 186.4, length: 199
episode: 2946, total reward: 200.0, average_reward: 192.2, length: 199
episode: 2947, total reward: 200.0, average_reward: 192.2, length: 199
episode: 2948, total reward: 200.0, average_reward: 192.2, length: 199
episode: 2949, total reward: 200.0, average_reward: 195.0, length: 199
episode: 2950, total reward: 200.0, average_reward: 195.0, length: 199
episode: 2951, total reward: 200.0, average_reward: 195.3, length: 199
episode: 2952, total reward: 200.0, average_reward: 195.3, length: 199
episode: 2953, total reward: 200.0, average_reward: 195.3, length: 199
episode: 2954, total reward: 122.0, average_reward: 192.2, length: 121
episode: 2955, total reward: 200.0, average_reward: 192.2, length: 199
episode: 2956, total reward: 200.0, average_reward: 192.2, length: 199
episode: 2957, total reward: 200.0, average_reward: 192.2, length: 199
episod

episode: 3060, total reward: 200.0, average_reward: 173.0, length: 199
episode: 3061, total reward: 200.0, average_reward: 189.5, length: 199
episode: 3062, total reward: 200.0, average_reward: 189.6, length: 199
episode: 3063, total reward: 200.0, average_reward: 191.0, length: 199
episode: 3064, total reward: 200.0, average_reward: 193.4, length: 199
episode: 3065, total reward: 159.0, average_reward: 191.2, length: 158
episode: 3066, total reward: 200.0, average_reward: 191.2, length: 199
episode: 3067, total reward: 200.0, average_reward: 195.9, length: 199
episode: 3068, total reward: 197.0, average_reward: 195.6, length: 196
episode: 3069, total reward: 200.0, average_reward: 195.6, length: 199
episode: 3070, total reward: 200.0, average_reward: 195.6, length: 199
episode: 3071, total reward: 200.0, average_reward: 195.6, length: 199
episode: 3072, total reward: 200.0, average_reward: 195.6, length: 199
episode: 3073, total reward: 200.0, average_reward: 195.6, length: 199
episod

episode: 3176, total reward: 200.0, average_reward: 190.7, length: 199
episode: 3177, total reward: 200.0, average_reward: 190.7, length: 199
episode: 3178, total reward: 200.0, average_reward: 190.7, length: 199
episode: 3179, total reward: 200.0, average_reward: 198.2, length: 199
episode: 3180, total reward: 200.0, average_reward: 198.2, length: 199
episode: 3181, total reward: 200.0, average_reward: 198.2, length: 199
episode: 3182, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3183, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3184, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3185, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3186, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3187, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3188, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3189, total reward: 200.0, average_reward: 200.0, length: 199
episod

episode: 3292, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3293, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3294, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3295, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3296, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3297, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3298, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3299, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3300, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3301, total reward: 134.0, average_reward: 193.4, length: 133
episode: 3302, total reward: 200.0, average_reward: 193.4, length: 199
episode: 3303, total reward: 200.0, average_reward: 193.4, length: 199
episode: 3304, total reward: 200.0, average_reward: 193.4, length: 199
episode: 3305, total reward: 200.0, average_reward: 193.4, length: 199
episod

episode: 3408, total reward: 200.0, average_reward: 197.9, length: 199
episode: 3409, total reward: 182.0, average_reward: 196.1, length: 181
episode: 3410, total reward: 200.0, average_reward: 198.2, length: 199
episode: 3411, total reward: 200.0, average_reward: 198.2, length: 199
episode: 3412, total reward: 200.0, average_reward: 198.2, length: 199
episode: 3413, total reward: 200.0, average_reward: 198.2, length: 199
episode: 3414, total reward: 200.0, average_reward: 198.2, length: 199
episode: 3415, total reward: 200.0, average_reward: 198.2, length: 199
episode: 3416, total reward: 200.0, average_reward: 198.2, length: 199
episode: 3417, total reward: 200.0, average_reward: 198.2, length: 199
episode: 3418, total reward: 200.0, average_reward: 198.2, length: 199
episode: 3419, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3420, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3421, total reward: 200.0, average_reward: 200.0, length: 199
episod

episode: 3524, total reward: 192.0, average_reward: 199.2, length: 191
episode: 3525, total reward: 200.0, average_reward: 199.2, length: 199
episode: 3526, total reward: 200.0, average_reward: 199.2, length: 199
episode: 3527, total reward: 200.0, average_reward: 199.2, length: 199
episode: 3528, total reward: 200.0, average_reward: 199.2, length: 199
episode: 3529, total reward: 200.0, average_reward: 199.2, length: 199
episode: 3530, total reward: 200.0, average_reward: 199.2, length: 199
episode: 3531, total reward: 200.0, average_reward: 199.2, length: 199
episode: 3532, total reward: 200.0, average_reward: 199.2, length: 199
episode: 3533, total reward: 200.0, average_reward: 199.2, length: 199
episode: 3534, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3535, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3536, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3537, total reward: 169.0, average_reward: 196.9, length: 168
episod

episode: 3640, total reward: 200.0, average_reward: 182.8, length: 199
episode: 3641, total reward: 200.0, average_reward: 182.8, length: 199
episode: 3642, total reward: 200.0, average_reward: 182.8, length: 199
episode: 3643, total reward: 200.0, average_reward: 182.8, length: 199
episode: 3644, total reward: 176.0, average_reward: 191.5, length: 175
episode: 3645, total reward: 200.0, average_reward: 197.6, length: 199
episode: 3646, total reward: 200.0, average_reward: 197.6, length: 199
episode: 3647, total reward: 200.0, average_reward: 197.6, length: 199
episode: 3648, total reward: 200.0, average_reward: 197.6, length: 199
episode: 3649, total reward: 200.0, average_reward: 197.6, length: 199
episode: 3650, total reward: 200.0, average_reward: 197.6, length: 199
episode: 3651, total reward: 200.0, average_reward: 197.6, length: 199
episode: 3652, total reward: 200.0, average_reward: 197.6, length: 199
episode: 3653, total reward: 179.0, average_reward: 195.5, length: 178
episod

episode: 3756, total reward: 200.0, average_reward: 191.2, length: 199
episode: 3757, total reward: 200.0, average_reward: 191.2, length: 199
episode: 3758, total reward: 200.0, average_reward: 199.8, length: 199
episode: 3759, total reward: 200.0, average_reward: 199.8, length: 199
episode: 3760, total reward: 200.0, average_reward: 199.8, length: 199
episode: 3761, total reward: 137.0, average_reward: 193.5, length: 136
episode: 3762, total reward: 200.0, average_reward: 193.5, length: 199
episode: 3763, total reward: 200.0, average_reward: 193.5, length: 199
episode: 3764, total reward: 200.0, average_reward: 193.7, length: 199
episode: 3765, total reward: 200.0, average_reward: 193.7, length: 199
episode: 3766, total reward: 200.0, average_reward: 193.7, length: 199
episode: 3767, total reward: 200.0, average_reward: 193.7, length: 199
episode: 3768, total reward: 200.0, average_reward: 193.7, length: 199
episode: 3769, total reward: 200.0, average_reward: 193.7, length: 199
episod

episode: 3872, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3873, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3874, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3875, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3876, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3877, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3878, total reward: 171.0, average_reward: 197.1, length: 170
episode: 3879, total reward: 200.0, average_reward: 197.1, length: 199
episode: 3880, total reward: 183.0, average_reward: 195.4, length: 182
episode: 3881, total reward: 167.0, average_reward: 192.1, length: 166
episode: 3882, total reward: 200.0, average_reward: 192.1, length: 199
episode: 3883, total reward: 200.0, average_reward: 192.1, length: 199
episode: 3884, total reward: 200.0, average_reward: 192.1, length: 199
episode: 3885, total reward: 200.0, average_reward: 192.1, length: 199
episod

episode: 3988, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3989, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3990, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3991, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3992, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3993, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3994, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3995, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3996, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3997, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3998, total reward: 200.0, average_reward: 200.0, length: 199
episode: 3999, total reward: 200.0, average_reward: 200.0, length: 199
episode: 4000, total reward: 200.0, average_reward: 200.0, length: 199
episode: 4001, total reward: 200.0, average_reward: 200.0, length: 199
episod