In [None]:
# Spring 2022, IOC 5259 Reinforcement Learning
# HW1-partII: REINFORCE and baseline

import gym
from itertools import count
from collections import namedtuple
import numpy as np
from numpy import sqrt 

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical
import torch.optim.lr_scheduler as Scheduler


# Per-step record stored while an episode is running: the log-probability of
# the sampled action and the value estimate produced for the same state.
SavedAction = namedtuple('SavedAction', 'log_prob value')

        
class Policy(nn.Module):
    """Actor-critic network for REINFORCE with a learned value baseline.

    Two hidden layers are shared; on top of them sit an action head (softmax
    over actions) and a value head (scalar state value). The layer sizes are
    read from the module-level ``env``, which therefore has to exist before
    the class is instantiated.

    Besides the network, the instance keeps the trajectory of the episode
    currently being played:
        - ``saved_actions``: one ``SavedAction(log_prob, value)`` per step,
          appended by ``select_action``.
        - ``rewards``: one reward per step, appended by the caller.

    NOTE(review): ``select_action`` always samples from a ``Categorical``
    distribution, so only discrete action spaces are meaningfully supported
    even though ``action_dim`` is also computed for non-discrete spaces.
    """

    def __init__(self):
        super(Policy, self).__init__()

        # Extract the dimensionality of state and action spaces
        self.discrete = isinstance(env.action_space, gym.spaces.Discrete)
        self.observation_dim = int(env.observation_space.shape[0])
        self.action_dim = int(
            env.action_space.n if self.discrete else env.action_space.shape[0]
        )
        self.hidden_size = 128

        # Layers are created and initialised in a fixed order so that a fixed
        # torch seed keeps producing the same initial weights. Only the weights
        # get Xavier-uniform init; biases keep nn.Linear's default.
        self.s_layer1 = nn.Linear(self.observation_dim, self.hidden_size)
        torch.nn.init.xavier_uniform_(self.s_layer1.weight)

        self.s_layer2 = nn.Linear(self.hidden_size, self.hidden_size)
        torch.nn.init.xavier_uniform_(self.s_layer2.weight)

        # Actor head: one logit per action.
        self.a_layer = nn.Linear(self.hidden_size, self.action_dim)
        torch.nn.init.xavier_uniform_(self.a_layer.weight)

        # Critic head: scalar state value.
        self.v_layer = nn.Linear(self.hidden_size, 1)
        torch.nn.init.xavier_uniform_(self.v_layer.weight)

        # action & reward memory
        self.saved_actions = []
        self.rewards = []

    def forward(self, state):
        """Run the shared trunk and both heads.

        Args:
            state: float tensor whose last dimension is ``observation_dim``.

        Returns:
            Tuple ``(action_prob, state_value)``: the softmax distribution
            over actions (taken along the last dimension) and the value-head
            output for ``state``.
        """
        hidden = F.relu(self.s_layer1(state))
        hidden = F.relu(self.s_layer2(hidden))

        action_prob = F.softmax(self.a_layer(hidden), dim=-1)
        state_value = self.v_layer(hidden)

        return action_prob, state_value

    def select_action(self, state):
        """Sample an action from the current stochastic policy.

        The log-probability of the sampled action and the value estimate are
        appended to ``self.saved_actions`` for the later loss computation.

        Args:
            state: numpy array holding the observation; a leading dimension of
                size one, if present, is squeezed away.

        Returns:
            The sampled action as a Python int.
        """
        state = torch.from_numpy(state).float().squeeze(0)
        action_prob, state_value = self.forward(state)
        m = Categorical(action_prob)
        action = m.sample()

        # save to action buffer
        self.saved_actions.append(SavedAction(m.log_prob(action), state_value))

        return action.item()

    def calculate_loss(self, gamma=0.99):
        """Compute the episode loss (policy loss + value loss).

        Steps:
            1. Discounted rewards-to-go are computed from ``self.rewards``.
            2. The returns are centred and, when the episode has more than one
               step, divided by their standard deviation. A one-step episode
               is only centred, because the sample standard deviation of a
               single value is NaN and would poison the whole update.
            3. Policy loss is ``sum(-log_prob * advantage)`` with
               ``advantage = return - value``; the value is detached so the
               critic is trained by the value loss only.
            4. Value loss is the summed squared error between value estimates
               and the normalised returns.

        Args:
            gamma: discount factor.

        Returns:
            Scalar tensor suitable for ``backward()``.

        Raises:
            ValueError: if no step was recorded, or if the number of recorded
                rewards differs from the number of saved actions (the two
                buffers would otherwise be silently misaligned).
        """
        if not self.rewards:
            raise ValueError("calculate_loss() needs at least one recorded step")
        if len(self.rewards) != len(self.saved_actions):
            raise ValueError(
                "rewards and saved_actions are out of sync: {} vs {}".format(
                    len(self.rewards), len(self.saved_actions)
                )
            )

        eps = np.finfo(np.float32).eps.item()

        # Rewards-to-go, accumulated backwards in O(n) and flipped afterwards.
        discounted = []
        running = 0.0
        for reward in reversed(self.rewards):
            running = reward + gamma * running
            discounted.append(running)
        discounted.reverse()

        raw_returns = torch.tensor(discounted, dtype=torch.float32)
        returns = raw_returns - raw_returns.mean()
        if raw_returns.numel() > 1:
            # eps keeps the division stable when all returns are (almost) equal.
            returns = returns / (raw_returns.std() + eps)

        log_probs = torch.stack(
            [log_prob for log_prob, _ in self.saved_actions]
        ).reshape(-1)
        values = torch.stack(
            [state_value for _, state_value in self.saved_actions]
        ).reshape(-1)

        # Baseline: subtract the critic's estimate to reduce variance.
        advantages = returns - values.detach()
        policy_loss = (advantages * -log_probs).sum()
        value_loss = F.mse_loss(values, returns, reduction='sum')

        return policy_loss + value_loss

    def clear_memory(self):
        """Empty the reward and action buffers in place."""
        del self.rewards[:]
        del self.saved_actions[:]


def train(lr=0.005):
    """Train the policy with REINFORCE plus a value baseline.

    In each episode the current policy is rolled out on the module-level
    ``env`` until the episode ends (or 10000 steps have been taken), and then
    one Adam update is performed on the loss of the whole trajectory.
    Training stops once the exponentially weighted moving average (EWMA) of
    the episode reward exceeds ``env.spec.reward_threshold``; the weights are
    then written to ``./preTrained/LunarLander_<lr>.pth``.

    The loop expects ``env.reset()`` to return the observation and
    ``env.step()`` to return a 4-tuple ``(state, reward, done, info)``.

    Args:
        lr: learning rate passed to Adam; it is also part of the checkpoint
            file name.
    """
    # Local import so this function does not depend on the file's import block.
    import os

    # Make sure the checkpoint directory exists *before* training, so that a
    # solved run cannot fail at the very end when saving.
    checkpoint_path = './preTrained/LunarLander_{}.pth'.format(lr)
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    # Instantiate the policy model and the optimizer
    model = Policy()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # EWMA reward for tracking the learning progress
    ewma_reward = 0

    # Run episodes until the solve criterion below is met.
    for i_episode in count(1):
        # reset environment and episode reward
        state = env.reset()
        ep_reward = 0

        # Cap every episode at 10000 steps so a non-terminating rollout cannot
        # stall training. After the loop, t is the zero-based index of the
        # last step taken.
        for t in range(0, 10000):
            action = model.select_action(state)
            state, reward, done, _ = env.step(action)

            model.rewards.append(reward)
            ep_reward += reward
            if done:
                break

        # One gradient step on the full-episode loss, then drop the trajectory.
        # (A learning-rate scheduler, if desired, would be stepped here.)
        optimizer.zero_grad()
        policy_loss = model.calculate_loss()
        policy_loss.backward()
        optimizer.step()
        model.clear_memory()

        # update EWMA reward and log the results
        ewma_reward = 0.05 * ep_reward + (1 - 0.05) * ewma_reward
        print('Episode {}\tlength: {}\treward: {}\t ewma reward: {}'.format(i_episode, t, ep_reward, ewma_reward))

        # Check whether the environment counts as "solved".
        if ewma_reward > env.spec.reward_threshold:
            torch.save(model.state_dict(), checkpoint_path)
            print("Solved! Running reward is now {} and "
                  "the last episode runs to {} time steps!".format(ewma_reward, t))
            break


def test(name, n_episodes=10):
    """Roll out a saved policy on the module-level ``env`` and print rewards.

    Actions are still sampled from the stochastic policy. Evaluation runs
    with autograd disabled, and the model's trajectory buffers are cleared
    after every episode; otherwise ``select_action`` would keep appending
    graph-carrying tensors for the whole evaluation run.

    Args:
        name: checkpoint file name inside ``./preTrained/``.
        n_episodes: number of evaluation episodes to play.
    """
    model = Policy()

    model.load_state_dict(torch.load('./preTrained/{}'.format(name)))

    render = True
    max_episode_len = 10000

    try:
        with torch.no_grad():
            for i_episode in range(1, n_episodes + 1):
                state = env.reset()
                running_reward = 0
                for _ in range(max_episode_len + 1):
                    action = model.select_action(state)
                    state, reward, done, _info = env.step(action)
                    running_reward += reward
                    if render:
                        env.render()
                    if done:
                        break
                # Nothing is learned here, so the recorded steps are not needed.
                model.clear_memory()
                print('Episode {}\tReward: {}'.format(i_episode, running_reward))
    finally:
        # Close the environment even if an episode raised.
        env.close()
    

if __name__ == '__main__':
    # Fixed seed for both PyTorch and the environment, for reproducibility.
    random_seed = 20
    lr = 0.005
    torch.manual_seed(random_seed)

    env = gym.make("LunarLander-v2")
    env.seed(random_seed)

    # Train first, then replay the checkpoint that train() wrote for this
    # learning rate (the file name embeds lr, e.g. 'LunarLander_0.005.pth').
    train(lr)
    test('LunarLander_{}.pth'.format(lr))


Episode 1	length: 72	reward: -72.10817345845335	 ewma reward: -3.6054086729226675
Episode 2	length: 94	reward: -113.68456511415435	 ewma reward: -9.109366494984252
Episode 3	length: 83	reward: -102.141332726004	 ewma reward: -13.76096480653524
Episode 4	length: 111	reward: -346.97170635128003	 ewma reward: -30.42150188377248
Episode 5	length: 82	reward: -146.29018118443864	 ewma reward: -36.21493584880579
Episode 6	length: 78	reward: -162.42387272482648	 ewma reward: -42.52538269260682
Episode 7	length: 58	reward: -139.97343629718094	 ewma reward: -47.39778537283553
Episode 8	length: 109	reward: -223.32553462623972	 ewma reward: -56.19417283550574
Episode 9	length: 89	reward: -383.6158308745566	 ewma reward: -72.56525573745829
Episode 10	length: 83	reward: -112.36755045253813	 ewma reward: -74.55537047321226
Episode 11	length: 120	reward: -156.0931668717655	 ewma reward: -78.63226029313992
Episode 12	length: 116	reward: -472.44252443231045	 ewma reward: -98.32277350009844
Episode 13	le

Episode 100	length: 115	reward: -109.5949136451231	 ewma reward: -168.72447137979285
Episode 101	length: 155	reward: -36.149710983267966	 ewma reward: -162.0957333599666
Episode 102	length: 88	reward: -77.86623811687208	 ewma reward: -157.88425859781188
Episode 103	length: 95	reward: -197.96785317441126	 ewma reward: -159.88843832664182
Episode 104	length: 99	reward: -115.29581357784807	 ewma reward: -157.65880708920213
Episode 105	length: 76	reward: -101.82924765486446	 ewma reward: -154.86732911748524
Episode 106	length: 141	reward: -117.16883588523575	 ewma reward: -152.98240445587274
Episode 107	length: 136	reward: -106.34869904244873	 ewma reward: -150.65071918520152
Episode 108	length: 126	reward: -158.01813396783655	 ewma reward: -151.01908992433326
Episode 109	length: 123	reward: -232.98194636585256	 ewma reward: -155.11723274640923
Episode 110	length: 108	reward: -101.41545415497633	 ewma reward: -152.4321438168376
Episode 111	length: 117	reward: -165.66960217022313	 ewma rewa

Episode 198	length: 93	reward: -187.16821760313724	 ewma reward: -134.01752276639803
Episode 199	length: 101	reward: 3.758925772215079	 ewma reward: -127.12870033946736
Episode 200	length: 79	reward: -87.31566821154193	 ewma reward: -125.13804873307107
Episode 201	length: 110	reward: -267.51301890097545	 ewma reward: -132.25679724146627
Episode 202	length: 72	reward: -93.17825231427523	 ewma reward: -130.30286999510673
Episode 203	length: 67	reward: -109.13155888747808	 ewma reward: -129.24430443972528
Episode 204	length: 66	reward: -86.5372587921235	 ewma reward: -127.10895215734519
Episode 205	length: 145	reward: -78.89657212600002	 ewma reward: -124.69833315577793
Episode 206	length: 124	reward: -282.23318109131077	 ewma reward: -132.57507555255458
Episode 207	length: 117	reward: -201.69381313271288	 ewma reward: -136.0310124315625
Episode 208	length: 148	reward: -88.0731966508755	 ewma reward: -133.63312164252812
Episode 209	length: 122	reward: -192.4712361470032	 ewma reward: -136

Episode 295	length: 217	reward: -128.88466875998085	 ewma reward: -122.76419444577672
Episode 296	length: 143	reward: -149.74252661844497	 ewma reward: -124.11311105441014
Episode 297	length: 165	reward: -152.8057669389351	 ewma reward: -125.54774384863639
Episode 298	length: 108	reward: -100.72889119744788	 ewma reward: -124.30680121607695
Episode 299	length: 158	reward: -69.69174658594436	 ewma reward: -121.57604848457031
Episode 300	length: 194	reward: -153.77995001089218	 ewma reward: -123.1862435608864
Episode 301	length: 89	reward: -61.57606320461984	 ewma reward: -120.10573454307305
Episode 302	length: 221	reward: -168.30134074434147	 ewma reward: -122.51551485313647
Episode 303	length: 184	reward: -156.2175308207734	 ewma reward: -124.2006156515183
Episode 304	length: 108	reward: -67.03119781858891	 ewma reward: -121.34214475987183
Episode 305	length: 124	reward: -79.18392965478102	 ewma reward: -119.23423400461728
Episode 306	length: 107	reward: -100.39558765811	 ewma reward: 

Episode 392	length: 161	reward: -33.80583044116456	 ewma reward: -97.01602175535838
Episode 393	length: 112	reward: -134.08618981869077	 ewma reward: -98.869530158525
Episode 394	length: 100	reward: -71.13155723720166	 ewma reward: -97.48263151245882
Episode 395	length: 175	reward: -56.735042988251806	 ewma reward: -95.44525208624847
Episode 396	length: 146	reward: -50.40664450149496	 ewma reward: -93.1933217070108
Episode 397	length: 124	reward: -116.27679701445004	 ewma reward: -94.34749547238276
Episode 398	length: 104	reward: 4.669153952293925	 ewma reward: -89.39666300114892
Episode 399	length: 126	reward: -41.268755835383665	 ewma reward: -86.99026764286066
Episode 400	length: 88	reward: -39.37944581541733	 ewma reward: -84.60972655148849
Episode 401	length: 100	reward: -51.41978906534192	 ewma reward: -82.95022967718117
Episode 402	length: 142	reward: -53.02032638204388	 ewma reward: -81.4537345124243
Episode 403	length: 590	reward: -391.3657902835217	 ewma reward: -96.949337300

Episode 491	length: 112	reward: -175.56026489629852	 ewma reward: -66.11381388440617
Episode 492	length: 168	reward: 19.815943020182928	 ewma reward: -61.81732603917671
Episode 493	length: 200	reward: -194.4373500424029	 ewma reward: -68.44832723933801
Episode 494	length: 233	reward: -243.49299902115715	 ewma reward: -77.20056082842896
Episode 495	length: 125	reward: -4.4180701011132015	 ewma reward: -73.56143629206316
Episode 496	length: 147	reward: -96.54139965727954	 ewma reward: -74.71043446032398
Episode 497	length: 189	reward: -116.06885223661268	 ewma reward: -76.77835534913841
Episode 498	length: 123	reward: -55.361861136980906	 ewma reward: -75.70753063853053
Episode 499	length: 162	reward: -48.19400881106226	 ewma reward: -74.3318545471571
Episode 500	length: 205	reward: -300.8075121047654	 ewma reward: -85.65563742503751
Episode 501	length: 145	reward: -81.58012973037532	 ewma reward: -85.45186204030439
Episode 502	length: 192	reward: -185.68129425909717	 ewma reward: -90.46

Episode 589	length: 157	reward: -25.63957524774176	 ewma reward: -63.26983007882414
Episode 590	length: 584	reward: -290.1554913750607	 ewma reward: -74.61411314363596
Episode 591	length: 136	reward: -128.7774036283392	 ewma reward: -77.32227766787112
Episode 592	length: 209	reward: -307.09686836057455	 ewma reward: -88.81100720250629
Episode 593	length: 176	reward: -245.24795671460825	 ewma reward: -96.63285467811139
Episode 594	length: 92	reward: 45.33634243812048	 ewma reward: -89.53439482229979
Episode 595	length: 216	reward: -26.11637225915962	 ewma reward: -86.36349369414278
Episode 596	length: 86	reward: -6.9081131347375475	 ewma reward: -82.3907246661725
Episode 597	length: 166	reward: -15.457983212391184	 ewma reward: -79.04408759348344
Episode 598	length: 138	reward: -81.94753085040351	 ewma reward: -79.18925975632945
Episode 599	length: 198	reward: -54.74740376492655	 ewma reward: -77.9671669567593
Episode 600	length: 134	reward: 22.24515449895064	 ewma reward: -72.956550883

Episode 687	length: 174	reward: -309.51272383238603	 ewma reward: -65.69650246601948
Episode 688	length: 136	reward: -16.741734427410847	 ewma reward: -63.248764064089045
Episode 689	length: 102	reward: -21.972925805814384	 ewma reward: -61.18497215117531
Episode 690	length: 118	reward: -71.50969332175579	 ewma reward: -61.701208209704326
Episode 691	length: 111	reward: -2.333424916290099	 ewma reward: -58.73281904503361
Episode 692	length: 199	reward: 32.39553481085241	 ewma reward: -54.1764013522393
Episode 693	length: 100	reward: 3.453728522164468	 ewma reward: -51.294894858519115
Episode 694	length: 124	reward: -11.699392594464783	 ewma reward: -49.315119745316395
Episode 695	length: 236	reward: -54.543125162312464	 ewma reward: -49.57652001616619
Episode 696	length: 103	reward: -40.19842301570776	 ewma reward: -49.10761516614327
Episode 697	length: 291	reward: -237.65034611153953	 ewma reward: -58.53475171341308
Episode 698	length: 176	reward: 25.463538288612	 ewma reward: -54.334

Episode 785	length: 179	reward: -10.00092370041537	 ewma reward: -58.77472414866733
Episode 786	length: 343	reward: -359.75477011874034	 ewma reward: -73.82372644717097
Episode 787	length: 117	reward: 21.1551192559928	 ewma reward: -69.07478416201278
Episode 788	length: 172	reward: -13.101753989467468	 ewma reward: -66.2761326533855
Episode 789	length: 476	reward: -42.70635999130286	 ewma reward: -65.09764402028136
Episode 790	length: 140	reward: 22.105596834426336	 ewma reward: -60.737481977545976
Episode 791	length: 177	reward: -231.49483772007912	 ewma reward: -69.27534976467263
Episode 792	length: 103	reward: -6.985482947235397	 ewma reward: -66.16085642380077
Episode 793	length: 634	reward: -283.12571193182566	 ewma reward: -77.00909919920201
Episode 794	length: 112	reward: 49.249047238761165	 ewma reward: -70.69619187730385
Episode 795	length: 210	reward: -102.67296414469992	 ewma reward: -72.29503049067365
Episode 796	length: 81	reward: -44.104984240669964	 ewma reward: -70.8855

Episode 882	length: 113	reward: -15.642524786778893	 ewma reward: -58.932501919602664
Episode 883	length: 156	reward: -84.17202643063169	 ewma reward: -60.19447814515411
Episode 884	length: 110	reward: 57.96015362330402	 ewma reward: -54.286746556731195
Episode 885	length: 144	reward: 16.54762543300511	 ewma reward: -50.74502795724438
Episode 886	length: 208	reward: -218.82956887309663	 ewma reward: -59.14925500303699
Episode 887	length: 151	reward: -9.175164265763073	 ewma reward: -56.65055046617329
Episode 888	length: 119	reward: 22.30969811746766	 ewma reward: -52.70253803699124
Episode 889	length: 107	reward: 24.736792810478946	 ewma reward: -48.83057149461773
Episode 890	length: 111	reward: 24.21436170725157	 ewma reward: -45.17832483452426
Episode 891	length: 999	reward: -0.7726140267059616	 ewma reward: -42.95803929413335
Episode 892	length: 150	reward: 16.01334859119524	 ewma reward: -40.00946989986692
Episode 893	length: 999	reward: 8.507536787382008	 ewma reward: -37.58361956

Episode 981	length: 129	reward: 23.898196056181774	 ewma reward: -42.20243001279677
Episode 982	length: 217	reward: -115.31421764822227	 ewma reward: -45.85801939456804
Episode 983	length: 500	reward: -291.5703025688107	 ewma reward: -58.14363355328017
Episode 984	length: 172	reward: -147.1591450160095	 ewma reward: -62.59440912641663
Episode 985	length: 231	reward: -338.03666007250683	 ewma reward: -76.36652167372114
Episode 986	length: 107	reward: -15.376855855088024	 ewma reward: -73.31703838278948
Episode 987	length: 999	reward: 8.781879617451832	 ewma reward: -69.21209248277742
Episode 988	length: 211	reward: -27.08764912655967	 ewma reward: -67.10587031496652
Episode 989	length: 208	reward: -142.85083908365482	 ewma reward: -70.89311875340094
Episode 990	length: 145	reward: -58.81570637675341	 ewma reward: -70.28924813456857
Episode 991	length: 139	reward: -40.23307828019385	 ewma reward: -68.78643964184984
Episode 992	length: 181	reward: -96.82813027766674	 ewma reward: -70.1885

Episode 1079	length: 116	reward: 40.00759484690059	 ewma reward: -19.738090427094203
Episode 1080	length: 188	reward: -60.727971322165516	 ewma reward: -21.78758447184777
Episode 1081	length: 124	reward: 24.971518543463915	 ewma reward: -19.449629321082185
Episode 1082	length: 999	reward: -26.03981237148918	 ewma reward: -19.779138473602536
Episode 1083	length: 128	reward: 9.752818505091156	 ewma reward: -18.302540624667852
Episode 1084	length: 112	reward: 15.586108108377374	 ewma reward: -16.608108188015592
Episode 1085	length: 146	reward: 7.2953032049713755	 ewma reward: -15.412937618366243
Episode 1086	length: 155	reward: -48.53167482864028	 ewma reward: -17.068874478879945
Episode 1087	length: 107	reward: -49.58810879788856	 ewma reward: -18.694836194830376
Episode 1088	length: 156	reward: -32.05617149059091	 ewma reward: -19.362902959618403
Episode 1089	length: 117	reward: -13.429410425209511	 ewma reward: -19.066228332897957
Episode 1090	length: 130	reward: -7.02459508371048	 ewm

Episode 1175	length: 187	reward: -143.72241781952917	 ewma reward: -26.26894531465953
Episode 1176	length: 999	reward: 21.715563150246563	 ewma reward: -23.86971989141422
Episode 1177	length: 999	reward: 61.16634426067505	 ewma reward: -19.617916683809757
Episode 1178	length: 142	reward: -1.1658899092580697	 ewma reward: -18.69531534508217
Episode 1179	length: 999	reward: -8.499679466500147	 ewma reward: -18.18553355115307
Episode 1180	length: 999	reward: 40.27031155087214	 ewma reward: -15.26274129605181
Episode 1181	length: 283	reward: -79.39519670980695	 ewma reward: -18.46936406673957
Episode 1182	length: 999	reward: 17.490620816004615	 ewma reward: -16.671364822602357
Episode 1183	length: 999	reward: 22.268379782138325	 ewma reward: -14.724377592365322
Episode 1184	length: 240	reward: -47.24822890908854	 ewma reward: -16.35057015820148
Episode 1185	length: 999	reward: 33.36746850856212	 ewma reward: -13.864668224863301
Episode 1186	length: 219	reward: -28.636033748176573	 ewma rew

Episode 1273	length: 262	reward: -199.07922125174989	 ewma reward: -32.94454027816431
Episode 1274	length: 120	reward: 1.017443280249907	 ewma reward: -31.246441100243597
Episode 1275	length: 999	reward: 40.89212083838578	 ewma reward: -27.63951300331213
Episode 1276	length: 999	reward: 8.545053350260863	 ewma reward: -25.83028468563348
Episode 1277	length: 129	reward: -11.537356678918385	 ewma reward: -25.115638285297724
Episode 1278	length: 999	reward: -39.37423319849276	 ewma reward: -25.828568030957474
Episode 1279	length: 162	reward: 41.58771652831612	 ewma reward: -22.457753802993793
Episode 1280	length: 999	reward: -51.228644079789035	 ewma reward: -23.896298316833555
Episode 1281	length: 999	reward: 3.7166565079919796	 ewma reward: -22.515650575592275
Episode 1282	length: 119	reward: 7.218310558780345	 ewma reward: -21.028952518873645
Episode 1283	length: 225	reward: 59.134953392406175	 ewma reward: -17.02075722330965
Episode 1284	length: 269	reward: -112.42752464130498	 ewma r

Episode 1370	length: 999	reward: 77.02793894325555	 ewma reward: 4.9620132610864776
Episode 1371	length: 999	reward: 82.63211893240924	 ewma reward: 8.845518544652617
Episode 1372	length: 93	reward: 1.1833722153935895	 ewma reward: 8.462411228189666
Episode 1373	length: 138	reward: 31.836632102988318	 ewma reward: 9.631122271929598
Episode 1374	length: 173	reward: 6.465662774398879	 ewma reward: 9.472849297053061
Episode 1375	length: 116	reward: -35.69419996605478	 ewma reward: 7.214496833897668
Episode 1376	length: 134	reward: 19.130263320674175	 ewma reward: 7.810285158236493
Episode 1377	length: 139	reward: 11.88639753764869	 ewma reward: 8.014090777207102
Episode 1378	length: 148	reward: 20.77458559218725	 ewma reward: 8.652115517956108
Episode 1379	length: 999	reward: 15.424702519204839	 ewma reward: 8.990744868018545
Episode 1380	length: 283	reward: -151.65246819006336	 ewma reward: 0.9585842151144499
Episode 1381	length: 999	reward: -12.02480182508098	 ewma reward: 0.30941491310

Episode 1468	length: 127	reward: 30.202944208760556	 ewma reward: 19.000160966175493
Episode 1469	length: 261	reward: -57.117064262828805	 ewma reward: 15.194299704725278
Episode 1470	length: 128	reward: 58.01009914327227	 ewma reward: 17.33508967665263
Episode 1471	length: 168	reward: 16.03805467829551	 ewma reward: 17.270237926734772
Episode 1472	length: 154	reward: 53.96364975885524	 ewma reward: 19.104908518340796
Episode 1473	length: 137	reward: 20.985222679748674	 ewma reward: 19.198924226411187
Episode 1474	length: 999	reward: 93.50574208657882	 ewma reward: 22.914265119419568
Episode 1475	length: 173	reward: 45.81747127735292	 ewma reward: 24.059425427316235
Episode 1476	length: 104	reward: 51.55707634161908	 ewma reward: 25.434307973031377
Episode 1477	length: 239	reward: -109.05513460434695	 ewma reward: 18.70983584416246
Episode 1478	length: 192	reward: 61.24522352114073	 ewma reward: 20.836605228011372
Episode 1479	length: 148	reward: -61.679273857404695	 ewma reward: 16.71

Episode 1566	length: 140	reward: 54.5034276961824	 ewma reward: 26.219457753043187
Episode 1567	length: 156	reward: 27.21482656117051	 ewma reward: 26.269226193449555
Episode 1568	length: 112	reward: -13.695150232704648	 ewma reward: 24.271007372141842
Episode 1569	length: 172	reward: 6.813841810985053	 ewma reward: 23.398149094084
Episode 1570	length: 152	reward: 1.5470729215588506	 ewma reward: 22.30559528545774
Episode 1571	length: 999	reward: 15.263449401846227	 ewma reward: 21.953487991277164
Episode 1572	length: 139	reward: 2.705314262146146	 ewma reward: 20.991079304820612
Episode 1573	length: 146	reward: 27.44216128999456	 ewma reward: 21.313633404079308
Episode 1574	length: 999	reward: 82.9027057904279	 ewma reward: 24.39308702339674
Episode 1575	length: 999	reward: 17.566628469780174	 ewma reward: 24.05176409571591
Episode 1576	length: 156	reward: 5.246581282211267	 ewma reward: 23.111504955040676
Episode 1577	length: 132	reward: -8.622376901307675	 ewma reward: 21.5248108622

Episode 1664	length: 999	reward: 97.97627387567886	 ewma reward: 35.9616260930654
Episode 1665	length: 204	reward: -17.45295403701809	 ewma reward: 33.29089708656122
Episode 1666	length: 187	reward: -22.444007327294543	 ewma reward: 30.504151865868433
Episode 1667	length: 999	reward: 113.18872788480888	 ewma reward: 34.63838066681545
Episode 1668	length: 999	reward: 86.96969371105311	 ewma reward: 37.25494631902733
Episode 1669	length: 335	reward: -139.0142222866562	 ewma reward: 28.441487888743154
Episode 1670	length: 999	reward: 87.75406908605088	 ewma reward: 31.40711694860854
Episode 1671	length: 285	reward: -40.87380991335135	 ewma reward: 27.793070605510543
Episode 1672	length: 164	reward: 12.777519272699337	 ewma reward: 27.042293038869982
Episode 1673	length: 149	reward: 21.326983030664948	 ewma reward: 26.756527538459732
Episode 1674	length: 999	reward: 26.05580280320123	 ewma reward: 26.72149130169681
Episode 1675	length: 999	reward: 93.10900821289589	 ewma reward: 30.0408671

Episode 1762	length: 999	reward: 61.36384633163318	 ewma reward: 56.40049097664804
Episode 1763	length: 999	reward: 107.13015304064768	 ewma reward: 58.936974079848014
Episode 1764	length: 999	reward: 131.43453559379483	 ewma reward: 62.56185215554535
Episode 1765	length: 999	reward: 59.348529112967285	 ewma reward: 62.40118600341645
Episode 1766	length: 999	reward: 44.63258692519101	 ewma reward: 61.51275604950518
Episode 1767	length: 999	reward: 85.31659306866132	 ewma reward: 62.70294790046298
Episode 1768	length: 999	reward: 68.0492016746589	 ewma reward: 62.970260589172774
Episode 1769	length: 999	reward: 5.557834564965506	 ewma reward: 60.09963928796241
Episode 1770	length: 224	reward: -3.9610817965410945	 ewma reward: 56.89660323373723
Episode 1771	length: 138	reward: 36.479917611783065	 ewma reward: 55.87576895263952
Episode 1772	length: 434	reward: -99.75932801109767	 ewma reward: 48.09401410445266
Episode 1773	length: 266	reward: 29.502206355138526	 ewma reward: 47.1644237169

Episode 1860	length: 249	reward: -117.84051259210528	 ewma reward: 54.459858148699084
Episode 1861	length: 999	reward: 37.69355275524703	 ewma reward: 53.62154287902648
Episode 1862	length: 191	reward: -20.96704633330529	 ewma reward: 49.892113418409885
Episode 1863	length: 167	reward: 20.09911214618502	 ewma reward: 48.402463354798634
Episode 1864	length: 999	reward: 55.758965511875545	 ewma reward: 48.77028846265248
Episode 1865	length: 999	reward: 90.99443694502855	 ewma reward: 50.88149588677128
Episode 1866	length: 999	reward: 12.002194822452429	 ewma reward: 48.93753083355534
Episode 1867	length: 999	reward: 54.104199766893444	 ewma reward: 49.195864280222246
Episode 1868	length: 224	reward: -4.3788569663905434	 ewma reward: 46.5171282178916
Episode 1869	length: 331	reward: 200.19802310566473	 ewma reward: 54.20117296228026
Episode 1870	length: 999	reward: 166.10230244865943	 ewma reward: 59.79622943659922
Episode 1871	length: 176	reward: 18.41362372743984	 ewma reward: 57.727099

Episode 1959	length: 999	reward: 150.6115981943385	 ewma reward: 91.68697185137628
Episode 1960	length: 999	reward: 172.04028148444974	 ewma reward: 95.70463733302995
Episode 1961	length: 999	reward: 87.35272562435564	 ewma reward: 95.28704174759623
Episode 1962	length: 999	reward: 44.16771036641276	 ewma reward: 92.73107517853705
Episode 1963	length: 999	reward: 110.48634913652019	 ewma reward: 93.61883887643621
Episode 1964	length: 999	reward: 148.4401415580607	 ewma reward: 96.35990401051743
Episode 1965	length: 999	reward: 101.15317456901371	 ewma reward: 96.59956753844224
Episode 1966	length: 999	reward: 86.20525495382628	 ewma reward: 96.07985190921144
Episode 1967	length: 999	reward: 61.75742078053601	 ewma reward: 94.36373035277767
Episode 1968	length: 999	reward: 97.3186484849865	 ewma reward: 94.5114762593881
Episode 1969	length: 999	reward: 149.95922000592728	 ewma reward: 97.28386344671506
Episode 1970	length: 281	reward: 9.516866880222054	 ewma reward: 92.8955136183904
Epi

Episode 2057	length: 999	reward: 148.9960980012262	 ewma reward: 116.56107366873378
Episode 2058	length: 999	reward: 97.50538656834001	 ewma reward: 115.6082893137141
Episode 2059	length: 186	reward: -34.96876405034827	 ewma reward: 108.07943664551097
Episode 2060	length: 999	reward: 74.54432848991598	 ewma reward: 106.4026812377312
Episode 2061	length: 999	reward: 173.4263242664202	 ewma reward: 109.75386338916564
Episode 2062	length: 999	reward: 162.28779025384162	 ewma reward: 112.38055973239945
Episode 2063	length: 999	reward: 147.32371642523847	 ewma reward: 114.12771756704139
Episode 2064	length: 188	reward: -32.657255003278934	 ewma reward: 106.78846893852536
Episode 2065	length: 172	reward: 13.570628302411052	 ewma reward: 102.12757690671964
Episode 2066	length: 250	reward: -12.85637058254244	 ewma reward: 96.37837953225653
Episode 2067	length: 999	reward: 50.5581068277494	 ewma reward: 94.08736589703116
Episode 2068	length: 999	reward: 124.2304880649408	 ewma reward: 95.594522

Episode 2155	length: 650	reward: 221.00004358333794	 ewma reward: 117.76085207367036
Episode 2156	length: 999	reward: 135.5084442671767	 ewma reward: 118.64823168334567
Episode 2157	length: 999	reward: 120.552410785872	 ewma reward: 118.74344063847198
Episode 2158	length: 999	reward: 137.96344965053152	 ewma reward: 119.70444108907495
Episode 2159	length: 999	reward: 131.90112759508037	 ewma reward: 120.3142754143752
Episode 2160	length: 999	reward: 158.10907008172236	 ewma reward: 122.20401514774255
Episode 2161	length: 999	reward: 142.3172647516508	 ewma reward: 123.20967762793796
Episode 2162	length: 999	reward: 134.4395389226052	 ewma reward: 123.77117069267132
Episode 2163	length: 999	reward: 127.71358074375276	 ewma reward: 123.96829119522539
Episode 2164	length: 999	reward: 172.92088086080162	 ewma reward: 126.4159206785042
Episode 2165	length: 144	reward: 74.24867082554476	 ewma reward: 123.80755818585621
Episode 2166	length: 999	reward: 134.1037830379821	 ewma reward: 124.3223

Episode 2253	length: 999	reward: 134.33363766897537	 ewma reward: 123.2884592084059
Episode 2254	length: 999	reward: 113.48101388035437	 ewma reward: 122.79808694200332
Episode 2255	length: 999	reward: 134.94877730960914	 ewma reward: 123.40562146038361
Episode 2256	length: 999	reward: 143.47804604569555	 ewma reward: 124.4092426896492
Episode 2257	length: 999	reward: 138.96873869462723	 ewma reward: 125.1372174898981
Episode 2258	length: 999	reward: 167.65306596452947	 ewma reward: 127.26300991362967
Episode 2259	length: 999	reward: 115.46528736733949	 ewma reward: 126.67312378631516
Episode 2260	length: 999	reward: 145.53133223223696	 ewma reward: 127.61603420861124
Episode 2261	length: 999	reward: 114.21183011849018	 ewma reward: 126.94582400410519
Episode 2262	length: 999	reward: 115.50708735356424	 ewma reward: 126.37388717157815
Episode 2263	length: 999	reward: 102.02578452840724	 ewma reward: 125.1564820394196
Episode 2264	length: 999	reward: 137.87450574287385	 ewma reward: 125

Episode 2351	length: 999	reward: 122.26729798785183	 ewma reward: 117.10566551011539
Episode 2352	length: 999	reward: 148.77136625465965	 ewma reward: 118.6889505473426
Episode 2353	length: 999	reward: 96.34249808451987	 ewma reward: 117.57162792420145
Episode 2354	length: 999	reward: 130.80470330249986	 ewma reward: 118.23328169311637
Episode 2355	length: 999	reward: 110.39253083203835	 ewma reward: 117.84124415006247
Episode 2356	length: 999	reward: 106.1584166437161	 ewma reward: 117.25710277474514
Episode 2357	length: 999	reward: 156.7914251294393	 ewma reward: 119.23381889247985
Episode 2358	length: 999	reward: 131.69333832616545	 ewma reward: 119.85679486416413
Episode 2359	length: 999	reward: 137.70465874198467	 ewma reward: 120.74918805805515
Episode 2360	length: 999	reward: 132.82807142813195	 ewma reward: 121.35313222655898
Episode 2361	length: 237	reward: 36.19994576294525	 ewma reward: 117.0954729033783
Episode 2362	length: 999	reward: 176.4993683746792	 ewma reward: 120.06

Episode 2448	length: 999	reward: 135.36422173761565	 ewma reward: 116.76498300077154
Episode 2449	length: 999	reward: 129.56977715240683	 ewma reward: 117.4052227083533
Episode 2450	length: 999	reward: 177.7444732998416	 ewma reward: 120.42218523792772
Episode 2451	length: 999	reward: 162.66847294132904	 ewma reward: 122.53449962309779
Episode 2452	length: 189	reward: 8.253074783706353	 ewma reward: 116.82042838112821
Episode 2453	length: 193	reward: -7.965045111495613	 ewma reward: 110.58115470649702
Episode 2454	length: 298	reward: 236.9855205864421	 ewma reward: 116.90137300049426
Episode 2455	length: 999	reward: 150.43140562457893	 ewma reward: 118.57787463169849
Episode 2456	length: 999	reward: 188.9805619777783	 ewma reward: 122.09800899900247
Episode 2457	length: 999	reward: 146.31269504588215	 ewma reward: 123.30874330134645
Episode 2458	length: 161	reward: 67.25667246191298	 ewma reward: 120.50613975937478
Episode 2459	length: 999	reward: 149.95171529287595	 ewma reward: 121.9

Episode 2545	length: 999	reward: 120.66766623989577	 ewma reward: 131.27703702134673
Episode 2546	length: 999	reward: 144.27841826149586	 ewma reward: 131.92710608335418
Episode 2547	length: 999	reward: 113.03454997270589	 ewma reward: 130.98247827782174
Episode 2548	length: 999	reward: 101.19665622255974	 ewma reward: 129.49318717505864
Episode 2549	length: 999	reward: 109.99830419406604	 ewma reward: 128.51844302600898
Episode 2550	length: 999	reward: 90.1426576339528	 ewma reward: 126.59965375640617
Episode 2551	length: 999	reward: 149.4061472578177	 ewma reward: 127.73997843147674
Episode 2552	length: 999	reward: 106.88012097585343	 ewma reward: 126.69698555869556
Episode 2553	length: 999	reward: 154.75188136655555	 ewma reward: 128.09973034908856
Episode 2554	length: 999	reward: 160.09689607814673	 ewma reward: 129.69958863554146
Episode 2555	length: 999	reward: 103.58960297562014	 ewma reward: 128.3940893525454
Episode 2556	length: 999	reward: 93.99653150036684	 ewma reward: 126.

Episode 2642	length: 999	reward: 182.10017968650305	 ewma reward: 120.20538626700619
Episode 2643	length: 192	reward: 10.755823574293956	 ewma reward: 114.73290813237057
Episode 2644	length: 999	reward: 149.1892500174368	 ewma reward: 116.45572522662387
Episode 2645	length: 328	reward: 237.10600225429243	 ewma reward: 122.4882390780073
Episode 2646	length: 999	reward: 115.93016711098878	 ewma reward: 122.16033547965637
Episode 2647	length: 999	reward: 168.76296622375995	 ewma reward: 124.49046701686154
Episode 2648	length: 999	reward: 86.92046456923839	 ewma reward: 122.61196689448037
Episode 2649	length: 999	reward: 128.59866392368383	 ewma reward: 122.91130174594055
Episode 2650	length: 999	reward: 156.5184934835487	 ewma reward: 124.59166133282095
Episode 2651	length: 999	reward: 115.37181804071881	 ewma reward: 124.13066916821583
Episode 2652	length: 999	reward: 168.57001553769226	 ewma reward: 126.35263648668965
Episode 2653	length: 999	reward: 165.6957323936263	 ewma reward: 128.

Episode 2740	length: 999	reward: 143.371458994577	 ewma reward: 136.24903934973992
Episode 2741	length: 999	reward: 121.06640589320209	 ewma reward: 135.48990767691302
Episode 2742	length: 999	reward: 124.13260091510622	 ewma reward: 134.92204233882268
Episode 2743	length: 999	reward: 170.68862864287817	 ewma reward: 136.71037165402547
Episode 2744	length: 999	reward: 157.31861727820717	 ewma reward: 137.74078393523456
Episode 2745	length: 999	reward: 142.53727578990348	 ewma reward: 137.98060852796797
Episode 2746	length: 999	reward: 159.9564257917788	 ewma reward: 139.0793993911585
Episode 2747	length: 999	reward: 123.2670048888957	 ewma reward: 138.28877966604534
Episode 2748	length: 999	reward: 159.5269627225271	 ewma reward: 139.35068881886943
Episode 2749	length: 999	reward: 122.26657492684043	 ewma reward: 138.496483124268
Episode 2750	length: 999	reward: 149.1594820041638	 ewma reward: 139.02963306826277
Episode 2751	length: 999	reward: 154.1865941883286	 ewma reward: 139.78748

Episode 2838	length: 999	reward: 114.85432533213087	 ewma reward: 125.34633393430666
Episode 2839	length: 989	reward: 225.12887687185946	 ewma reward: 130.3354610811843
Episode 2840	length: 999	reward: 160.94461828844874	 ewma reward: 131.8659189415475
Episode 2841	length: 999	reward: 118.87128845460931	 ewma reward: 131.2161874172006
Episode 2842	length: 999	reward: 189.2498193749351	 ewma reward: 134.1178690150873
Episode 2843	length: 999	reward: 148.383953120875	 ewma reward: 134.83117322037668
Episode 2844	length: 999	reward: 149.31988622200527	 ewma reward: 135.55560887045812
Episode 2845	length: 999	reward: 142.98911597464848	 ewma reward: 135.92728422566762
Episode 2846	length: 999	reward: 142.82721606689609	 ewma reward: 136.27228081772904
Episode 2847	length: 999	reward: 140.36222156753865	 ewma reward: 136.47677785521952
Episode 2848	length: 231	reward: 218.56829829257123	 ewma reward: 140.5813538770871
Episode 2849	length: 999	reward: 144.4947340845086	 ewma reward: 140.7770

Episode 2936	length: 415	reward: 219.7483838592786	 ewma reward: 142.1055720844036
Episode 2937	length: 999	reward: 125.29455249261332	 ewma reward: 141.26502110481408
Episode 2938	length: 999	reward: 155.91437468822463	 ewma reward: 141.9974887839846
Episode 2939	length: 999	reward: 173.05652868263934	 ewma reward: 143.55044077891733
Episode 2940	length: 999	reward: 135.7255563803641	 ewma reward: 143.15919655898966
Episode 2941	length: 813	reward: 214.8996491075834	 ewma reward: 146.74621918641932
Episode 2942	length: 999	reward: 133.3044627470167	 ewma reward: 146.0741313644492
Episode 2943	length: 999	reward: 144.6299573000892	 ewma reward: 146.0019226612312
Episode 2944	length: 999	reward: 126.47231447266431	 ewma reward: 145.02544225180284
Episode 2945	length: 999	reward: 118.63667815066368	 ewma reward: 143.70600404674587
Episode 2946	length: 999	reward: 137.35538791531985	 ewma reward: 143.3884732401746
Episode 2947	length: 173	reward: 23.65003460200215	 ewma reward: 137.401551

Episode 3034	length: 999	reward: 144.00388096721318	 ewma reward: 134.11361717757183
Episode 3035	length: 999	reward: 145.2846649149321	 ewma reward: 134.67216956443983
Episode 3036	length: 999	reward: 141.78953633712044	 ewma reward: 135.02803790307385
Episode 3037	length: 999	reward: 131.11373848224224	 ewma reward: 134.83232293203227
Episode 3038	length: 999	reward: 145.0549791296108	 ewma reward: 135.34345574191119
Episode 3039	length: 999	reward: 157.25012586504786	 ewma reward: 136.438789248068
Episode 3040	length: 999	reward: 143.83456201466183	 ewma reward: 136.80857788639767
Episode 3041	length: 999	reward: 149.1965531586743	 ewma reward: 137.4279766500115
Episode 3042	length: 999	reward: 127.33475579537262	 ewma reward: 136.92331560727953
Episode 3043	length: 999	reward: 131.0460673382011	 ewma reward: 136.6294531938256
Episode 3044	length: 599	reward: 215.5315072199292	 ewma reward: 140.5745558951308
Episode 3045	length: 999	reward: 137.21079611423337	 ewma reward: 140.40636

Episode 3132	length: 999	reward: 172.20093256177944	 ewma reward: 142.72164744084066
Episode 3133	length: 999	reward: 90.24715033265655	 ewma reward: 140.09792258543143
Episode 3134	length: 999	reward: 118.64702559107091	 ewma reward: 139.02537773571342
Episode 3135	length: 999	reward: 141.6472463032518	 ewma reward: 139.15647116409033
Episode 3136	length: 999	reward: 131.40957373179626	 ewma reward: 138.76912629247562
Episode 3137	length: 999	reward: 141.1009304531255	 ewma reward: 138.88571650050812
Episode 3138	length: 999	reward: 93.09269570934833	 ewma reward: 136.59606546095011
Episode 3139	length: 999	reward: 138.5225447070531	 ewma reward: 136.69238942325526
Episode 3140	length: 999	reward: 127.46127308156575	 ewma reward: 136.2308336061708
Episode 3141	length: 999	reward: 141.41674550805902	 ewma reward: 136.49012920126518
Episode 3142	length: 999	reward: 182.06484244971227	 ewma reward: 138.76886486368753
Episode 3143	length: 999	reward: 131.56020455871518	 ewma reward: 138.4

Episode 3230	length: 921	reward: 253.9401725338782	 ewma reward: 144.6394225023468
Episode 3231	length: 999	reward: 151.62404832432642	 ewma reward: 144.98865379344576
Episode 3232	length: 999	reward: 117.45351156551355	 ewma reward: 143.61189668204915
Episode 3233	length: 999	reward: 153.42594066181587	 ewma reward: 144.10259888103747
Episode 3234	length: 999	reward: 127.28900377786549	 ewma reward: 143.26191912587885
Episode 3235	length: 999	reward: 147.56504848411925	 ewma reward: 143.47707559379086
Episode 3236	length: 999	reward: 182.47027371302474	 ewma reward: 145.42673549975254
Episode 3237	length: 999	reward: 155.2230097891376	 ewma reward: 145.91654921422176
Episode 3238	length: 999	reward: 164.80280994563572	 ewma reward: 146.86086225079248
Episode 3239	length: 999	reward: 148.42681057281433	 ewma reward: 146.93915966689357
Episode 3240	length: 275	reward: 243.929024089398	 ewma reward: 151.7886528880188
Episode 3241	length: 155	reward: -6.051386947859797	 ewma reward: 143.8

Episode 3328	length: 999	reward: 151.47189179945022	 ewma reward: 156.122510165856
Episode 3329	length: 999	reward: 147.37457188492002	 ewma reward: 155.68511325180918
Episode 3330	length: 999	reward: 148.4926926082574	 ewma reward: 155.3254922196316
Episode 3331	length: 999	reward: 96.4313196237501	 ewma reward: 152.38078358983753
Episode 3332	length: 999	reward: 149.99853263328876	 ewma reward: 152.26167104201008
Episode 3333	length: 999	reward: 153.34131238349914	 ewma reward: 152.31565310908454
Episode 3334	length: 999	reward: 130.54188555339005	 ewma reward: 151.2269647312998
Episode 3335	length: 999	reward: 161.96022019607943	 ewma reward: 151.76362750453876
Episode 3336	length: 999	reward: 155.16869254525903	 ewma reward: 151.93388075657475
Episode 3337	length: 999	reward: 168.20005565310703	 ewma reward: 152.74718950140135
Episode 3338	length: 999	reward: 118.88336171263524	 ewma reward: 151.05399811196304
Episode 3339	length: 999	reward: 176.4110727631646	 ewma reward: 152.321

Episode 3426	length: 999	reward: 160.80718472929146	 ewma reward: 146.73326274428595
Episode 3427	length: 999	reward: 178.76464758035854	 ewma reward: 148.33483198608957
Episode 3428	length: 999	reward: 169.37448894001054	 ewma reward: 149.38681483378562
Episode 3429	length: 999	reward: 181.35711822418264	 ewma reward: 150.98533000330548
Episode 3430	length: 999	reward: 158.264936703991	 ewma reward: 151.34931033833973
Episode 3431	length: 999	reward: 172.05938274502287	 ewma reward: 152.38481395867387
Episode 3432	length: 999	reward: 149.4473149072192	 ewma reward: 152.23793900610116
Episode 3433	length: 999	reward: 141.31418727220327	 ewma reward: 151.69175141940624
Episode 3434	length: 999	reward: 144.5223688313604	 ewma reward: 151.33328229000395
Episode 3435	length: 999	reward: 141.2135060808279	 ewma reward: 150.82729347954512
Episode 3436	length: 999	reward: 151.1233705276869	 ewma reward: 150.8420973319522
Episode 3437	length: 999	reward: 177.23612193461392	 ewma reward: 152.16

Episode 3524	length: 999	reward: 148.6504408543211	 ewma reward: 139.7157832450206
Episode 3525	length: 999	reward: 179.953688420894	 ewma reward: 141.72767850381427
Episode 3526	length: 999	reward: 192.71405322961564	 ewma reward: 144.27699724010435
Episode 3527	length: 999	reward: 126.38493372034073	 ewma reward: 143.38239406411617
Episode 3528	length: 990	reward: 238.60010796397583	 ewma reward: 148.14327975910916
Episode 3529	length: 247	reward: 222.98972085198625	 ewma reward: 151.885601813753
Episode 3530	length: 146	reward: -14.230953271952657	 ewma reward: 143.57977405946772
Episode 3531	length: 999	reward: 139.06895512780983	 ewma reward: 143.35423311288483
Episode 3532	length: 999	reward: 177.20282211102858	 ewma reward: 145.04666256279202
Episode 3533	length: 999	reward: 179.30921585979087	 ewma reward: 146.75979022764196
Episode 3534	length: 999	reward: 170.121234303008	 ewma reward: 147.92786243141026
Episode 3535	length: 999	reward: 163.17814902454933	 ewma reward: 148.69

Episode 3621	length: 999	reward: 139.88597851105482	 ewma reward: 152.14215170686595
Episode 3622	length: 255	reward: 249.19238023510374	 ewma reward: 156.99466313327784
Episode 3623	length: 135	reward: 62.69729472559112	 ewma reward: 152.2797947128935
Episode 3624	length: 999	reward: 138.3439330853178	 ewma reward: 151.58300163151472
Episode 3625	length: 334	reward: 243.26233997754244	 ewma reward: 156.1669685488161
Episode 3626	length: 999	reward: 152.64131431741805	 ewma reward: 155.9906858372462
Episode 3627	length: 999	reward: 153.41111564098728	 ewma reward: 155.86170732743324
Episode 3628	length: 999	reward: 135.77477023484596	 ewma reward: 154.8573604728039
Episode 3629	length: 999	reward: 189.40667952658563	 ewma reward: 156.58482642549296
Episode 3630	length: 999	reward: 164.4652314230472	 ewma reward: 156.97884667537068
Episode 3631	length: 999	reward: 160.9112207953222	 ewma reward: 157.17546538136824
Episode 3632	length: 999	reward: 169.3004008392642	 ewma reward: 157.7817

Episode 3719	length: 999	reward: 159.43942993664535	 ewma reward: 149.30406649974248
Episode 3720	length: 999	reward: 149.3556499844429	 ewma reward: 149.3066456739775
Episode 3721	length: 999	reward: 156.5930850832286	 ewma reward: 149.67096764444005
Episode 3722	length: 999	reward: 133.9110096195476	 ewma reward: 148.88296974319542
Episode 3723	length: 999	reward: 120.89572663174967	 ewma reward: 147.48360758762314
Episode 3724	length: 999	reward: 148.8819812470647	 ewma reward: 147.55352627059523
Episode 3725	length: 999	reward: 129.72716747009386	 ewma reward: 146.66220833057014
Episode 3726	length: 999	reward: 158.6748638549425	 ewma reward: 147.26284110678876
Episode 3727	length: 999	reward: 159.81851642645574	 ewma reward: 147.8906248727721
Episode 3728	length: 999	reward: 127.93669786405499	 ewma reward: 146.8929285223362
Episode 3729	length: 999	reward: 161.7572396429253	 ewma reward: 147.63614407836565
Episode 3730	length: 999	reward: 169.02218276836638	 ewma reward: 148.7054

Episode 3817	length: 999	reward: 154.58539600478298	 ewma reward: 150.47215590009728
Episode 3818	length: 156	reward: 63.21306542760203	 ewma reward: 146.10920137647253
Episode 3819	length: 997	reward: 256.2986046942767	 ewma reward: 151.61867154236273
Episode 3820	length: 999	reward: 132.5332130117447	 ewma reward: 150.66439861583183
Episode 3821	length: 999	reward: 190.43576656671345	 ewma reward: 152.6529670133759
Episode 3822	length: 999	reward: 159.30924500524017	 ewma reward: 152.98578091296912
Episode 3823	length: 999	reward: 159.82124692209734	 ewma reward: 153.32755421342551
Episode 3824	length: 999	reward: 136.43983751934215	 ewma reward: 152.48316837872136
Episode 3825	length: 999	reward: 117.39155076333526	 ewma reward: 150.72858749795205
Episode 3826	length: 239	reward: 284.05150150334913	 ewma reward: 157.3947331982219
Episode 3827	length: 999	reward: 161.64861643439303	 ewma reward: 157.60742736003044
Episode 3828	length: 999	reward: 138.4572308430301	 ewma reward: 156.6

Episode 3915	length: 999	reward: 141.02929471141508	 ewma reward: 151.65811909246094
Episode 3916	length: 999	reward: 162.0527476280244	 ewma reward: 152.17785051923912
Episode 3917	length: 999	reward: 139.11425336146945	 ewma reward: 151.52467066135063
Episode 3918	length: 999	reward: 137.46469474196124	 ewma reward: 150.82167186538115
Episode 3919	length: 999	reward: 119.5476808300369	 ewma reward: 149.25797231361392
Episode 3920	length: 999	reward: 138.6614266697197	 ewma reward: 148.72814503141922
Episode 3921	length: 999	reward: 140.97699667147066	 ewma reward: 148.3405876134218
Episode 3922	length: 999	reward: 154.1283115073544	 ewma reward: 148.62997380811842
Episode 3923	length: 999	reward: 145.55871018410457	 ewma reward: 148.47641062691773
Episode 3924	length: 426	reward: 215.11861849429604	 ewma reward: 151.80852102028666
Episode 3925	length: 999	reward: 133.35134098015996	 ewma reward: 150.88566201828033
Episode 3926	length: 999	reward: 143.6197154598907	 ewma reward: 150.5

Episode 4013	length: 999	reward: 154.1778031748344	 ewma reward: 158.7846890828897
Episode 4014	length: 999	reward: 169.05893665967992	 ewma reward: 159.2984014617292
Episode 4015	length: 999	reward: 118.67046694966069	 ewma reward: 157.26700473612578
Episode 4016	length: 999	reward: 187.36464933150722	 ewma reward: 158.77188696589485
Episode 4017	length: 999	reward: 189.53452410286363	 ewma reward: 160.3100188227433
Episode 4018	length: 999	reward: 128.35771459068584	 ewma reward: 158.71240361114045
Episode 4019	length: 999	reward: 126.68441506294211	 ewma reward: 157.1110041837305
Episode 4020	length: 999	reward: 117.41186757109931	 ewma reward: 155.12604735309893
Episode 4021	length: 999	reward: 160.51730438592287	 ewma reward: 155.39561020474014
Episode 4022	length: 999	reward: 150.78984583737264	 ewma reward: 155.16532198637177
Episode 4023	length: 153	reward: 34.72569431424765	 ewma reward: 149.14334060276553
Episode 4024	length: 999	reward: 177.64104307567666	 ewma reward: 150.5

Episode 4110	length: 999	reward: 136.2732176033204	 ewma reward: 156.76863626015754
Episode 4111	length: 999	reward: 148.87685531086922	 ewma reward: 156.3740472126931
Episode 4112	length: 999	reward: 165.04138527715858	 ewma reward: 156.80741411591637
Episode 4113	length: 999	reward: 124.49013577136945	 ewma reward: 155.19155019868901
Episode 4114	length: 999	reward: 116.37735825926156	 ewma reward: 153.25084060171764
Episode 4115	length: 386	reward: 246.0138497092865	 ewma reward: 157.88899105709606
Episode 4116	length: 999	reward: 164.52571684808962	 ewma reward: 158.22082734664573
Episode 4117	length: 999	reward: 138.04675662916145	 ewma reward: 157.21212381077152
Episode 4118	length: 999	reward: 174.600791830565	 ewma reward: 158.08155721176118
Episode 4119	length: 999	reward: 131.76009293966587	 ewma reward: 156.7654839981564
Episode 4120	length: 999	reward: 154.72396580606812	 ewma reward: 156.663408088552
Episode 4121	length: 999	reward: 139.84699364072713	 ewma reward: 155.822

Episode 4207	length: 999	reward: 151.81357996859472	 ewma reward: 151.6872926870313
Episode 4208	length: 999	reward: 178.2860748265859	 ewma reward: 153.017231794009
Episode 4209	length: 999	reward: 130.32528703592652	 ewma reward: 151.88263455610488
Episode 4210	length: 252	reward: 258.8379449678164	 ewma reward: 157.23040007669044
Episode 4211	length: 999	reward: 141.19863240714466	 ewma reward: 156.42881169321316
Episode 4212	length: 999	reward: 142.79805969332793	 ewma reward: 155.7472740932189
Episode 4213	length: 999	reward: 185.9510549688833	 ewma reward: 157.2574631370021
Episode 4214	length: 999	reward: 117.7431399590829	 ewma reward: 155.28174697810616
Episode 4215	length: 999	reward: 192.97303063461132	 ewma reward: 157.1663111609314
Episode 4216	length: 999	reward: 160.05644392882274	 ewma reward: 157.31081779932597
Episode 4217	length: 999	reward: 160.56880877210887	 ewma reward: 157.47371734796513
Episode 4218	length: 999	reward: 196.62763005312166	 ewma reward: 159.43141

Episode 4305	length: 999	reward: 174.7869314438045	 ewma reward: 159.24539699753265
Episode 4306	length: 999	reward: 112.39599895094157	 ewma reward: 156.9029270952031
Episode 4307	length: 999	reward: 125.20369394747108	 ewma reward: 155.31796543781647
Episode 4308	length: 999	reward: 153.44486355339768	 ewma reward: 155.22431034359553
Episode 4309	length: 999	reward: 143.82086631795772	 ewma reward: 154.65413814231363
Episode 4310	length: 999	reward: 134.74003592108897	 ewma reward: 153.65843303125237
Episode 4311	length: 999	reward: 135.81152192149733	 ewma reward: 152.7660874757646
Episode 4312	length: 999	reward: 148.09503676411245	 ewma reward: 152.532534940182
Episode 4313	length: 999	reward: 160.1455196550179	 ewma reward: 152.91318417592376
Episode 4314	length: 999	reward: 195.55502371514677	 ewma reward: 155.04527615288492
Episode 4315	length: 999	reward: 126.22690977765096	 ewma reward: 153.60435783412322
Episode 4316	length: 999	reward: 179.04555363325196	 ewma reward: 154.8

Episode 4403	length: 999	reward: 139.6247508233624	 ewma reward: 151.09695423755127
Episode 4404	length: 999	reward: 193.56468661785303	 ewma reward: 153.22034085656637
Episode 4405	length: 999	reward: 175.58940732945908	 ewma reward: 154.338794180211
Episode 4406	length: 999	reward: 115.81893678456801	 ewma reward: 152.41280131042888
Episode 4407	length: 999	reward: 180.24424890694027	 ewma reward: 153.80437369025444
Episode 4408	length: 999	reward: 171.78573900547997	 ewma reward: 154.7034419560157
Episode 4409	length: 999	reward: 132.40437361204317	 ewma reward: 153.58848853881707
Episode 4410	length: 999	reward: 164.38267626677518	 ewma reward: 154.12819792521498
Episode 4411	length: 999	reward: 181.58657177652339	 ewma reward: 155.50111661778038
Episode 4412	length: 999	reward: 140.03543583703285	 ewma reward: 154.727832578743
Episode 4413	length: 999	reward: 152.94707351821978	 ewma reward: 154.63879462571683
Episode 4414	length: 999	reward: 145.7588139092592	 ewma reward: 154.19

Episode 4501	length: 999	reward: 150.51609056198924	 ewma reward: 150.75374091083216
Episode 4502	length: 999	reward: 117.60179567610916	 ewma reward: 149.096143649096
Episode 4503	length: 999	reward: 151.47921390399824	 ewma reward: 149.2152971618411
Episode 4504	length: 110	reward: -3.9859648194464796	 ewma reward: 141.55523406277672
Episode 4505	length: 999	reward: 153.9186172928099	 ewma reward: 142.1734032242784
Episode 4506	length: 999	reward: 142.32278560729657	 ewma reward: 142.1808723434293
Episode 4507	length: 999	reward: 192.24792190418177	 ewma reward: 144.6842248214669
Episode 4508	length: 999	reward: 142.77484797745433	 ewma reward: 144.58875597926627
Episode 4509	length: 999	reward: 145.53665918102942	 ewma reward: 144.6361511393544
Episode 4510	length: 999	reward: 136.6325484910563	 ewma reward: 144.23597100693948
Episode 4511	length: 999	reward: 189.7082389265794	 ewma reward: 146.5095844029215
Episode 4512	length: 999	reward: 172.8476762837556	 ewma reward: 147.826488

Episode 4599	length: 999	reward: 135.8211704885594	 ewma reward: 151.69903969968337
Episode 4600	length: 999	reward: 187.83243381114326	 ewma reward: 153.50570940525637
Episode 4601	length: 999	reward: 128.1367468001265	 ewma reward: 152.23726127499987
Episode 4602	length: 999	reward: 124.22276991038147	 ewma reward: 150.83653670676892
Episode 4603	length: 999	reward: 163.00280385289165	 ewma reward: 151.44485006407507
Episode 4604	length: 999	reward: 169.1956975722165	 ewma reward: 152.33239243948213
Episode 4605	length: 999	reward: 149.68334650358122	 ewma reward: 152.1999401426871
Episode 4606	length: 999	reward: 169.73713402166166	 ewma reward: 153.07679983663581
Episode 4607	length: 999	reward: 149.73996053522262	 ewma reward: 152.90995787156515
Episode 4608	length: 999	reward: 175.15950059538858	 ewma reward: 154.02243500775631
Episode 4609	length: 999	reward: 130.96470414941408	 ewma reward: 152.8695484648392
Episode 4610	length: 999	reward: 180.20002903575403	 ewma reward: 154.

Episode 4697	length: 999	reward: 172.14617542923645	 ewma reward: 159.8434380962536
Episode 4698	length: 999	reward: 129.40579279499323	 ewma reward: 158.32155583119058
Episode 4699	length: 999	reward: 132.72215504738844	 ewma reward: 157.04158579200046
Episode 4700	length: 999	reward: 146.31943043623758	 ewma reward: 156.50547802421232
Episode 4701	length: 999	reward: 181.43614009629783	 ewma reward: 157.7520111278166
Episode 4702	length: 999	reward: 122.05616566476475	 ewma reward: 155.967218854664
Episode 4703	length: 999	reward: 125.1017247174499	 ewma reward: 154.42394414780327
Episode 4704	length: 999	reward: 153.75737933311132	 ewma reward: 154.39061590706868
Episode 4705	length: 999	reward: 150.1153040695089	 ewma reward: 154.17685031519068
Episode 4706	length: 999	reward: 143.2340583190351	 ewma reward: 153.6297107153829
Episode 4707	length: 999	reward: 117.7806157377871	 ewma reward: 151.8372559665031
Episode 4708	length: 999	reward: 189.07484910648887	 ewma reward: 153.69913

Episode 4795	length: 999	reward: 147.34869134864599	 ewma reward: 159.2152688919361
Episode 4796	length: 718	reward: 267.81156891117547	 ewma reward: 164.64508389289807
Episode 4797	length: 999	reward: 173.1001058924575	 ewma reward: 165.06783499287602
Episode 4798	length: 999	reward: 132.1091205290784	 ewma reward: 163.4198992696861
Episode 4799	length: 999	reward: 152.42489439834608	 ewma reward: 162.87014902611912
Episode 4800	length: 999	reward: 119.04112980713637	 ewma reward: 160.67869806516998
Episode 4801	length: 999	reward: 124.4031505606169	 ewma reward: 158.86492068994232
Episode 4802	length: 999	reward: 121.76760618533203	 ewma reward: 157.0100549647118
Episode 4803	length: 999	reward: 171.4130978341351	 ewma reward: 157.73020710818298
Episode 4804	length: 999	reward: 140.94650869259374	 ewma reward: 156.89102218740354
Episode 4805	length: 147	reward: 37.112035048990634	 ewma reward: 150.9020728304829
Episode 4806	length: 999	reward: 161.35995011714672	 ewma reward: 151.424

Episode 4892	length: 999	reward: 152.361401906033	 ewma reward: 159.79243990257055
Episode 4893	length: 999	reward: 187.1788721042945	 ewma reward: 161.16176151265674
Episode 4894	length: 999	reward: 144.78966017021142	 ewma reward: 160.34315644553448
Episode 4895	length: 999	reward: 169.93422370920504	 ewma reward: 160.82270980871797
Episode 4896	length: 999	reward: 173.34911196206974	 ewma reward: 161.44902991638554
Episode 4897	length: 999	reward: 110.5127156516077	 ewma reward: 158.90221420314666
Episode 4898	length: 999	reward: 146.043948977	 ewma reward: 158.2593009418393
Episode 4899	length: 999	reward: 155.3054430256063	 ewma reward: 158.11160804602767
Episode 4900	length: 999	reward: 147.28659604252059	 ewma reward: 157.57035744585232
Episode 4901	length: 999	reward: 147.7768725085451	 ewma reward: 157.08068319898695
Episode 4902	length: 999	reward: 111.99261389897508	 ewma reward: 154.82627973398635
Episode 4903	length: 999	reward: 163.44733041975672	 ewma reward: 155.2573322

Episode 4990	length: 999	reward: 185.0218292391218	 ewma reward: 159.18644426885277
Episode 4991	length: 327	reward: 286.83180883190187	 ewma reward: 165.5687124970052
Episode 4992	length: 999	reward: 166.82548534607545	 ewma reward: 165.6315511394587
Episode 4993	length: 999	reward: 150.8052560391569	 ewma reward: 164.89023638444363
Episode 4994	length: 999	reward: 129.85082980677802	 ewma reward: 163.13826605556034
Episode 4995	length: 999	reward: 178.7190777591905	 ewma reward: 163.91730664074183
Episode 4996	length: 999	reward: 183.72864412431738	 ewma reward: 164.9078735149206
Episode 4997	length: 999	reward: 160.39784275235007	 ewma reward: 164.68237197679207
Episode 4998	length: 999	reward: 166.48211077690803	 ewma reward: 164.77235891679786
Episode 4999	length: 999	reward: 171.17406116862733	 ewma reward: 165.09244402938933
Episode 5000	length: 999	reward: 176.87688716938973	 ewma reward: 165.68166618638932
Episode 5001	length: 999	reward: 193.76571421045236	 ewma reward: 167.0

Episode 5088	length: 999	reward: 146.14656370169257	 ewma reward: 150.60395259232328
Episode 5089	length: 999	reward: 186.82284589786588	 ewma reward: 152.41489725760042
Episode 5090	length: 999	reward: 161.30262956342682	 ewma reward: 152.85928387289172
Episode 5091	length: 999	reward: 180.82870375527926	 ewma reward: 154.2577548670111
Episode 5092	length: 999	reward: 122.48514231618176	 ewma reward: 152.66912423946962
Episode 5093	length: 999	reward: 122.36429407419453	 ewma reward: 151.15388273120587
Episode 5094	length: 999	reward: 169.12408967567262	 ewma reward: 152.05239307842922
Episode 5095	length: 999	reward: 179.86728686765102	 ewma reward: 153.4431377678903
Episode 5096	length: 999	reward: 126.12034299061186	 ewma reward: 152.07699802902638
Episode 5097	length: 999	reward: 152.99686262722517	 ewma reward: 152.1229912589363
Episode 5098	length: 999	reward: 173.2148125205768	 ewma reward: 153.17758232201834
Episode 5099	length: 999	reward: 130.2716669340315	 ewma reward: 152.

Episode 5185	length: 999	reward: 161.42416361769276	 ewma reward: 154.2939168826299
Episode 5186	length: 999	reward: 172.43662201438838	 ewma reward: 155.2010521392178
Episode 5187	length: 999	reward: 146.72081879812959	 ewma reward: 154.77704047216338
Episode 5188	length: 999	reward: 132.5970279583732	 ewma reward: 153.66803984647385
Episode 5189	length: 999	reward: 147.84881189927626	 ewma reward: 153.37707844911395
Episode 5190	length: 999	reward: 199.25357707894543	 ewma reward: 155.67090338060552
Episode 5191	length: 999	reward: 171.43908038323073	 ewma reward: 156.45931223073677
Episode 5192	length: 999	reward: 143.90411794587615	 ewma reward: 155.83155251649373
Episode 5193	length: 999	reward: 114.0517706761302	 ewma reward: 153.74256342447555
Episode 5194	length: 999	reward: 171.54183659370287	 ewma reward: 154.63252708293692
Episode 5195	length: 999	reward: 164.14792251483118	 ewma reward: 155.10829685453163
Episode 5196	length: 999	reward: 122.14373594275769	 ewma reward: 153