In [1]:
import numpy as np
import tensorflow as tf
import random
import dqn
from collections import deque
import gym

In [2]:
env = gym.make('CartPole-v0')
env._max_episode_steps = 10001

In [3]:
#Constants defining our neural network
input_size = env.observation_space.shape[0]
output_size = env.action_space.n

dis = 0.9
REPLAY_MEMORY = 50000

In [4]:
def simple_replay_train(DQN, train_batch):
    x_stack = np.empty(0).reshape(0, DQN.input_size)
    y_stack = np.empty(0).reshape(0, DQN.output_size)
    
    #Get stored information from the buffer
    for state, action, reward, next_state, done in train_batch:
        Q = DQN.predict(state)
        
        #terminal?
        if done:
            Q[0, action] = reward
        else:
            #Obtain the Q' values by feeding the new state through our network
            Q[0, action] = reward + dis * np.max(DQN.predict(next_state))
        
        y_stack = np.vstack([y_stack, Q])
        x_stack = np.vstack([x_stack, state])
        
        #Train our network using target and predicted Q values on each episode
        return DQN.update(x_stack, y_stack)

In [5]:
def bot_play(mainDQN):
    #See our trained network in action
    s = env.reset()
    reward_sum = 0
    while True:
        env.render()
        a = np.argmax(mainDQN.predict(s))
        s, reward, done, _ = env.step(a)
        reward_sum += reward
        if done:
            print("Total score: {}".format(reward_sum))
            break

In [6]:
def main():
    max_episodes = 2000
    
    #store the previous observations in replay memory
    replay_buffer = deque()
    
    with tf.Session() as sess:
        mainDQN = dqn.DQN(sess, input_size, output_size)
        init = tf.global_variables_initializer()
        sess.run(init)
        
        for episode in range(max_episodes):
            e = 1. / ((episode / 10) + 1)
            done = False
            step_count = 0
            
            state = env.reset()
            
            while not done:
                if np.random.rand(1) < e:
                    action = env.action_space.sample()
                else:
                    #Choose an action by greedily form the Q-network
                    action = np.argmax(mainDQN.predict(state))
                    
                #Get new state and reward from environment
                next_state, reward, done, _ = env.step(action)
                if done:
                    reward = -100
                
                #Save the experience to our buffer
                replay_buffer.append((state, action, reward, next_state, done))
                if len(replay_buffer) > REPLAY_MEMORY:
                    replay_buffer.popleft()
                
                state = next_state
                step_count += 1
                if step_count > 10000:
                    break
                    
            print("Episode: {}        steps: {}".format(episode, step_count))
            if step_count > 10000:
                pass
                #break
            
            if episode % 10 == 1:
                #Get  a random batch of experiences.
                for _ in range(50):
                    #Minibatch works better
                    minibatch = random.sample(replay_buffer, 10)
                    loss, _ = simple_replay_train(mainDQN, minibatch)
                    print("Loss: ",loss)
        
        bot_play(mainDQN)                    

In [7]:
if __name__ == "__main__":
    main()

Episode: 0        steps: 15
Episode: 1        steps: 11
('Loss: ', 1.1722537)
('Loss: ', 0.90125346)
('Loss: ', 1.2603921)
('Loss: ', 0.43896282)
('Loss: ', 0.98367488)
('Loss: ', 1.304984)
('Loss: ', 1.1757784)
('Loss: ', 1.2202051)
('Loss: ', 4.5860415)
('Loss: ', 0.2540184)
('Loss: ', 0.73231661)
('Loss: ', 1.6549803)
('Loss: ', 0.51394951)
('Loss: ', 0.84542572)
('Loss: ', 5106.4482)
('Loss: ', 0.60345328)
('Loss: ', 0.60955602)
('Loss: ', 0.5880813)
('Loss: ', 0.49556345)
('Loss: ', 0.4667207)
('Loss: ', 0.53523153)
('Loss: ', 0.45913139)
('Loss: ', 0.45068029)
('Loss: ', 0.57226217)
('Loss: ', 0.46301815)
('Loss: ', 0.67181355)
('Loss: ', 1.6276741)
('Loss: ', 0.52704239)
('Loss: ', 4963.4526)
('Loss: ', 0.5224086)
('Loss: ', 0.54289931)
('Loss: ', 5.2387056)
('Loss: ', 0.83078605)
('Loss: ', 0.71789801)
('Loss: ', 0.38849401)
('Loss: ', 0.82782042)
('Loss: ', 1.5658085)
('Loss: ', 1.0549378)
('Loss: ', 0.57794738)
('Loss: ', 16.149874)
('Loss: ', 1.6432564)
('Loss: ', 15.795099)

('Loss: ', 0.22033469)
('Loss: ', 0.40333197)
('Loss: ', 2.4483232)
('Loss: ', 13.956199)
('Loss: ', 2.5594435)
Episode: 62        steps: 386
Episode: 63        steps: 123
Episode: 64        steps: 65
Episode: 65        steps: 62
Episode: 66        steps: 180
Episode: 67        steps: 125
Episode: 68        steps: 63
Episode: 69        steps: 461
Episode: 70        steps: 59
Episode: 71        steps: 133
('Loss: ', 18.118759)
('Loss: ', 6.6934247)
('Loss: ', 2.3510165)
('Loss: ', 13.783148)
('Loss: ', 20.151533)
('Loss: ', 11.910247)
('Loss: ', 20.689079)
('Loss: ', 1.3400339)
('Loss: ', 21.676657)
('Loss: ', 17.175837)
('Loss: ', 5788.4019)
('Loss: ', 13.233074)
('Loss: ', 38.362118)
('Loss: ', 1.9327459)
('Loss: ', 19.79644)
('Loss: ', 8.5487747)
('Loss: ', 45.571201)
('Loss: ', 9.247611)
('Loss: ', 19.8132)
('Loss: ', 18.425928)
('Loss: ', 3721.5747)
('Loss: ', 4.6680803)
('Loss: ', 0.067518227)
('Loss: ', 11.452203)
('Loss: ', 7.2419658)
('Loss: ', 8.9790707)
('Loss: ', 5.4419136)


('Loss: ', 0.10496031)
('Loss: ', 12.007164)
('Loss: ', 11.716558)
Episode: 132        steps: 137
Episode: 133        steps: 56
Episode: 134        steps: 41
Episode: 135        steps: 56
Episode: 136        steps: 107
Episode: 137        steps: 76
Episode: 138        steps: 106
Episode: 139        steps: 103
Episode: 140        steps: 57
Episode: 141        steps: 122
('Loss: ', 64.243057)
('Loss: ', 79.486443)
('Loss: ', 63.227158)
('Loss: ', 22.248526)
('Loss: ', 0.0049789129)
('Loss: ', 16.099659)
('Loss: ', 21.936665)
('Loss: ', 5.740613)
('Loss: ', 0.10150722)
('Loss: ', 1.6849743)
('Loss: ', 9.5475931)
('Loss: ', 78.642006)
('Loss: ', 21.264875)
('Loss: ', 5.0584259)
('Loss: ', 0.40065333)
('Loss: ', 8.5672035)
('Loss: ', 33.292385)
('Loss: ', 6.3846502)
('Loss: ', 9.6821556)
('Loss: ', 9.950182)
('Loss: ', 19.706842)
('Loss: ', 0.081698678)
('Loss: ', 8.4752569)
('Loss: ', 45.325264)
('Loss: ', 8.3459682)
('Loss: ', 15.519416)
('Loss: ', 1.6401191)
('Loss: ', 92.422768)
('Loss:

Episode: 192        steps: 125
Episode: 193        steps: 263
Episode: 194        steps: 321
Episode: 195        steps: 123
Episode: 196        steps: 191
Episode: 197        steps: 125
Episode: 198        steps: 201
Episode: 199        steps: 169
Episode: 200        steps: 222
Episode: 201        steps: 188
('Loss: ', 0.52270293)
('Loss: ', 1.11251)
('Loss: ', 1.8200818)
('Loss: ', 0.60127068)
('Loss: ', 0.064553842)
('Loss: ', 0.064390428)
('Loss: ', 0.32243425)
('Loss: ', 0.27243543)
('Loss: ', 0.029490799)
('Loss: ', 0.48345301)
('Loss: ', 0.31734645)
('Loss: ', 2.204278)
('Loss: ', 0.17209527)
('Loss: ', 0.98100507)
('Loss: ', 0.72156787)
('Loss: ', 0.74137717)
('Loss: ', 0.46853939)
('Loss: ', 0.42901456)
('Loss: ', 0.35010922)
('Loss: ', 0.50267673)
('Loss: ', 0.97494549)
('Loss: ', 5124.8359)
('Loss: ', 1.6400973)
('Loss: ', 0.48217756)
('Loss: ', 0.58345038)
('Loss: ', 0.20797527)
('Loss: ', 0.071709201)
('Loss: ', 5.6838307)
('Loss: ', 0.084000088)
('Loss: ', 0.35561639)
('Lo

Episode: 252        steps: 106
Episode: 253        steps: 712
Episode: 254        steps: 72
Episode: 255        steps: 155
Episode: 256        steps: 70
Episode: 257        steps: 259
Episode: 258        steps: 62
Episode: 259        steps: 72
Episode: 260        steps: 86
Episode: 261        steps: 65
('Loss: ', 0.018707987)
('Loss: ', 3.9960458)
('Loss: ', 1.6940999)
('Loss: ', 0.0029169943)
('Loss: ', 1.0646049)
('Loss: ', 1.2658277)
('Loss: ', 1.3908211)
('Loss: ', 1.161582)
('Loss: ', 4.1011758)
('Loss: ', 2.3469708)
('Loss: ', 10.067896)
('Loss: ', 0.27535385)
('Loss: ', 0.48156503)
('Loss: ', 1.5368389)
('Loss: ', 2.3605866)
('Loss: ', 1.3214728)
('Loss: ', 1.7339293)
('Loss: ', 0.066045068)
('Loss: ', 2.5328908)
('Loss: ', 3.1650441)
('Loss: ', 0.13492651)
('Loss: ', 0.12772046)
('Loss: ', 1.9539053)
('Loss: ', 0.39213467)
('Loss: ', 1.658065)
('Loss: ', 0.11388308)
('Loss: ', 5.441617)
('Loss: ', 0.036167752)
('Loss: ', 0.00054976816)
('Loss: ', 0.49131957)
('Loss: ', 0.426627

Episode: 312        steps: 1190
Episode: 313        steps: 1027
Episode: 314        steps: 1302
Episode: 315        steps: 400
Episode: 316        steps: 1104
Episode: 317        steps: 168
Episode: 318        steps: 3544
Episode: 319        steps: 905
Episode: 320        steps: 860
Episode: 321        steps: 2590
('Loss: ', 6.1990156)
('Loss: ', 2.2432196)
('Loss: ', 0.19025917)
('Loss: ', 0.0054529482)
('Loss: ', 0.9905718)
('Loss: ', 0.14210206)
('Loss: ', 0.057720881)
('Loss: ', 13.263765)
('Loss: ', 2.7615321)
('Loss: ', 1.6640393)
('Loss: ', 0.10586556)
('Loss: ', 1.4090344)
('Loss: ', 1.0654929)
('Loss: ', 1.7713023)
('Loss: ', 1.4178466)
('Loss: ', 1.2612623)
('Loss: ', 0.78751564)
('Loss: ', 0.48532403)
('Loss: ', 0.034426358)
('Loss: ', 0.013447693)
('Loss: ', 1.4217739)
('Loss: ', 8.9019089)
('Loss: ', 1.1505219)
('Loss: ', 2.6911931)
('Loss: ', 0.79316193)
('Loss: ', 0.69262332)
('Loss: ', 0.089062512)
('Loss: ', 3.1059201)
('Loss: ', 0.0020820191)
('Loss: ', 14.437525)
('L

Episode: 372        steps: 555
Episode: 373        steps: 82
Episode: 374        steps: 881
Episode: 375        steps: 1040
Episode: 376        steps: 2597
Episode: 377        steps: 281
Episode: 378        steps: 71
Episode: 379        steps: 91
Episode: 380        steps: 559
Episode: 381        steps: 594
('Loss: ', 1.9613926)
('Loss: ', 1.8821949)
('Loss: ', 4.3745413)
('Loss: ', 4.2772369)
('Loss: ', 1.9655005)
('Loss: ', 1.9246703)
('Loss: ', 0.0014974286)
('Loss: ', 0.0066509149)
('Loss: ', 1.9644446)
('Loss: ', 2.1993097e-05)
('Loss: ', 2.3123195)
('Loss: ', 0.0024277221)
('Loss: ', 1.7599111)
('Loss: ', 0.0052911662)
('Loss: ', 0.2670489)
('Loss: ', 1.2668574)
('Loss: ', 2.4441118)
('Loss: ', 0.40254933)
('Loss: ', 0.0082713561)
('Loss: ', 0.00015592134)
('Loss: ', 0.025385622)
('Loss: ', 0.026093965)
('Loss: ', 0.0069482955)
('Loss: ', 0.16206248)
('Loss: ', 2.3028226)
('Loss: ', 0.072298817)
('Loss: ', 0.033004671)
('Loss: ', 1.5356579)
('Loss: ', 1.7061288)
('Loss: ', 0.1147

Episode: 434        steps: 96
Episode: 435        steps: 122
Episode: 436        steps: 138
Episode: 437        steps: 230
Episode: 438        steps: 116
Episode: 439        steps: 123
Episode: 440        steps: 217
Episode: 441        steps: 98
('Loss: ', 0.010739945)
('Loss: ', 0.74282557)
('Loss: ', 0.05582013)
('Loss: ', 0.18634088)
('Loss: ', 0.048688043)
('Loss: ', 0.058083855)
('Loss: ', 0.40974954)
('Loss: ', 0.12228796)
('Loss: ', 0.50521463)
('Loss: ', 0.03392072)
('Loss: ', 0.048467107)
('Loss: ', 0.94314307)
('Loss: ', 0.0068304534)
('Loss: ', 0.018867593)
('Loss: ', 1.2645863)
('Loss: ', 1.9015563)
('Loss: ', 1.7138399)
('Loss: ', 0.013272123)
('Loss: ', 0.0065581473)
('Loss: ', 0.01719947)
('Loss: ', 0.026700675)
('Loss: ', 0.48753047)
('Loss: ', 0.55429214)
('Loss: ', 0.79736155)
('Loss: ', 0.54740858)
('Loss: ', 1.1740558)
('Loss: ', 1.1483641)
('Loss: ', 0.33379099)
('Loss: ', 2.9070146)
('Loss: ', 1.7126404)
('Loss: ', 2.7009387)
('Loss: ', 2.7054679)
('Loss: ', 0.721

Episode: 494        steps: 31
Episode: 495        steps: 12
Episode: 496        steps: 11
Episode: 497        steps: 25
Episode: 498        steps: 44
Episode: 499        steps: 40
Episode: 500        steps: 19
Episode: 501        steps: 33
('Loss: ', 3.7492855)
('Loss: ', 1.906378)
('Loss: ', 2.2698469)
('Loss: ', 1.3371431)
('Loss: ', 1.9858238)
('Loss: ', 0.65816808)
('Loss: ', 0.92077935)
('Loss: ', 2.5108559)
('Loss: ', 6.6836586)
('Loss: ', 0.034940295)
('Loss: ', 0.2548936)
('Loss: ', 8.7200775)
('Loss: ', 1.6839088)
('Loss: ', 0.072794959)
('Loss: ', 6.0887046)
('Loss: ', 1.4559734)
('Loss: ', 0.03540273)
('Loss: ', 0.23040332)
('Loss: ', 0.76884377)
('Loss: ', 1.2602674)
('Loss: ', 1.5008014)
('Loss: ', 0.74052852)
('Loss: ', 0.68799818)
('Loss: ', 1.6380162)
('Loss: ', 4.5167246)
('Loss: ', 1.7152834)
('Loss: ', 6.1822691)
('Loss: ', 0.059464484)
('Loss: ', 0.16983245)
('Loss: ', 0.21030138)
('Loss: ', 3.7054706)
('Loss: ', 0.31439054)
('Loss: ', 0.0084868763)
('Loss: ', 0.266

Episode: 552        steps: 925
Episode: 553        steps: 56
Episode: 554        steps: 838
Episode: 555        steps: 100
Episode: 556        steps: 61
Episode: 557        steps: 72
Episode: 558        steps: 92
Episode: 559        steps: 152
Episode: 560        steps: 86
Episode: 561        steps: 41
('Loss: ', 2.6006732)
('Loss: ', 0.032102387)
('Loss: ', 1.2733325)
('Loss: ', 4.4980259)
('Loss: ', 0.18223479)
('Loss: ', 0.17569505)
('Loss: ', 0.017303608)
('Loss: ', 1.0002911)
('Loss: ', 0.96505767)
('Loss: ', 0.40475401)
('Loss: ', 0.030987637)
('Loss: ', 0.047028091)
('Loss: ', 0.015204391)
('Loss: ', 0.15416765)
('Loss: ', 0.034508016)
('Loss: ', 0.095083497)
('Loss: ', 1.6714932)
('Loss: ', 0.36745837)
('Loss: ', 0.16230085)
('Loss: ', 2.0719385)
('Loss: ', 4.3424621)
('Loss: ', 0.0018461255)
('Loss: ', 0.012917862)
('Loss: ', 0.35812578)
('Loss: ', 2.1118395)
('Loss: ', 1.8231801)
('Loss: ', 0.69057494)
('Loss: ', 1.2253582)
('Loss: ', 0.03907622)
('Loss: ', 1.0113269)
('Loss:

('Loss: ', 3.9211702)
('Loss: ', 0.34445161)
('Loss: ', 2.7804358)
('Loss: ', 0.5095793)
('Loss: ', 0.58411491)
('Loss: ', 0.14943197)
Episode: 612        steps: 219
Episode: 613        steps: 113
Episode: 614        steps: 93
Episode: 615        steps: 271
Episode: 616        steps: 271
Episode: 617        steps: 876
Episode: 618        steps: 280
Episode: 619        steps: 266
Episode: 620        steps: 464
Episode: 621        steps: 205
('Loss: ', 0.11400015)
('Loss: ', 0.13817221)
('Loss: ', 2.8320112)
('Loss: ', 0.015406304)
('Loss: ', 2.474987)
('Loss: ', 0.061300628)
('Loss: ', 5.4500685)
('Loss: ', 0.18629457)
('Loss: ', 0.70434576)
('Loss: ', 2.4301188)
('Loss: ', 0.0047876695)
('Loss: ', 0.31186372)
('Loss: ', 0.056651168)
('Loss: ', 0.025071295)
('Loss: ', 2.6924655)
('Loss: ', 0.17011113)
('Loss: ', 0.041818995)
('Loss: ', 0.31636086)
('Loss: ', 0.58296305)
('Loss: ', 4.8931751)
('Loss: ', 3.0140989)
('Loss: ', 2.9292233)
('Loss: ', 0.119237)
('Loss: ', 0.019860018)
('Loss:

('Loss: ', 0.045163177)
('Loss: ', 1.8765453)
('Loss: ', 0.52779621)
('Loss: ', 0.079514734)
('Loss: ', 2.3125672)
('Loss: ', 0.12723801)
('Loss: ', 2.0952048)
('Loss: ', 0.09536349)
('Loss: ', 2.9332128)
('Loss: ', 1.7785752)
('Loss: ', 0.11142717)
('Loss: ', 0.026056249)
('Loss: ', 5.0329866)
('Loss: ', 1.5458708)
('Loss: ', 2.926429)
('Loss: ', 0.083732054)
('Loss: ', 3.4076455)
('Loss: ', 0.013578212)
('Loss: ', 4.7832799)
('Loss: ', 0.0036694696)
('Loss: ', 0.013269627)
('Loss: ', 0.11869203)
('Loss: ', 0.0078350836)
('Loss: ', 1.2372713)
('Loss: ', 0.0075788428)
Episode: 672        steps: 497
Episode: 673        steps: 103
Episode: 674        steps: 104
Episode: 675        steps: 127
Episode: 676        steps: 178
Episode: 677        steps: 244
Episode: 678        steps: 212
Episode: 679        steps: 307
Episode: 680        steps: 115
Episode: 681        steps: 401
('Loss: ', 2.3307877)
('Loss: ', 0.81340855)
('Loss: ', 2.8798962)
('Loss: ', 0.19852984)
('Loss: ', 0.020544942)
(

Episode: 732        steps: 299
Episode: 733        steps: 209
Episode: 734        steps: 183
Episode: 735        steps: 382
Episode: 736        steps: 210
Episode: 737        steps: 124
Episode: 738        steps: 104
Episode: 739        steps: 209
Episode: 740        steps: 638
Episode: 741        steps: 663
('Loss: ', 3.5730231)
('Loss: ', 3.3239772)
('Loss: ', 0.44090858)
('Loss: ', 2.639832)
('Loss: ', 0.27702701)
('Loss: ', 3.2433293)
('Loss: ', 0.015773637)
('Loss: ', 0.45844278)
('Loss: ', 0.42400095)
('Loss: ', 0.031237038)
('Loss: ', 0.21355525)
('Loss: ', 0.46235895)
('Loss: ', 3.5835853)
('Loss: ', 0.10649822)
('Loss: ', 3.2265272)
('Loss: ', 0.051366054)
('Loss: ', 3.278564)
('Loss: ', 0.0903816)
('Loss: ', 0.16749193)
('Loss: ', 0.19842912)
('Loss: ', 0.31885087)
('Loss: ', 0.23217666)
('Loss: ', 0.24185748)
('Loss: ', 0.073013)
('Loss: ', 1.3383734)
('Loss: ', 3.4965281)
('Loss: ', 3.4657221)
('Loss: ', 3.4427433)
('Loss: ', 1.5093329)
('Loss: ', 2.6301503)
('Loss: ', 0.23

Episode: 793        steps: 319
Episode: 794        steps: 471
Episode: 795        steps: 323
Episode: 796        steps: 839
Episode: 797        steps: 80
Episode: 798        steps: 380
Episode: 799        steps: 376
Episode: 800        steps: 2761
Episode: 801        steps: 251
('Loss: ', 3.4174001)
('Loss: ', 11.087499)
('Loss: ', 0.021137916)
('Loss: ', 0.093759201)
('Loss: ', 2.585547)
('Loss: ', 10.054557)
('Loss: ', 3.2662287)
('Loss: ', 2.4307032)
('Loss: ', 0.023238728)
('Loss: ', 0.069643147)
('Loss: ', 2.7075276)
('Loss: ', 10.987551)
('Loss: ', 0.010051751)
('Loss: ', 1.9399922)
('Loss: ', 0.020161601)
('Loss: ', 0.023432005)
('Loss: ', 1.5975676)
('Loss: ', 2.110796)
('Loss: ', 0.018077133)
('Loss: ', 5.8640118)
('Loss: ', 0.55899495)
('Loss: ', 0.074542239)
('Loss: ', 1.7121918)
('Loss: ', 0.010282902)
('Loss: ', 0.0027660977)
('Loss: ', 0.096589386)
('Loss: ', 0.0082967971)
('Loss: ', 0.012888553)
('Loss: ', 1.8451934)
('Loss: ', 2.0148385)
('Loss: ', 0.0013264116)
('Loss:

Episode: 852        steps: 121
Episode: 853        steps: 650
Episode: 854        steps: 465
Episode: 855        steps: 245
Episode: 856        steps: 268
Episode: 857        steps: 82
Episode: 858        steps: 1480
Episode: 859        steps: 76
Episode: 860        steps: 1643
Episode: 861        steps: 431
('Loss: ', 1.3805102)
('Loss: ', 0.059725244)
('Loss: ', 0.28811947)
('Loss: ', 0.075423099)
('Loss: ', 1.9068742)
('Loss: ', 1.5996978)
('Loss: ', 0.31318033)
('Loss: ', 4.192328)
('Loss: ', 1.4793973)
('Loss: ', 0.025571004)
('Loss: ', 2.27074)
('Loss: ', 3.3103299)
('Loss: ', 0.0019516537)
('Loss: ', 6.6429586)
('Loss: ', 0.043205217)
('Loss: ', 0.20323834)
('Loss: ', 0.44213781)
('Loss: ', 1.9641906)
('Loss: ', 0.15075235)
('Loss: ', 2.7036891)
('Loss: ', 0.0012834903)
('Loss: ', 0.00012259241)
('Loss: ', 1.9793944)
('Loss: ', 2.0787995)
('Loss: ', 0.24426009)
('Loss: ', 0.23540546)
('Loss: ', 2.9439023)
('Loss: ', 0.0011211812)
('Loss: ', 0.015381203)
('Loss: ', 5.4575257)
('L

('Loss: ', 3.5401611)
('Loss: ', 0.0012695808)
Episode: 912        steps: 2548
Episode: 913        steps: 10001
Episode: 914        steps: 10001
Episode: 915        steps: 107
Episode: 916        steps: 9529
Episode: 917        steps: 4108
Episode: 918        steps: 3432
Episode: 919        steps: 185
Episode: 920        steps: 1549
Episode: 921        steps: 1635
('Loss: ', 1.344833)
('Loss: ', 0.015489007)
('Loss: ', 0.28194973)
('Loss: ', 0.071328454)
('Loss: ', 0.00034585004)
('Loss: ', 2.1162246e-05)
('Loss: ', 0.019596515)
('Loss: ', 1.9099528)
('Loss: ', 1.8931508)
('Loss: ', 0.0085379798)
('Loss: ', 2.4852486)
('Loss: ', 4.6629496)
('Loss: ', 0.0084821247)
('Loss: ', 4.7065959)
('Loss: ', 1.3412315)
('Loss: ', 0.006673234)
('Loss: ', 0.0014155516)
('Loss: ', 4.5056124)
('Loss: ', 2.4930642)
('Loss: ', 10.116991)
('Loss: ', 0.009430116)
('Loss: ', 0.56062794)
('Loss: ', 1.7337228)
('Loss: ', 0.0028200187)
('Loss: ', 0.015739461)
('Loss: ', 2.4752741)
('Loss: ', 0.0025243766)
('L

Episode: 972        steps: 84
Episode: 973        steps: 477
Episode: 974        steps: 331
Episode: 975        steps: 321
Episode: 976        steps: 441
Episode: 977        steps: 103
Episode: 978        steps: 81
Episode: 979        steps: 316
Episode: 980        steps: 296
Episode: 981        steps: 236
('Loss: ', 9.162179)
('Loss: ', 2.9974492)
('Loss: ', 0.33340442)
('Loss: ', 0.21726899)
('Loss: ', 0.21203278)
('Loss: ', 0.082123667)
('Loss: ', 0.31815878)
('Loss: ', 0.012403261)
('Loss: ', 0.09113612)
('Loss: ', 0.0028339215)
('Loss: ', 0.010151551)
('Loss: ', 6.6141815)
('Loss: ', 2.9101756)
('Loss: ', 4.5309777)
('Loss: ', 4.8643141)
('Loss: ', 0.33212125)
('Loss: ', 0.19300595)
('Loss: ', 3.8657212)
('Loss: ', 1.4776729)
('Loss: ', 0.60393977)
('Loss: ', 1.0673921)
('Loss: ', 8.6220865)
('Loss: ', 0.079477616)
('Loss: ', 0.10820089)
('Loss: ', 0.42239612)
('Loss: ', 0.06440112)
('Loss: ', 0.5448193)
('Loss: ', 0.32881418)
('Loss: ', 0.049307063)
('Loss: ', 0.086336635)
('Loss

('Loss: ', 0.060242523)
('Loss: ', 2.3858752)
('Loss: ', 0.025415285)
('Loss: ', 0.0005523088)
('Loss: ', 1.2970737)
Episode: 1032        steps: 106
Episode: 1033        steps: 2639
Episode: 1034        steps: 506
Episode: 1035        steps: 417
Episode: 1036        steps: 1724
Episode: 1037        steps: 2153
Episode: 1038        steps: 1570
Episode: 1039        steps: 1604
Episode: 1040        steps: 798
Episode: 1041        steps: 662
('Loss: ', 3.2749252)
('Loss: ', 0.0017386094)
('Loss: ', 6.695426)
('Loss: ', 9.5127401)
('Loss: ', 2.3567762)
('Loss: ', 2.2025151)
('Loss: ', 0.18006003)
('Loss: ', 0.023832412)
('Loss: ', 0.005829996)
('Loss: ', 0.032267921)
('Loss: ', 0.12119651)
('Loss: ', 1.0203394)
('Loss: ', 0.022684284)
('Loss: ', 1.122911)
('Loss: ', 0.39987323)
('Loss: ', 5.8568172)
('Loss: ', 4.2959781)
('Loss: ', 0.00088971958)
('Loss: ', 0.092749171)
('Loss: ', 0.36265853)
('Loss: ', 1.7340224)
('Loss: ', 6.9777713)
('Loss: ', 1.4246042)
('Loss: ', 7.5057545)
('Loss: ', 

KeyboardInterrupt: 