In [1]:
from __future__ import division

import gym
import numpy as np
import random
import tensorflow as tf
import tensorflow.contrib.slim as slim
import matplotlib.pyplot as plt
import scipy.misc
import scipy.misc
import os
%matplotlib inline

In [2]:
env = gym.make('Breakout-v0')

[2017-05-21 11:57:44,462] Making new env: Breakout-v0


In [3]:
def processState(states):
    states = scipy.misc.imresize(states, (84, 84))


    return np.reshape(states,[-1,21168])

In [4]:
class Qnetwork():
    def __init__(self,h_size):
        #The network recieves a frame from the game, flattened into an array.
        #It then resizes it and processes it through four convolutional layers.
        self.scalarInput =  tf.placeholder(shape=[None,21168],dtype=tf.float32)
        self.imageIn = tf.reshape(self.scalarInput,shape=[-1,84,84,3])
        self.conv1 = slim.conv2d( \
            inputs=self.imageIn,num_outputs=32,kernel_size=[8,8],stride=[4,4],padding='VALID', biases_initializer=None)
        self.conv2 = slim.conv2d( \
            inputs=self.conv1,num_outputs=64,kernel_size=[4,4],stride=[2,2],padding='VALID', biases_initializer=None)
        self.conv3 = slim.conv2d( \
            inputs=self.conv2,num_outputs=64,kernel_size=[3,3],stride=[1,1],padding='VALID', biases_initializer=None)
        self.conv4 = slim.conv2d( \
            inputs=self.conv3,num_outputs=h_size,kernel_size=[7,7],stride=[1,1],padding='VALID', biases_initializer=None)
        
        #We take the output from the final convolutional layer and split it into separate advantage and value streams.
        self.streamAC,self.streamVC = tf.split(self.conv4,2,3)
        self.streamA = slim.flatten(self.streamAC)
        self.streamV = slim.flatten(self.streamVC)
        xavier_init = tf.contrib.layers.xavier_initializer()
        self.AW = tf.Variable(xavier_init([h_size//2,3]))
        self.VW = tf.Variable(xavier_init([h_size//2,1]))
        self.Advantage = tf.matmul(self.streamA,self.AW)
        self.Value = tf.matmul(self.streamV,self.VW)
        
        #Then combine them together to get our final Q-values.
        self.Qout = self.Value + tf.subtract(self.Advantage,tf.reduce_mean(self.Advantage,axis=1,keep_dims=True))
        self.predict = tf.argmax(self.Qout,1)
        
        #Below we obtain the loss by taking the sum of squares difference between the target and prediction Q values.
        self.targetQ = tf.placeholder(shape=[None],dtype=tf.float32)
        self.actions = tf.placeholder(shape=[None],dtype=tf.int32)
        self.actions_onehot = tf.one_hot(self.actions,3,dtype=tf.float32)
        
        self.Q = tf.reduce_sum(tf.multiply(self.Qout, self.actions_onehot), axis=1)
        
        self.td_error = tf.square(self.targetQ - self.Q)
        self.loss = tf.reduce_mean(self.td_error)
        self.trainer = tf.train.AdamOptimizer(learning_rate=0.0001)
        self.updateModel = self.trainer.minimize(self.loss)

In [5]:
mainQN = Qnetwork(h_size)

NameError: name 'h_size' is not defined

In [6]:
class experience_buffer():
    def __init__(self, buffer_size = 1000000):
        self.buffer = []
        self.buffer_size = buffer_size
    
    def add(self,experience):
        if len(self.buffer) + len(experience) >= self.buffer_size:
            self.buffer[0:(len(experience)+len(self.buffer))-self.buffer_size] = []
        self.buffer.extend(experience)
            
    def sample(self,size):
        return np.reshape(np.array(random.sample(self.buffer,size)),[size,5])

In [7]:
def updateTargetGraph(tfVars,tau):
    total_vars = len(tfVars)
    op_holder = []
    for idx,var in enumerate(tfVars[0:total_vars//2]):
        op_holder.append(tfVars[idx+total_vars//2].assign((var.value()*tau) + ((1-tau)*tfVars[idx+total_vars//2].value())))
    return op_holder

def updateTarget(op_holder,sess):
    for op in op_holder:
        sess.run(op)

In [8]:
batch_size = 32 #How many experiences to use for each training step.
update_freq = 4 #How often to perform a training step.
y = .99 #Discount factor on the target Q-values
startE = 1 #Starting chance of random action
endE = 0.1 #Final chance of random action
anneling_steps = 1000000. #How many steps of training to reduce startE to endE.
num_episodes = 10000 #How many episodes of game environment to train network with.
pre_train_steps = 50000 #How many steps of random actions before training begins.
max_epLength = 50000000000 #The max allowed length of our episode.
load_model = False #Whether to load a saved model.
path = "./dqn" #The path to save our model to.
h_size = 256 #The size of the final convolutional layer before splitting it into Advantage and Value streams.
tau = 0.001 #Rate to update target network toward primary network

In [None]:
tf.reset_default_graph()
mainQN = Qnetwork(h_size)
targetQN = Qnetwork(h_size)

init = tf.global_variables_initializer()

saver = tf.train.Saver()

trainables = tf.trainable_variables()

targetOps = updateTargetGraph(trainables,tau)

myBuffer = experience_buffer()

#Set the rate of random action decrease. 
e = startE
stepDrop = (startE - endE)/anneling_steps

#create lists to contain total rewards and steps per episode
jList = []
rList = []
total_steps = 0

#Make a path for our model to be saved in.
if not os.path.exists(path):
    os.makedirs(path)

with tf.Session() as sess:
    sess.run(init)
    updateTarget(targetOps,sess) #Set the target network to be equal to the primary network.
    for i in range(num_episodes):
        episodeBuffer = experience_buffer()
        #Reset environment and get first new observation
        s = env.reset()
        s = processState(s)
        d = False
        rAll = 0
        j = 0
        #The Q-Network
        while j < max_epLength: #If the agent takes longer than 200 moves to reach either of the blocks, end the trial.
            j+=1
            #Choose an action by greedily (with e chance of random action) from the Q-network
            if np.random.rand(1) < e or total_steps < pre_train_steps:
                a = env.action_space.sample()
            else:
                a = sess.run(mainQN.predict,feed_dict={mainQN.scalarInput:s})
            s1,r,d,_ = env.step(a)
            s1 = processState(s1)
            total_steps += 1
            episodeBuffer.add(np.reshape(np.array([s,a,r,s1,d]),[1,5])) #Save the experience to our episode buffer.
            
            if total_steps > pre_train_steps:
                if e > endE:
                    e -= stepDrop
                
                if total_steps % (update_freq) == 0:
                    trainBatch = myBuffer.sample(batch_size) #Get a random batch of experiences.
                    #Below we perform the Double-DQN update to the target Q-values
                    Q1 = sess.run(mainQN.predict,feed_dict={mainQN.scalarInput:np.vstack(trainBatch[:,3])})
                    Q2 = sess.run(targetQN.Qout,feed_dict={targetQN.scalarInput:np.vstack(trainBatch[:,3])})
                    end_multiplier = -(trainBatch[:,4] - 1)
                    doubleQ = Q2[range(batch_size),Q1]
                    targetQ = trainBatch[:,2] + (y*doubleQ * end_multiplier)
                    #Update the network with our target values.
                    _ = sess.run(mainQN.updateModel, \
                        feed_dict={mainQN.scalarInput:np.vstack(trainBatch[:,0]),mainQN.targetQ:targetQ, mainQN.actions:trainBatch[:,1]})
                    
                    updateTarget(targetOps,sess) #Set the target network to be equal to the primary network.
            rAll += r
            s = s1
            
            if d == True:

                break
        
        myBuffer.add(episodeBuffer.buffer)
        jList.append(j)
        rList.append(rAll)
        #Periodically save the model. 
        print(i,np.mean(rList[-10:]), e)
    saver.save(sess,path+'/model-'+str(i)+'.cptk')
print("Percent of succesful episodes: " + str(sum(rList)/num_episodes) + "%")

0 2.0 1
1 2.0 1
2 2.33333333333 1
3 1.75 1
4 1.8 1
5 2.0 1
6 2.0 1
7 2.125 1
8 2.0 1
9 1.8 1
10 1.9 1
11 1.9 1
12 1.7 1
13 1.8 1
14 1.6 1
15 1.3 1
16 1.6 1
17 1.5 1
18 1.5 1
19 1.7 1
20 1.5 1
21 1.4 1
22 1.3 1
23 1.5 1
24 1.8 1
25 2.0 1
26 1.5 1
27 1.3 1
28 1.3 1
29 1.2 1
30 1.1 1
31 1.1 1
32 1.3 1
33 1.1 1
34 0.8 1
35 0.7 1
36 0.7 1
37 0.7 1
38 0.7 1
39 0.6 1
40 0.6 1
41 0.6 1
42 0.6 1
43 0.6 1
44 0.6 1
45 0.7 1
46 0.8 1
47 0.9 1
48 1.0 1
49 1.3 1
50 1.5 1
51 1.5 1
52 1.5 1
53 1.7 1
54 1.8 1
55 1.6 1
56 1.5 1
57 1.5 1
58 1.6 1
59 1.4 1
60 1.3 1
61 1.2 1
62 1.1 1
63 1.0 1
64 0.9 1
65 1.2 1
66 1.2 1
67 1.3 1
68 1.3 1
69 1.4 1
70 1.3 1
71 1.5 1
72 1.4 1
73 1.2 1
74 1.4 1
75 1.2 1
76 1.4 1
77 1.3 1
78 1.2 1
79 1.8 1
80 2.0 1
81 1.8 1
82 1.8 1
83 2.1 1
84 1.9 1
85 1.8 1
86 1.9 1
87 2.0 1
88 2.1 1
89 1.4 1
90 1.2 1
91 1.2 1
92 1.2 1
93 1.1 1
94 1.1 1
95 1.1 1
96 1.2 1
97 1.2 1
98 1.1 1
99 1.1 1
100 1.2 1
101 1.4 1
102 1.8 1
103 1.8 1
104 1.8 1
105 2.0 1
106 1.7 1
107 1.6 1
108 1.4 1
109 1.5

439 1.4 0.947787400001719
440 1.4 0.9475579000017266
441 1.5 0.9473671000017329
442 1.6 0.9470764000017424
443 1.7 0.9467893000017519
444 1.6 0.9465157000017609
445 1.3 0.9463564000017661
446 1.2 0.9461962000017714
447 1.2 0.9459298000017802
448 1.3 0.9457201000017871
449 1.4 0.9455122000017939
450 1.6 0.9452143000018037
451 1.8 0.9449128000018137
452 1.5 0.944749900001819
453 1.5 0.9444196000018299
454 1.4 0.944233300001836
455 1.4 0.9440596000018417
456 1.5 0.9438346000018492
457 1.5 0.9435493000018585
458 1.4 0.9433918000018637
459 1.6 0.9430696000018743
460 1.6 0.9427933000018834
461 1.3 0.9426205000018891
462 1.4 0.942412600001896
463 1.3 0.9421552000019044
464 1.4 0.9418438000019147
465 1.5 0.9416575000019208
466 1.9 0.9412039000019358
467 1.7 0.9410401000019412
468 1.7 0.9408871000019462
469 1.6 0.9406621000019536
470 1.4 0.9404560000019604
471 1.5 0.9402463000019673
472 1.4 0.9400924000019724
473 1.3 0.9398944000019789
474 1.2 0.9396874000019857
475 1.3 0.9394516000019935
476 1

744 1.2 0.8818804000038889
745 1.2 0.8817220000038941
746 1.2 0.8815582000038995
747 1.0 0.8813953000039049
748 1.1 0.8811415000039132
749 1.2 0.8809075000039209
750 0.9 0.8807257000039269
751 1.1 0.8804341000039365
752 1.3 0.8801902000039445
753 0.9 0.8800327000039497
754 1.0 0.8798149000039569
755 1.0 0.8796574000039621
756 1.0 0.8794945000039674
757 1.2 0.8792542000039754
758 2.0 0.8787583000039917
759 2.1 0.8785153000039997
760 2.1 0.878292100004007
761 2.1 0.8779933000040169
762 2.0 0.8777818000040238
763 2.2 0.8775280000040322
764 2.1 0.8773624000040376
765 2.3 0.8770744000040471
766 2.6 0.8767585000040575
767 2.4 0.8765902000040631
768 1.8 0.8761825000040765
769 1.7 0.8759953000040827
770 2.0 0.8756614000040936
771 1.7 0.8754886000040993
772 1.7 0.8752654000041067
773 1.5 0.8751160000041116
774 1.5 0.8749405000041174
775 1.6 0.8746399000041273
776 1.4 0.8744113000041348
777 1.5 0.8742160000041412
778 1.1 0.8740477000041468
779 1.2 0.8737903000041553
780 0.9 0.8736112000041611
78

1047 1.5 0.8135173000061396
1048 1.5 0.8133040000061467
1049 1.3 0.8129503000061583
1050 1.3 0.8126803000061672
1051 1.0 0.8125282000061722
1052 1.0 0.8123770000061772
1053 1.2 0.8120683000061873
1054 1.5 0.8117704000061972
1055 1.3 0.811592200006203
1056 2.0 0.811168300006217
1057 2.0 0.8110018000062225
1058 2.1 0.8107174000062318
1059 1.8 0.8105437000062375
1060 1.8 0.8102980000062456
1061 2.1 0.8100262000062546
1062 2.1 0.80986330000626
1063 2.1 0.8095609000062699
1064 2.1 0.8092576000062799
1065 2.4 0.8089561000062898
1066 1.6 0.8088013000062949
1067 1.9 0.8085088000063045
1068 1.8 0.8082928000063117
1069 2.2 0.807947200006323
1070 2.2 0.8076970000063313
1071 2.0 0.8074846000063383
1072 2.2 0.8072371000063464
1073 2.2 0.8069311000063565
1074 2.0 0.8067151000063636
1075 1.7 0.8065549000063689
1076 1.9 0.8062966000063774
1077 1.6 0.8061238000063831
1078 1.8 0.8058097000063934
1079 1.5 0.8056063000064001
1080 1.5 0.8053255000064093
1081 1.6 0.8050879000064172
1082 1.5 0.80488270000642

1342 1.5 0.7443262000084176
1343 1.3 0.7441579000084232
1344 1.2 0.7439356000084305
1345 1.3 0.7436116000084412
1346 1.2 0.7433560000084496
1347 1.3 0.743159800008456
1348 1.4 0.7429708000084623
1349 1.4 0.7427269000084703
1350 1.5 0.7424236000084803
1351 1.5 0.7421608000084889
1352 1.5 0.7420141000084938
1353 1.5 0.741854800008499
1354 1.6 0.7416190000085068
1355 1.6 0.741309400008517
1356 1.4 0.7411546000085221
1357 2.0 0.7407640000085349
1358 2.1 0.7405156000085431
1359 2.1 0.7402618000085515
1360 2.1 0.739972900008561
1361 2.1 0.7397254000085691
1362 2.1 0.7395724000085742
1363 2.3 0.7393294000085822
1364 2.4 0.7390027000085929
1365 2.3 0.7387669000086007
1366 2.4 0.7385482000086079
1367 2.0 0.7382692000086171
1368 2.1 0.7379434000086278
1369 1.9 0.7377652000086337
1370 1.9 0.7374610000086437
1371 1.9 0.7372153000086518
1372 2.0 0.7370227000086581
1373 2.1 0.7367437000086673
1374 1.8 0.7365943000086722
1375 1.8 0.7363477000086803
1376 2.0 0.7360147000086913
1377 1.9 0.7357681000086

1636 2.4 0.6687685000109053
1637 2.2 0.6685480000109125
1638 2.1 0.6682411000109226
1639 2.1 0.6680098000109302
1640 2.3 0.6676354000109426
1641 2.5 0.6673537000109518
1642 2.1 0.667197100010957
1643 2.0 0.6669460000109653
1644 1.9 0.6667435000109719
1645 2.0 0.6665014000109799
1646 2.0 0.666225100010989
1647 2.2 0.6659578000109978
1648 2.2 0.6656662000110074
1649 2.1 0.6654853000110134
1650 1.9 0.6652081000110225
1651 2.1 0.6648112000110356
1652 2.5 0.6644566000110472
1653 2.5 0.664190200011056
1654 2.6 0.6639247000110647
1655 2.4 0.6637708000110698
1656 2.5 0.6634234000110812
1657 2.4 0.6631705000110896
1658 2.3 0.6629257000110976
1659 2.4 0.6626863000111055
1660 2.2 0.6625387000111104
1661 1.9 0.6622894000111186
1662 1.6 0.6620995000111248
1663 1.5 0.6618880000111318
1664 1.3 0.6617350000111368
1665 1.6 0.6614317000111468
1666 1.5 0.6611851000111549
1667 1.5 0.6609052000111642
1668 1.4 0.6606784000111716
1669 1.2 0.6605263000111766
1670 1.2 0.6603805000111814
1671 1.1 0.660198700011

1931 1.6 0.5923108000134225
1932 1.8 0.5919652000134339
1933 1.4 0.591811300013439
1934 1.8 0.5913712000134534
1935 1.7 0.591140800013461
1936 1.6 0.5909311000134679
1937 1.8 0.5906629000134768
1938 2.1 0.590350600013487
1939 2.2 0.5901346000134942
1940 2.3 0.5897881000135056
1941 3.0 0.5893057000135214
1942 2.8 0.5890375000135303
1943 3.0 0.5887621000135393
1944 2.6 0.5885452000135465
1945 2.9 0.5881330000135601
1946 2.8 0.5879683000135655
1947 2.8 0.587710000013574
1948 2.7 0.5874445000135827
1949 3.1 0.5870314000135963
1950 3.1 0.5867119000136068
1951 2.4 0.5864239000136163
1952 2.4 0.5861764000136245
1953 2.3 0.5859514000136319
1954 2.6 0.5855986000136435
1955 2.3 0.5853493000136517
1956 2.5 0.5851054000136597
1957 2.5 0.5848633000136677
1958 2.3 0.5847112000136727
1959 1.9 0.5845078000136794
1960 1.6 0.5843566000136844
1961 1.7 0.5840722000136938
1962 1.8 0.5837563000137042
1963 1.9 0.583519600013712
1964 1.7 0.5832775000137199
1965 1.7 0.5830066000137288
1966 1.6 0.58280950001373

2225 2.8 0.5123620000160547
2226 2.7 0.5121739000160609
2227 2.6 0.511987600016067
2228 2.7 0.5116762000160773
2229 3.0 0.5111938000160932
2230 3.2 0.5108554000161043
2231 3.2 0.5104243000161185
2232 3.1 0.5102461000161244
2233 2.9 0.5100121000161321
2234 3.0 0.5098294000161381
2235 2.4 0.5096278000161447
2236 2.5 0.5093794000161529
2237 2.6 0.509101300016162
2238 2.7 0.5087404000161739
2239 2.3 0.5084704000161828
2240 2.2 0.5082103000161914
2241 1.8 0.5080033000161982
2242 1.8 0.5077801000162055
2243 1.7 0.5075803000162121
2244 2.0 0.5072014000162246
2245 2.1 0.5069341000162334
2246 2.1 0.5066983000162412
2247 2.1 0.5064580000162491
2248 1.9 0.5061907000162579
2249 1.7 0.5060404000162628
2250 1.7 0.5057686000162718
2251 1.8 0.5055004000162806
2252 1.9 0.5052232000162897
2253 2.1 0.5049199000162997
2254 2.0 0.5046346000163091
2255 1.8 0.5044807000163142
2256 1.7 0.5042782000163208
2257 1.7 0.504028900016329
2258 1.9 0.5036464000163416
2259 2.1 0.50339350001635
2260 2.1 0.50315140001635

2515 2.4 0.4312909000144859
2516 2.6 0.4309399000144758
2517 2.6 0.43067350001446814
2518 2.9 0.4303009000144574
2519 2.8 0.4300975000144516
2520 2.8 0.4298230000144437
2521 2.6 0.4295737000144365
2522 2.5 0.4293550000144302
2523 2.4 0.4291147000144233
2524 2.2 0.4288429000144155
2525 2.4 0.42846850001440473
2526 2.3 0.4281517000143956
2527 2.3 0.4278979000143883
2528 2.2 0.4275766000143791
2529 2.3 0.42732100001437173
2530 2.2 0.42709600001436526
2531 2.3 0.42678280001435626
2532 2.5 0.4264651000143471
2533 2.3 0.4263139000143428
2534 2.3 0.42599260001433353
2535 2.2 0.4256713000143243
2536 2.1 0.42544720001431785
2537 2.4 0.4250737000143071
2538 2.1 0.42492250001430276
2539 2.2 0.42462370001429417
2540 2.3 0.42438250001428723
2541 2.2 0.4241440000142804
2542 2.1 0.4238695000142725
2543 2.1 0.4237201000142682
2544 2.1 0.42344020001426014
2545 1.9 0.4232476000142546
2546 1.9 0.4229731000142467
2547 1.6 0.42271660001423933
2548 1.8 0.4224475000142316
2549 1.7 0.42220630001422466
2550 1.

2805 2.9 0.3516373000121954
2806 2.7 0.35139250001218836
2807 2.6 0.3511117000121803
2808 2.5 0.3509596000121759
2809 2.5 0.3506986000121684
2810 2.4 0.35039080001215955
2811 2.4 0.35011630001215166
2812 2.3 0.34987240001214465
2813 1.9 0.3495412000121351
2814 1.8 0.3493252000121289
2815 1.6 0.34912180001212306
2816 1.6 0.3488446000121151
2817 1.6 0.34855750001210684
2818 1.8 0.34827220001209863
2819 1.8 0.3480274000120916
2820 1.7 0.34780060001208507
2821 1.8 0.34747570001207573
2822 1.8 0.3472660000120697
2823 1.7 0.34700320001206214
2824 2.0 0.34659730001205047
2825 2.2 0.34626250001204084
2826 2.3 0.345955600012032
2827 2.4 0.34564780001202317
2828 2.4 0.3453508000120146
2829 2.3 0.34515010001200885
2830 2.4 0.34490980001200194
2831 2.3 0.3445921000119928
2832 2.4 0.3443239000119851
2833 2.7 0.34390090001197293
2834 2.5 0.3436561000119659
2835 2.5 0.3433186000119562
2836 2.4 0.34303600001194806
2837 2.3 0.34279210001194105
2838 2.5 0.3423907000119295
2839 2.6 0.34209010001192086
28

3094 2.8 0.26854210000980594
3095 2.7 0.26838190000980133
3096 2.7 0.26807320000979246
3097 2.8 0.2677276000097825
3098 2.8 0.2674558000097747
3099 2.8 0.26702290000976225
3100 2.8 0.2667214000097536
3101 2.9 0.26642170000974497
3102 2.8 0.26615260000973723
3103 2.6 0.26586190000972887
3104 2.6 0.26559640000972123
3105 2.7 0.26534620000971404
3106 2.4 0.26518330000970936
3107 2.2 0.2647981000096983
3108 2.4 0.26442910000968767
3109 2.2 0.2641699000096802
3110 2.2 0.2637946000096694
3111 2.1 0.2635048000096611
3112 2.2 0.2632096000096526
3113 2.3 0.26288470000964326
3114 2.3 0.26255800000963386
3115 2.4 0.26228980000962615
3116 2.6 0.26204950000961924
3117 2.4 0.2618389000096132
3118 2.1 0.26155990000960516
3119 2.0 0.2613205000095983
3120 1.9 0.26104060000959023
3121 1.9 0.2607787000095827
3122 1.9 0.26043940000957294
3123 1.6 0.26029000000956865
3124 1.6 0.2599831000095598
3125 1.7 0.25964290000955004
3126 1.7 0.25937560000954235
3127 1.8 0.25909390000953425
3128 1.8 0.258839200009526

3380 1.9 0.18539650000940738
3381 2.2 0.18489430000940843
3382 2.3 0.18464950000940894
3383 2.4 0.18440830000940944
3384 2.3 0.18416080000940996
3385 2.3 0.18381610000941068
3386 2.3 0.18348940000941136
3387 2.2 0.18324640000941186
3388 2.3 0.18290440000941258
3389 2.3 0.18261280000941318
3390 2.4 0.18238510000941366
3391 2.0 0.18214570000941416
3392 2.2 0.18176860000941494
3393 2.3 0.1814014000094157
3394 2.9 0.18093610000941668
3395 3.1 0.18051490000941756
3396 3.2 0.18012880000941836
3397 3.4 0.17975350000941914
3398 3.2 0.17944660000941978
3399 3.0 0.17930080000942009
3400 3.0 0.17911630000942047
3401 3.1 0.17880670000942112
3402 2.7 0.17865460000942143
3403 2.5 0.17844760000942186
3404 1.8 0.17822980000942232
3405 1.6 0.1779562000094229
3406 1.6 0.1776124000094236
3407 1.4 0.17736490000942412
3408 1.5 0.17707240000942473
3409 1.7 0.17677810000942534
3410 1.8 0.1765072000094259
3411 1.7 0.17625070000942644
3412 2.0 0.17583130000942732
3413 2.1 0.17558290000942783
3414 2.2 0.1753318

3665 2.7 0.09999910000958534
3666 2.8 0.09999910000958534
3667 2.8 0.09999910000958534
3668 2.5 0.09999910000958534
3669 2.7 0.09999910000958534
3670 2.8 0.09999910000958534
3671 2.6 0.09999910000958534
3672 2.4 0.09999910000958534
3673 2.4 0.09999910000958534
3674 2.5 0.09999910000958534
3675 2.5 0.09999910000958534
3676 2.4 0.09999910000958534
3677 2.4 0.09999910000958534
3678 2.4 0.09999910000958534
3679 2.4 0.09999910000958534
3680 2.3 0.09999910000958534
3681 2.5 0.09999910000958534
3682 2.4 0.09999910000958534
3683 2.4 0.09999910000958534
3684 2.3 0.09999910000958534
3685 2.1 0.09999910000958534
3686 2.2 0.09999910000958534
3687 2.3 0.09999910000958534
3688 2.3 0.09999910000958534
3689 2.4 0.09999910000958534
3690 2.4 0.09999910000958534
3691 2.2 0.09999910000958534
3692 2.2 0.09999910000958534
3693 2.2 0.09999910000958534
3694 2.3 0.09999910000958534
3695 2.5 0.09999910000958534
3696 2.4 0.09999910000958534
3697 2.5 0.09999910000958534
3698 2.7 0.09999910000958534
3699 2.9 0.099

3948 2.6 0.09999910000958534
3949 2.6 0.09999910000958534
3950 2.6 0.09999910000958534
3951 2.5 0.09999910000958534
3952 2.6 0.09999910000958534
3953 2.7 0.09999910000958534
3954 2.7 0.09999910000958534
3955 2.7 0.09999910000958534
3956 2.7 0.09999910000958534
3957 2.4 0.09999910000958534
3958 2.4 0.09999910000958534
3959 2.4 0.09999910000958534
3960 2.4 0.09999910000958534
3961 2.2 0.09999910000958534
3962 2.1 0.09999910000958534
3963 2.1 0.09999910000958534
3964 2.1 0.09999910000958534
3965 2.0 0.09999910000958534
3966 2.0 0.09999910000958534
3967 2.2 0.09999910000958534
3968 1.8 0.09999910000958534
3969 1.9 0.09999910000958534
3970 2.1 0.09999910000958534
3971 2.3 0.09999910000958534
3972 2.3 0.09999910000958534
3973 2.2 0.09999910000958534
3974 2.2 0.09999910000958534
3975 2.2 0.09999910000958534
3976 2.2 0.09999910000958534
3977 2.3 0.09999910000958534
3978 2.6 0.09999910000958534
3979 2.6 0.09999910000958534
3980 2.4 0.09999910000958534
3981 2.4 0.09999910000958534
3982 2.5 0.099

4231 2.7 0.09999910000958534
4232 2.6 0.09999910000958534
4233 2.7 0.09999910000958534
4234 2.7 0.09999910000958534
4235 2.6 0.09999910000958534
4236 2.5 0.09999910000958534
4237 2.6 0.09999910000958534
4238 2.4 0.09999910000958534
4239 2.3 0.09999910000958534
4240 2.4 0.09999910000958534
4241 2.6 0.09999910000958534
4242 2.6 0.09999910000958534
4243 2.5 0.09999910000958534
4244 2.5 0.09999910000958534
4245 2.6 0.09999910000958534
4246 2.5 0.09999910000958534
4247 2.6 0.09999910000958534
4248 3.1 0.09999910000958534
4249 3.0 0.09999910000958534
4250 2.9 0.09999910000958534
4251 2.6 0.09999910000958534
4252 2.6 0.09999910000958534
4253 2.7 0.09999910000958534
4254 2.6 0.09999910000958534
4255 2.7 0.09999910000958534
4256 2.7 0.09999910000958534
4257 2.7 0.09999910000958534
4258 2.4 0.09999910000958534
4259 2.4 0.09999910000958534
4260 2.6 0.09999910000958534
4261 2.8 0.09999910000958534
4262 2.8 0.09999910000958534
4263 2.8 0.09999910000958534
4264 2.8 0.09999910000958534
4265 2.7 0.099

4514 2.3 0.09999910000958534
4515 2.5 0.09999910000958534
4516 2.2 0.09999910000958534
4517 2.4 0.09999910000958534
4518 2.5 0.09999910000958534
4519 2.6 0.09999910000958534
4520 2.6 0.09999910000958534
4521 2.6 0.09999910000958534
4522 2.7 0.09999910000958534
4523 2.6 0.09999910000958534
4524 2.6 0.09999910000958534
4525 2.8 0.09999910000958534
4526 3.0 0.09999910000958534
4527 2.8 0.09999910000958534
4528 2.9 0.09999910000958534
4529 2.8 0.09999910000958534
4530 2.9 0.09999910000958534
4531 2.9 0.09999910000958534
4532 2.8 0.09999910000958534
4533 2.9 0.09999910000958534
4534 2.8 0.09999910000958534
4535 2.7 0.09999910000958534
4536 2.7 0.09999910000958534
4537 2.6 0.09999910000958534
4538 2.4 0.09999910000958534
4539 2.2 0.09999910000958534
4540 2.2 0.09999910000958534
4541 2.0 0.09999910000958534
4542 2.1 0.09999910000958534
4543 2.2 0.09999910000958534
4544 2.3 0.09999910000958534
4545 2.1 0.09999910000958534
4546 2.0 0.09999910000958534
4547 2.3 0.09999910000958534
4548 2.3 0.099

In [187]:
a.shape

(80, 80, 3)

In [183]:
s= env.reset()