# DQN

* 기존의 Q Table은 잘 수렴하지만 Q-Network는 수렴하지 않는다. 이유는 아래와 같다.
    * Correlations between samples : state에 따라 받아오는 결과값은 다들 유사하다. 상호 연관성이 크다 보니 큰 특징을 찾아내지 못한다.
    * Non-stationary targets : target 값인 y가 q_pred와 같은 네트워크를 공유하기 때문에 가중치인 w가 바뀌다보면 target 값 y도 바뀌어버린다.
<br><br>
* 어떻게 해결했는가?
    * Go deep
    * Capture and replay : agent의 행동에 따라 돌려받는 값들을 바로 학습하지 않고 버퍼에 저장해 둔 후에 랜덤으로 샘플을 추출하여 학습한다. 랜덤 추출을 통해서 전체 그래프의 양상을 파악할 수 있다.
    * Separate networks (create a target network)

In [1]:
import numpy as np
import gym
import tensorflow as tf
import matplotlib.pyplot as plt
from collections import deque
import random

# CartPole-v0 environment shared by the training loops and bot_play below.
env = gym.make('CartPole-v0')

# Network dimensions: first axis of the observation space and the number of
# actions reported by the action space.
input_size = env.observation_space.shape[0]
output_size = env.action_space.n

# Discount factor multiplied with the best next-state Q value in the targets.
dis = 0.9
# Upper bound on the number of transitions kept in the replay buffer.
REPLAY_MEMORY = 50000

In [2]:
class DQN:
    """Small fully connected Q-network built on a TensorFlow 1.x graph.

    The network maps a state vector to one Q value per action through a
    single tanh hidden layer (weights only, no bias terms).  All variables
    live under the variable scope given by ``name``.
    """

    def __init__(self, session, input_size, output_size, name ='main'):
        """Store the session and sizes, then build the graph for this net."""
        self.session = session
        self.input_size, self.output_size = input_size, output_size
        self.net_name = name

        self._build_network()

    def _build_network(self, h_size = 10, l_rate =1e-1):
        """Create placeholders, weights, loss and the training op.

        h_size is the width of the hidden layer and l_rate the learning rate
        handed to the Adam optimizer.
        """
        make_init = tf.contrib.layers.xavier_initializer

        with tf.variable_scope(self.net_name):
            # Batch of input states, one row per state.
            self._X = tf.placeholder(
                tf.float32, shape=[None, self.input_size], name='input_x')

            # Hidden layer: tanh(X . W1)
            hidden_w = tf.get_variable(
                'W1',
                shape=[self.input_size, h_size],
                initializer=make_init())
            hidden = tf.nn.tanh(tf.matmul(self._X, hidden_w))

            # Output layer: linear Q values, hidden . W2
            output_w = tf.get_variable(
                'W2',
                shape=[h_size, self.output_size],
                initializer=make_init())
            self._Qpred = tf.matmul(hidden, output_w)

        # Target Q values fed in during training (created outside the scope).
        self._Y = tf.placeholder(
            shape=[None, self.output_size], dtype=tf.float32)

        # Mean squared difference between targets and predictions.
        residual = self._Y - self._Qpred
        self._loss = tf.reduce_mean(tf.square(residual))

        optimizer = tf.train.AdamOptimizer(learning_rate=l_rate)
        self._train = optimizer.minimize(self._loss)

    def predict(self, state):
        """Return the Q values for a single state as a (1, output_size) batch."""
        single_row = np.reshape(state, [1, self.input_size])
        feed = {self._X: single_row}
        return self.session.run(self._Qpred, feed_dict=feed)

    def update(self, x_stack, y_stack):
        """Run one optimizer step on the batch and return [loss, train-op result]."""
        fetches = [self._loss, self._train]
        feed = {self._X: x_stack, self._Y: y_stack}
        return self.session.run(fetches, feed_dict=feed)

In [3]:
def simple_replay_train(DQN, train_batch, discount=None):
    """Fit the network on a minibatch of replayed transitions.

    For every ``(state, action, reward, next_state, done)`` tuple the current
    Q prediction for ``state`` is taken as the target, with the entry of the
    chosen action overwritten by ``reward`` (terminal transition) or
    ``reward + discount * max Q(next_state)`` (otherwise).

    Args:
        DQN: network object exposing ``predict``, ``update``, ``input_size``
            and ``output_size``.  The name shadows the ``DQN`` class inside
            this function; it is kept so existing callers are unaffected.
        train_batch: iterable of transition tuples.
        discount: discount factor; when ``None`` the module-level ``dis``
            is used.

    Returns:
        Whatever ``DQN.update`` returns for the stacked states and targets.
    """
    gamma = dis if discount is None else discount

    states = []
    targets = []
    for state, action, reward, next_state, done in train_batch:
        # predict() returns a fresh (1, output_size) array, edited in place.
        Q = DQN.predict(state)

        if done:
            Q[0, action] = reward
        else:
            Q[0, action] = reward + gamma * np.max(DQN.predict(next_state))

        states.append(state)
        targets.append(Q)

    # Stack once instead of re-allocating the arrays on every iteration.  The
    # leading empty array keeps the float64 dtype and the (0, size) shape for
    # an empty batch.
    x_stack = np.vstack([np.empty((0, DQN.input_size))] + states)
    y_stack = np.vstack([np.empty((0, DQN.output_size))] + targets)

    return DQN.update(x_stack, y_stack)

def bot_play(mainDQN):
    """Render one episode played greedily by mainDQN and print its total reward."""
    observation = env.reset()
    total = 0
    finished = False
    while not finished:
        env.render()
        # Always take the action with the highest predicted Q value.
        greedy_action = np.argmax(mainDQN.predict(observation))
        observation, reward, finished, _ = env.step(greedy_action)
        total += reward
    print('Total score : {}'.format(total))
            
def main():
    """Train a single-network DQN on the module-level env, then play once.

    Each episode is played epsilon-greedily, with epsilon decaying as
    ``1 / (episode / 10 + 1)``.  Every transition is stored in a bounded
    replay buffer.  Whenever ``episode % 10 == 1`` the network is fitted on
    50 random minibatches of up to 10 transitions and the last loss is
    printed.  After the final episode ``bot_play`` renders one greedy run.
    """
    max_episodes = 5000
    batch_size = 10
    train_iterations = 50

    # A bounded deque discards the oldest transition automatically once
    # REPLAY_MEMORY entries are stored.
    replay_buffer = deque(maxlen=REPLAY_MEMORY)

    with tf.Session() as sess:
        mainDQN = DQN(sess, input_size, output_size)
        tf.global_variables_initializer().run()

        for episode in range(max_episodes):
            e = 1. / ((episode / 10) + 1)
            done = False
            step_count = 0

            state = env.reset()

            while not done:
                if np.random.rand(1) < e:
                    action = env.action_space.sample()
                else:
                    action = np.argmax(mainDQN.predict(state))

                next_state, reward, done, info = env.step(action)

                # An episode cut off by the step limit is not a failure: do
                # not penalise it and do not mark it terminal.  This relies on
                # gym's TimeLimit wrapper setting 'TimeLimit.truncated'; when
                # the key is absent the old behaviour (every done is a
                # failure) is kept.
                truncated = bool(info.get('TimeLimit.truncated', False))
                terminal = done and not truncated
                if terminal:
                    reward = -100

                replay_buffer.append(
                    (state, action, reward, next_state, terminal))

                state = next_state
                step_count += 1
                # Safety cap in case the environment never reports done.
                if step_count > 10000:
                    break

            print("Episode : {}  steps : {}".format(episode, step_count))

            if episode % 10 == 1:
                for _ in range(train_iterations):
                    # Never request more samples than the buffer holds.
                    sample_size = min(batch_size, len(replay_buffer))
                    minibatch = random.sample(replay_buffer, sample_size)
                    loss, _ = simple_replay_train(mainDQN, minibatch)

                print("Loss : ", loss)

        bot_play(mainDQN)
        
# Entry point: run the single-network training when executed directly.
if __name__ == '__main__':
    main()

W1129 16:20:28.782868  1972 lazy_loader.py:50] 
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



Episode : 0  steps : 12
Episode : 1  steps : 13
Loss :  2.0432572
Episode : 2  steps : 13
Episode : 3  steps : 17
Episode : 4  steps : 13
Episode : 5  steps : 20
Episode : 6  steps : 12
Episode : 7  steps : 14
Episode : 8  steps : 25
Episode : 9  steps : 14
Episode : 10  steps : 15
Episode : 11  steps : 14
Loss :  1007.2809
Episode : 12  steps : 80
Episode : 13  steps : 17
Episode : 14  steps : 81
Episode : 15  steps : 131
Episode : 16  steps : 89
Episode : 17  steps : 91
Episode : 18  steps : 165
Episode : 19  steps : 200
Episode : 20  steps : 124
Episode : 21  steps : 89
Loss :  7.7944474
Episode : 22  steps : 106
Episode : 23  steps : 59
Episode : 24  steps : 61
Episode : 25  steps : 70
Episode : 26  steps : 81
Episode : 27  steps : 34
Episode : 28  steps : 49
Episode : 29  steps : 93
Episode : 30  steps : 70
Episode : 31  steps : 61
Loss :  1.2611461
Episode : 32  steps : 11
Episode : 33  steps : 11
Episode : 34  steps : 9
Episode : 35  steps : 8
Episode : 36  steps : 10
Episode : 

Episode : 300  steps : 25
Episode : 301  steps : 35
Loss :  2.553634
Episode : 302  steps : 54
Episode : 303  steps : 24
Episode : 304  steps : 33
Episode : 305  steps : 39
Episode : 306  steps : 49
Episode : 307  steps : 19
Episode : 308  steps : 44
Episode : 309  steps : 71
Episode : 310  steps : 28
Episode : 311  steps : 60
Loss :  1.6439774
Episode : 312  steps : 200
Episode : 313  steps : 200
Episode : 314  steps : 200
Episode : 315  steps : 71
Episode : 316  steps : 200
Episode : 317  steps : 200
Episode : 318  steps : 200
Episode : 319  steps : 200
Episode : 320  steps : 200
Episode : 321  steps : 200
Loss :  0.81975543
Episode : 322  steps : 24
Episode : 323  steps : 49
Episode : 324  steps : 29
Episode : 325  steps : 131
Episode : 326  steps : 27
Episode : 327  steps : 129
Episode : 328  steps : 34
Episode : 329  steps : 36
Episode : 330  steps : 65
Episode : 331  steps : 30
Loss :  505.91803
Episode : 332  steps : 81
Episode : 333  steps : 117
Episode : 334  steps : 91
Episod

Episode : 598  steps : 26
Episode : 599  steps : 23
Episode : 600  steps : 15
Episode : 601  steps : 16
Loss :  2.9977334
Episode : 602  steps : 22
Episode : 603  steps : 24
Episode : 604  steps : 41
Episode : 605  steps : 71
Episode : 606  steps : 44
Episode : 607  steps : 49
Episode : 608  steps : 33
Episode : 609  steps : 23
Episode : 610  steps : 46
Episode : 611  steps : 46
Loss :  494.13614
Episode : 612  steps : 10
Episode : 613  steps : 10
Episode : 614  steps : 10
Episode : 615  steps : 10
Episode : 616  steps : 10
Episode : 617  steps : 9
Episode : 618  steps : 10
Episode : 619  steps : 10
Episode : 620  steps : 10
Episode : 621  steps : 10
Loss :  1.5331969
Episode : 622  steps : 24
Episode : 623  steps : 21
Episode : 624  steps : 25
Episode : 625  steps : 50
Episode : 626  steps : 42
Episode : 627  steps : 38
Episode : 628  steps : 28
Episode : 629  steps : 38
Episode : 630  steps : 49
Episode : 631  steps : 32
Loss :  7.238122
Episode : 632  steps : 9
Episode : 633  steps 

Episode : 897  steps : 22
Episode : 898  steps : 20
Episode : 899  steps : 21
Episode : 900  steps : 22
Episode : 901  steps : 44
Loss :  5.2146707
Episode : 902  steps : 17
Episode : 903  steps : 10
Episode : 904  steps : 11
Episode : 905  steps : 12
Episode : 906  steps : 12
Episode : 907  steps : 10
Episode : 908  steps : 18
Episode : 909  steps : 8
Episode : 910  steps : 12
Episode : 911  steps : 9
Loss :  470.74542
Episode : 912  steps : 58
Episode : 913  steps : 77
Episode : 914  steps : 98
Episode : 915  steps : 41
Episode : 916  steps : 169
Episode : 917  steps : 41
Episode : 918  steps : 48
Episode : 919  steps : 146
Episode : 920  steps : 163
Episode : 921  steps : 52
Loss :  1.6248977
Episode : 922  steps : 31
Episode : 923  steps : 31
Episode : 924  steps : 47
Episode : 925  steps : 32
Episode : 926  steps : 21
Episode : 927  steps : 43
Episode : 928  steps : 24
Episode : 929  steps : 19
Episode : 930  steps : 31
Episode : 931  steps : 40
Loss :  1.1592582
Episode : 932  st

Episode : 1186  steps : 29
Episode : 1187  steps : 22
Episode : 1188  steps : 38
Episode : 1189  steps : 42
Episode : 1190  steps : 27
Episode : 1191  steps : 18
Loss :  1.1583374
Episode : 1192  steps : 20
Episode : 1193  steps : 34
Episode : 1194  steps : 35
Episode : 1195  steps : 15
Episode : 1196  steps : 37
Episode : 1197  steps : 29
Episode : 1198  steps : 31
Episode : 1199  steps : 26
Episode : 1200  steps : 22
Episode : 1201  steps : 22
Loss :  3.5508766
Episode : 1202  steps : 8
Episode : 1203  steps : 16
Episode : 1204  steps : 9
Episode : 1205  steps : 15
Episode : 1206  steps : 18
Episode : 1207  steps : 24
Episode : 1208  steps : 12
Episode : 1209  steps : 21
Episode : 1210  steps : 11
Episode : 1211  steps : 36
Loss :  5.654504
Episode : 1212  steps : 57
Episode : 1213  steps : 58
Episode : 1214  steps : 68
Episode : 1215  steps : 62
Episode : 1216  steps : 49
Episode : 1217  steps : 69
Episode : 1218  steps : 40
Episode : 1219  steps : 33
Episode : 1220  steps : 54
Epis

Episode : 1476  steps : 23
Episode : 1477  steps : 37
Episode : 1478  steps : 31
Episode : 1479  steps : 31
Episode : 1480  steps : 31
Episode : 1481  steps : 28
Loss :  3.7239494
Episode : 1482  steps : 13
Episode : 1483  steps : 111
Episode : 1484  steps : 13
Episode : 1485  steps : 20
Episode : 1486  steps : 11
Episode : 1487  steps : 11
Episode : 1488  steps : 26
Episode : 1489  steps : 10
Episode : 1490  steps : 21
Episode : 1491  steps : 23
Loss :  7.441088
Episode : 1492  steps : 44
Episode : 1493  steps : 46
Episode : 1494  steps : 83
Episode : 1495  steps : 53
Episode : 1496  steps : 47
Episode : 1497  steps : 71
Episode : 1498  steps : 60
Episode : 1499  steps : 39
Episode : 1500  steps : 55
Episode : 1501  steps : 80
Loss :  1.2655933
Episode : 1502  steps : 60
Episode : 1503  steps : 66
Episode : 1504  steps : 99
Episode : 1505  steps : 62
Episode : 1506  steps : 62
Episode : 1507  steps : 56
Episode : 1508  steps : 44
Episode : 1509  steps : 81
Episode : 1510  steps : 51
E

Loss :  2.924733
Episode : 1762  steps : 21
Episode : 1763  steps : 30
Episode : 1764  steps : 17
Episode : 1765  steps : 12
Episode : 1766  steps : 22
Episode : 1767  steps : 27
Episode : 1768  steps : 33
Episode : 1769  steps : 32
Episode : 1770  steps : 21
Episode : 1771  steps : 24
Loss :  9.609559
Episode : 1772  steps : 23
Episode : 1773  steps : 33
Episode : 1774  steps : 27
Episode : 1775  steps : 22
Episode : 1776  steps : 71
Episode : 1777  steps : 26
Episode : 1778  steps : 22
Episode : 1779  steps : 20
Episode : 1780  steps : 30
Episode : 1781  steps : 26
Loss :  0.9751568
Episode : 1782  steps : 49
Episode : 1783  steps : 54
Episode : 1784  steps : 97
Episode : 1785  steps : 80
Episode : 1786  steps : 48
Episode : 1787  steps : 103
Episode : 1788  steps : 59
Episode : 1789  steps : 62
Episode : 1790  steps : 88
Episode : 1791  steps : 46
Loss :  5.415123
Episode : 1792  steps : 53
Episode : 1793  steps : 80
Episode : 1794  steps : 67
Episode : 1795  steps : 54
Episode : 17

Loss :  7.775611
Episode : 2052  steps : 63
Episode : 2053  steps : 66
Episode : 2054  steps : 53
Episode : 2055  steps : 46
Episode : 2056  steps : 50
Episode : 2057  steps : 50
Episode : 2058  steps : 59
Episode : 2059  steps : 46
Episode : 2060  steps : 72
Episode : 2061  steps : 77
Loss :  3.2911687
Episode : 2062  steps : 68
Episode : 2063  steps : 65
Episode : 2064  steps : 91
Episode : 2065  steps : 54
Episode : 2066  steps : 55
Episode : 2067  steps : 44
Episode : 2068  steps : 65
Episode : 2069  steps : 68
Episode : 2070  steps : 64
Episode : 2071  steps : 66
Loss :  2.4988706
Episode : 2072  steps : 57
Episode : 2073  steps : 51
Episode : 2074  steps : 40
Episode : 2075  steps : 32
Episode : 2076  steps : 55
Episode : 2077  steps : 42
Episode : 2078  steps : 43
Episode : 2079  steps : 56
Episode : 2080  steps : 40
Episode : 2081  steps : 41
Loss :  468.86566
Episode : 2082  steps : 54
Episode : 2083  steps : 47
Episode : 2084  steps : 67
Episode : 2085  steps : 71
Episode : 2

Episode : 2341  steps : 22
Loss :  480.95465
Episode : 2342  steps : 30
Episode : 2343  steps : 46
Episode : 2344  steps : 28
Episode : 2345  steps : 21
Episode : 2346  steps : 36
Episode : 2347  steps : 23
Episode : 2348  steps : 34
Episode : 2349  steps : 40
Episode : 2350  steps : 26
Episode : 2351  steps : 29
Loss :  5.0476847
Episode : 2352  steps : 56
Episode : 2353  steps : 46
Episode : 2354  steps : 50
Episode : 2355  steps : 45
Episode : 2356  steps : 26
Episode : 2357  steps : 32
Episode : 2358  steps : 23
Episode : 2359  steps : 76
Episode : 2360  steps : 29
Episode : 2361  steps : 54
Loss :  486.82715
Episode : 2362  steps : 10
Episode : 2363  steps : 10
Episode : 2364  steps : 10
Episode : 2365  steps : 9
Episode : 2366  steps : 10
Episode : 2367  steps : 8
Episode : 2368  steps : 10
Episode : 2369  steps : 10
Episode : 2370  steps : 9
Episode : 2371  steps : 9
Loss :  4.6729555
Episode : 2372  steps : 25
Episode : 2373  steps : 30
Episode : 2374  steps : 67
Episode : 2375

Episode : 2627  steps : 28
Episode : 2628  steps : 34
Episode : 2629  steps : 31
Episode : 2630  steps : 41
Episode : 2631  steps : 30
Loss :  2.6535606
Episode : 2632  steps : 14
Episode : 2633  steps : 18
Episode : 2634  steps : 27
Episode : 2635  steps : 16
Episode : 2636  steps : 15
Episode : 2637  steps : 25
Episode : 2638  steps : 13
Episode : 2639  steps : 16
Episode : 2640  steps : 23
Episode : 2641  steps : 18
Loss :  865.5021
Episode : 2642  steps : 32
Episode : 2643  steps : 20
Episode : 2644  steps : 15
Episode : 2645  steps : 25
Episode : 2646  steps : 28
Episode : 2647  steps : 33
Episode : 2648  steps : 19
Episode : 2649  steps : 15
Episode : 2650  steps : 24
Episode : 2651  steps : 24
Loss :  3.0719876
Episode : 2652  steps : 19
Episode : 2653  steps : 26
Episode : 2654  steps : 22
Episode : 2655  steps : 22
Episode : 2656  steps : 32
Episode : 2657  steps : 32
Episode : 2658  steps : 23
Episode : 2659  steps : 41
Episode : 2660  steps : 25
Episode : 2661  steps : 26
Lo

Loss :  6.3982954
Episode : 2912  steps : 18
Episode : 2913  steps : 21
Episode : 2914  steps : 31
Episode : 2915  steps : 16
Episode : 2916  steps : 17
Episode : 2917  steps : 26
Episode : 2918  steps : 18
Episode : 2919  steps : 25
Episode : 2920  steps : 17
Episode : 2921  steps : 15
Loss :  498.34808
Episode : 2922  steps : 21
Episode : 2923  steps : 22
Episode : 2924  steps : 18
Episode : 2925  steps : 21
Episode : 2926  steps : 22
Episode : 2927  steps : 23
Episode : 2928  steps : 64
Episode : 2929  steps : 36
Episode : 2930  steps : 60
Episode : 2931  steps : 29
Loss :  3.7231793
Episode : 2932  steps : 48
Episode : 2933  steps : 19
Episode : 2934  steps : 20
Episode : 2935  steps : 49
Episode : 2936  steps : 28
Episode : 2937  steps : 41
Episode : 2938  steps : 34
Episode : 2939  steps : 31
Episode : 2940  steps : 22
Episode : 2941  steps : 40
Loss :  9.513749
Episode : 2942  steps : 87
Episode : 2943  steps : 31
Episode : 2944  steps : 29
Episode : 2945  steps : 22
Episode : 2

Episode : 3199  steps : 20
Episode : 3200  steps : 22
Episode : 3201  steps : 18
Loss :  487.96988
Episode : 3202  steps : 30
Episode : 3203  steps : 32
Episode : 3204  steps : 42
Episode : 3205  steps : 22
Episode : 3206  steps : 14
Episode : 3207  steps : 21
Episode : 3208  steps : 28
Episode : 3209  steps : 27
Episode : 3210  steps : 24
Episode : 3211  steps : 21
Loss :  1.7415409
Episode : 3212  steps : 21
Episode : 3213  steps : 50
Episode : 3214  steps : 58
Episode : 3215  steps : 58
Episode : 3216  steps : 55
Episode : 3217  steps : 82
Episode : 3218  steps : 68
Episode : 3219  steps : 54
Episode : 3220  steps : 20
Episode : 3221  steps : 69
Loss :  504.22623
Episode : 3222  steps : 20
Episode : 3223  steps : 24
Episode : 3224  steps : 21
Episode : 3225  steps : 27
Episode : 3226  steps : 37
Episode : 3227  steps : 31
Episode : 3228  steps : 29
Episode : 3229  steps : 19
Episode : 3230  steps : 24
Episode : 3231  steps : 49
Loss :  502.02896
Episode : 3232  steps : 55
Episode : 

Episode : 3488  steps : 33
Episode : 3489  steps : 23
Episode : 3490  steps : 26
Episode : 3491  steps : 56
Loss :  36.62658
Episode : 3492  steps : 37
Episode : 3493  steps : 39
Episode : 3494  steps : 50
Episode : 3495  steps : 40
Episode : 3496  steps : 49
Episode : 3497  steps : 44
Episode : 3498  steps : 46
Episode : 3499  steps : 56
Episode : 3500  steps : 78
Episode : 3501  steps : 45
Loss :  3.5000324
Episode : 3502  steps : 9
Episode : 3503  steps : 17
Episode : 3504  steps : 16
Episode : 3505  steps : 17
Episode : 3506  steps : 18
Episode : 3507  steps : 20
Episode : 3508  steps : 9
Episode : 3509  steps : 20
Episode : 3510  steps : 13
Episode : 3511  steps : 18
Loss :  2.7474809
Episode : 3512  steps : 28
Episode : 3513  steps : 23
Episode : 3514  steps : 27
Episode : 3515  steps : 21
Episode : 3516  steps : 23
Episode : 3517  steps : 56
Episode : 3518  steps : 27
Episode : 3519  steps : 28
Episode : 3520  steps : 25
Episode : 3521  steps : 24
Loss :  1.0446675
Episode : 352

Episode : 3774  steps : 82
Episode : 3775  steps : 71
Episode : 3776  steps : 53
Episode : 3777  steps : 82
Episode : 3778  steps : 72
Episode : 3779  steps : 103
Episode : 3780  steps : 56
Episode : 3781  steps : 62
Loss :  561.5423
Episode : 3782  steps : 27
Episode : 3783  steps : 22
Episode : 3784  steps : 23
Episode : 3785  steps : 26
Episode : 3786  steps : 23
Episode : 3787  steps : 33
Episode : 3788  steps : 32
Episode : 3789  steps : 30
Episode : 3790  steps : 31
Episode : 3791  steps : 30
Loss :  11.408519
Episode : 3792  steps : 24
Episode : 3793  steps : 24
Episode : 3794  steps : 23
Episode : 3795  steps : 25
Episode : 3796  steps : 25
Episode : 3797  steps : 23
Episode : 3798  steps : 32
Episode : 3799  steps : 24
Episode : 3800  steps : 32
Episode : 3801  steps : 24
Loss :  10.800264
Episode : 3802  steps : 79
Episode : 3803  steps : 50
Episode : 3804  steps : 78
Episode : 3805  steps : 92
Episode : 3806  steps : 68
Episode : 3807  steps : 68
Episode : 3808  steps : 63
E

Episode : 4060  steps : 124
Episode : 4061  steps : 72
Loss :  530.8099
Episode : 4062  steps : 20
Episode : 4063  steps : 28
Episode : 4064  steps : 30
Episode : 4065  steps : 23
Episode : 4066  steps : 22
Episode : 4067  steps : 32
Episode : 4068  steps : 33
Episode : 4069  steps : 24
Episode : 4070  steps : 21
Episode : 4071  steps : 26
Loss :  526.9228
Episode : 4072  steps : 31
Episode : 4073  steps : 38
Episode : 4074  steps : 27
Episode : 4075  steps : 22
Episode : 4076  steps : 28
Episode : 4077  steps : 38
Episode : 4078  steps : 24
Episode : 4079  steps : 25
Episode : 4080  steps : 24
Episode : 4081  steps : 25
Loss :  1.898782
Episode : 4082  steps : 26
Episode : 4083  steps : 22
Episode : 4084  steps : 36
Episode : 4085  steps : 26
Episode : 4086  steps : 48
Episode : 4087  steps : 34
Episode : 4088  steps : 23
Episode : 4089  steps : 26
Episode : 4090  steps : 25
Episode : 4091  steps : 36
Loss :  5.5470276
Episode : 4092  steps : 23
Episode : 4093  steps : 28
Episode : 40

Episode : 4349  steps : 31
Episode : 4350  steps : 22
Episode : 4351  steps : 26
Loss :  8.048412
Episode : 4352  steps : 11
Episode : 4353  steps : 10
Episode : 4354  steps : 14
Episode : 4355  steps : 11
Episode : 4356  steps : 11
Episode : 4357  steps : 14
Episode : 4358  steps : 13
Episode : 4359  steps : 13
Episode : 4360  steps : 12
Episode : 4361  steps : 13
Loss :  3.1795094
Episode : 4362  steps : 17
Episode : 4363  steps : 17
Episode : 4364  steps : 15
Episode : 4365  steps : 41
Episode : 4366  steps : 43
Episode : 4367  steps : 48
Episode : 4368  steps : 33
Episode : 4369  steps : 33
Episode : 4370  steps : 37
Episode : 4371  steps : 46
Loss :  15.93675
Episode : 4372  steps : 78
Episode : 4373  steps : 66
Episode : 4374  steps : 82
Episode : 4375  steps : 64
Episode : 4376  steps : 69
Episode : 4377  steps : 98
Episode : 4378  steps : 63
Episode : 4379  steps : 56
Episode : 4380  steps : 74
Episode : 4381  steps : 61
Loss :  7.241052
Episode : 4382  steps : 12
Episode : 438

Episode : 4639  steps : 15
Episode : 4640  steps : 18
Episode : 4641  steps : 27
Loss :  494.1563
Episode : 4642  steps : 20
Episode : 4643  steps : 23
Episode : 4644  steps : 31
Episode : 4645  steps : 28
Episode : 4646  steps : 26
Episode : 4647  steps : 34
Episode : 4648  steps : 29
Episode : 4649  steps : 26
Episode : 4650  steps : 19
Episode : 4651  steps : 31
Loss :  2.4235084
Episode : 4652  steps : 48
Episode : 4653  steps : 28
Episode : 4654  steps : 54
Episode : 4655  steps : 39
Episode : 4656  steps : 37
Episode : 4657  steps : 56
Episode : 4658  steps : 37
Episode : 4659  steps : 56
Episode : 4660  steps : 25
Episode : 4661  steps : 65
Loss :  5.5609207
Episode : 4662  steps : 30
Episode : 4663  steps : 41
Episode : 4664  steps : 27
Episode : 4665  steps : 25
Episode : 4666  steps : 39
Episode : 4667  steps : 25
Episode : 4668  steps : 46
Episode : 4669  steps : 48
Episode : 4670  steps : 24
Episode : 4671  steps : 26
Loss :  448.07098
Episode : 4672  steps : 9
Episode : 46

Episode : 4925  steps : 71
Episode : 4926  steps : 61
Episode : 4927  steps : 69
Episode : 4928  steps : 58
Episode : 4929  steps : 72
Episode : 4930  steps : 70
Episode : 4931  steps : 65
Loss :  4.345108
Episode : 4932  steps : 19
Episode : 4933  steps : 20
Episode : 4934  steps : 18
Episode : 4935  steps : 34
Episode : 4936  steps : 47
Episode : 4937  steps : 34
Episode : 4938  steps : 17
Episode : 4939  steps : 25
Episode : 4940  steps : 17
Episode : 4941  steps : 20
Loss :  1.5082062
Episode : 4942  steps : 70
Episode : 4943  steps : 63
Episode : 4944  steps : 86
Episode : 4945  steps : 60
Episode : 4946  steps : 95
Episode : 4947  steps : 70
Episode : 4948  steps : 63
Episode : 4949  steps : 58
Episode : 4950  steps : 79
Episode : 4951  steps : 97
Loss :  2.07318
Episode : 4952  steps : 29
Episode : 4953  steps : 31
Episode : 4954  steps : 21
Episode : 4955  steps : 28
Episode : 4956  steps : 22
Episode : 4957  steps : 21
Episode : 4958  steps : 20
Episode : 4959  steps : 34
Epis

## Separate networks

In [3]:
def replay_train(mainDQN, targetDQN, train_batch, discount=None):
    """Fit mainDQN on a minibatch, bootstrapping from targetDQN.

    For every ``(state, action, reward, next_state, done)`` tuple the target
    starts as mainDQN's prediction for ``state``.  The entry of the chosen
    action is replaced by ``reward`` (terminal transition) or
    ``reward + discount * max targetDQN(next_state)`` (otherwise).  Only
    mainDQN is updated.

    Args:
        mainDQN: network being trained; must expose ``predict``, ``update``,
            ``input_size`` and ``output_size``.
        targetDQN: network used only to evaluate ``next_state``.
        train_batch: iterable of transition tuples.
        discount: discount factor; when ``None`` the module-level ``dis``
            is used.

    Returns:
        Whatever ``mainDQN.update`` returns for the stacked states and targets.
    """
    gamma = dis if discount is None else discount

    states = []
    targets = []
    for state, action, reward, next_state, done in train_batch:
        # predict() returns a fresh (1, output_size) array, edited in place.
        Q = mainDQN.predict(state)

        if done:
            Q[0, action] = reward
        else:
            Q[0, action] = reward + gamma * np.max(targetDQN.predict(next_state))

        states.append(state)
        targets.append(Q)

    # Stack once instead of re-allocating the arrays on every iteration.  The
    # leading empty array keeps the float64 dtype and the (0, size) shape for
    # an empty batch.
    x_stack = np.vstack([np.empty((0, mainDQN.input_size))] + states)
    y_stack = np.vstack([np.empty((0, mainDQN.output_size))] + targets)

    return mainDQN.update(x_stack, y_stack)

def get_copy_var_ops(*, dest_scope_name = 'target', src_scope_name = 'main'):
    """Build assign ops that copy trainable variables from one scope to another.

    Variables are paired by their position in each scope's trainable-variable
    collection, so both scopes are assumed to have created their variables in
    the same order.  Returns the list of assign ops; nothing is executed here.
    """
    trainable_key = tf.GraphKeys.TRAINABLE_VARIABLES
    # Only the trainable variables (the weights) are collected.
    sources = tf.get_collection(trainable_key, scope=src_scope_name)
    destinations = tf.get_collection(trainable_key, scope=dest_scope_name)

    return [
        destination.assign(source.value())
        for source, destination in zip(sources, destinations)
    ]

def bot_play(mainDQN):
    """Play one rendered episode with the greedy policy and print the score."""
    state = env.reset()
    score = 0
    while True:
        env.render()
        q_values = mainDQN.predict(state)
        # Step with the action whose predicted Q value is largest.
        state, reward, done, _ = env.step(np.argmax(q_values))
        score += reward
        if not done:
            continue
        print('Total score : {}'.format(score))
        return

def main_s():
    """Train a DQN with a separate target network, then play once.

    Two networks are built under the scopes 'main' and 'target'.  The target
    weights are synchronised with the main weights once before training and
    again after every training round.  Each episode is played
    epsilon-greedily with mainDQN, with epsilon decaying as
    ``1 / (episode / 10 + 1)``, and every transition is stored in a bounded
    replay buffer.  Whenever ``episode % 10 == 1`` mainDQN is fitted on 50
    random minibatches of up to 10 transitions.  After the final episode
    ``bot_play`` renders one greedy run.
    """
    max_episodes = 5000
    batch_size = 10
    train_iterations = 50

    # A bounded deque discards the oldest transition automatically once
    # REPLAY_MEMORY entries are stored.
    replay_buffer = deque(maxlen=REPLAY_MEMORY)

    with tf.Session() as sess:
        mainDQN = DQN(sess, input_size, output_size, name = 'main')
        targetDQN = DQN(sess, input_size, output_size, name = 'target')
        tf.global_variables_initializer().run()

        copy_ops = get_copy_var_ops(dest_scope_name = 'target',
                                   src_scope_name = 'main')
        # Start with the target network equal to the main network.
        sess.run(copy_ops)

        for episode in range(max_episodes):
            e = 1. / ((episode / 10) + 1)
            done = False
            step_count = 0

            state = env.reset()

            while not done:
                if np.random.rand(1) < e:
                    action = env.action_space.sample()
                else:
                    action = np.argmax(mainDQN.predict(state))

                next_state, reward, done, info = env.step(action)

                # An episode cut off by the step limit is not a failure: do
                # not penalise it and do not mark it terminal.  This relies on
                # gym's TimeLimit wrapper setting 'TimeLimit.truncated'; when
                # the key is absent the old behaviour (every done is a
                # failure) is kept.
                truncated = bool(info.get('TimeLimit.truncated', False))
                terminal = done and not truncated
                if terminal:
                    reward = -100

                replay_buffer.append(
                    (state, action, reward, next_state, terminal))

                state = next_state
                step_count += 1
                # Safety cap in case the environment never reports done.
                if step_count > 10000:
                    break

            print("Episode : {}  steps : {}".format(episode, step_count))

            if episode % 10 == 1:
                for _ in range(train_iterations):
                    # Never request more samples than the buffer holds.
                    sample_size = min(batch_size, len(replay_buffer))
                    minibatch = random.sample(replay_buffer, sample_size)
                    loss, _ = replay_train(mainDQN, targetDQN, minibatch)

                print("Loss : ", loss)
                # Refresh the target network after each training round.
                sess.run(copy_ops)
        bot_play(mainDQN)
        
# Entry point: run the target-network training when executed directly.
if __name__ == '__main__':
    main_s()

W1129 17:04:42.676415 11268 lazy_loader.py:50] 
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



Episode : 0  steps : 43
Episode : 1  steps : 40
Loss :  3.5002148
Episode : 2  steps : 12
Episode : 3  steps : 17
Episode : 4  steps : 13
Episode : 5  steps : 11
Episode : 6  steps : 12
Episode : 7  steps : 8
Episode : 8  steps : 11
Episode : 9  steps : 13
Episode : 10  steps : 13
Episode : 11  steps : 13
Loss :  12.120758
Episode : 12  steps : 9
Episode : 13  steps : 12
Episode : 14  steps : 9
Episode : 15  steps : 11
Episode : 16  steps : 13
Episode : 17  steps : 10
Episode : 18  steps : 14
Episode : 19  steps : 11
Episode : 20  steps : 16
Episode : 21  steps : 9
Loss :  402.04584
Episode : 22  steps : 59
Episode : 23  steps : 55
Episode : 24  steps : 63
Episode : 25  steps : 85
Episode : 26  steps : 64
Episode : 27  steps : 120
Episode : 28  steps : 99
Episode : 29  steps : 47
Episode : 30  steps : 84
Episode : 31  steps : 63
Loss :  14.620422
Episode : 32  steps : 12
Episode : 33  steps : 34
Episode : 34  steps : 61
Episode : 35  steps : 60
Episode : 36  steps : 38
Episode : 37  st

Loss :  453.35205
Episode : 302  steps : 200
Episode : 303  steps : 200
Episode : 304  steps : 179
Episode : 305  steps : 200
Episode : 306  steps : 200
Episode : 307  steps : 200
Episode : 308  steps : 200
Episode : 309  steps : 200
Episode : 310  steps : 200
Episode : 311  steps : 200
Loss :  2.755369
Episode : 312  steps : 41
Episode : 313  steps : 41
Episode : 314  steps : 103
Episode : 315  steps : 42
Episode : 316  steps : 49
Episode : 317  steps : 37
Episode : 318  steps : 37
Episode : 319  steps : 45
Episode : 320  steps : 200
Episode : 321  steps : 31
Loss :  5.6985226
Episode : 322  steps : 22
Episode : 323  steps : 28
Episode : 324  steps : 20
Episode : 325  steps : 25
Episode : 326  steps : 35
Episode : 327  steps : 31
Episode : 328  steps : 27
Episode : 329  steps : 15
Episode : 330  steps : 28
Episode : 331  steps : 29
Loss :  3.1154768
Episode : 332  steps : 72
Episode : 333  steps : 60
Episode : 334  steps : 64
Episode : 335  steps : 71
Episode : 336  steps : 67
Episode

Episode : 597  steps : 20
Episode : 598  steps : 26
Episode : 599  steps : 24
Episode : 600  steps : 28
Episode : 601  steps : 19
Loss :  2.3528457
Episode : 602  steps : 37
Episode : 603  steps : 30
Episode : 604  steps : 44
Episode : 605  steps : 51
Episode : 606  steps : 66
Episode : 607  steps : 22
Episode : 608  steps : 71
Episode : 609  steps : 29
Episode : 610  steps : 58
Episode : 611  steps : 35
Loss :  547.5597
Episode : 612  steps : 164
Episode : 613  steps : 200
Episode : 614  steps : 200
Episode : 615  steps : 200
Episode : 616  steps : 200
Episode : 617  steps : 200
Episode : 618  steps : 200
Episode : 619  steps : 200
Episode : 620  steps : 200
Episode : 621  steps : 183
Loss :  2.1594882
Episode : 622  steps : 8
Episode : 623  steps : 9
Episode : 624  steps : 10
Episode : 625  steps : 9
Episode : 626  steps : 10
Episode : 627  steps : 9
Episode : 628  steps : 10
Episode : 629  steps : 8
Episode : 630  steps : 9
Episode : 631  steps : 9
Loss :  0.64303094
Episode : 632  

Episode : 897  steps : 32
Episode : 898  steps : 30
Episode : 899  steps : 24
Episode : 900  steps : 34
Episode : 901  steps : 31
Loss :  2.6474414
Episode : 902  steps : 46
Episode : 903  steps : 27
Episode : 904  steps : 61
Episode : 905  steps : 51
Episode : 906  steps : 29
Episode : 907  steps : 54
Episode : 908  steps : 21
Episode : 909  steps : 20
Episode : 910  steps : 24
Episode : 911  steps : 36
Loss :  2.8237684
Episode : 912  steps : 57
Episode : 913  steps : 83
Episode : 914  steps : 61
Episode : 915  steps : 55
Episode : 916  steps : 47
Episode : 917  steps : 79
Episode : 918  steps : 48
Episode : 919  steps : 64
Episode : 920  steps : 57
Episode : 921  steps : 89
Loss :  553.9912
Episode : 922  steps : 66
Episode : 923  steps : 87
Episode : 924  steps : 124
Episode : 925  steps : 76
Episode : 926  steps : 133
Episode : 927  steps : 55
Episode : 928  steps : 71
Episode : 929  steps : 71
Episode : 930  steps : 76
Episode : 931  steps : 64
Loss :  1.6485236
Episode : 932  st

Loss :  477.7874
Episode : 1192  steps : 188
Episode : 1193  steps : 76
Episode : 1194  steps : 84
Episode : 1195  steps : 200
Episode : 1196  steps : 122
Episode : 1197  steps : 177
Episode : 1198  steps : 59
Episode : 1199  steps : 107
Episode : 1200  steps : 80
Episode : 1201  steps : 200
Loss :  3.6883698
Episode : 1202  steps : 59
Episode : 1203  steps : 140
Episode : 1204  steps : 43
Episode : 1205  steps : 44
Episode : 1206  steps : 33
Episode : 1207  steps : 148
Episode : 1208  steps : 55
Episode : 1209  steps : 42
Episode : 1210  steps : 44
Episode : 1211  steps : 50
Loss :  517.17474
Episode : 1212  steps : 39
Episode : 1213  steps : 22
Episode : 1214  steps : 27
Episode : 1215  steps : 29
Episode : 1216  steps : 21
Episode : 1217  steps : 26
Episode : 1218  steps : 32
Episode : 1219  steps : 30
Episode : 1220  steps : 32
Episode : 1221  steps : 21
Loss :  4.069045
Episode : 1222  steps : 33
Episode : 1223  steps : 22
Episode : 1224  steps : 58
Episode : 1225  steps : 50
Epis

Loss :  4.160854
Episode : 1482  steps : 27
Episode : 1483  steps : 46
Episode : 1484  steps : 21
Episode : 1485  steps : 22
Episode : 1486  steps : 46
Episode : 1487  steps : 27
Episode : 1488  steps : 42
Episode : 1489  steps : 22
Episode : 1490  steps : 70
Episode : 1491  steps : 22
Loss :  3.9295456
Episode : 1492  steps : 24
Episode : 1493  steps : 37
Episode : 1494  steps : 28
Episode : 1495  steps : 22
Episode : 1496  steps : 22
Episode : 1497  steps : 47
Episode : 1498  steps : 21
Episode : 1499  steps : 23
Episode : 1500  steps : 22
Episode : 1501  steps : 34
Loss :  3.31397
Episode : 1502  steps : 8
Episode : 1503  steps : 9
Episode : 1504  steps : 13
Episode : 1505  steps : 11
Episode : 1506  steps : 8
Episode : 1507  steps : 19
Episode : 1508  steps : 9
Episode : 1509  steps : 16
Episode : 1510  steps : 15
Episode : 1511  steps : 17
Loss :  0.6896284
Episode : 1512  steps : 12
Episode : 1513  steps : 15
Episode : 1514  steps : 12
Episode : 1515  steps : 32
Episode : 1516  s

Episode : 1767  steps : 200
Episode : 1768  steps : 185
Episode : 1769  steps : 186
Episode : 1770  steps : 192
Episode : 1771  steps : 162
Loss :  8.608859
Episode : 1772  steps : 26
Episode : 1773  steps : 43
Episode : 1774  steps : 28
Episode : 1775  steps : 27
Episode : 1776  steps : 24
Episode : 1777  steps : 37
Episode : 1778  steps : 39
Episode : 1779  steps : 22
Episode : 1780  steps : 27
Episode : 1781  steps : 23
Loss :  8.5244665
Episode : 1782  steps : 12
Episode : 1783  steps : 15
Episode : 1784  steps : 13
Episode : 1785  steps : 24
Episode : 1786  steps : 24
Episode : 1787  steps : 14
Episode : 1788  steps : 12
Episode : 1789  steps : 40
Episode : 1790  steps : 25
Episode : 1791  steps : 15
Loss :  7.364138
Episode : 1792  steps : 18
Episode : 1793  steps : 19
Episode : 1794  steps : 14
Episode : 1795  steps : 20
Episode : 1796  steps : 16
Episode : 1797  steps : 23
Episode : 1798  steps : 19
Episode : 1799  steps : 17
Episode : 1800  steps : 17
Episode : 1801  steps : 5

Loss :  0.76497394
Episode : 2052  steps : 17
Episode : 2053  steps : 19
Episode : 2054  steps : 23
Episode : 2055  steps : 34
Episode : 2056  steps : 41
Episode : 2057  steps : 19
Episode : 2058  steps : 19
Episode : 2059  steps : 16
Episode : 2060  steps : 35
Episode : 2061  steps : 21
Loss :  580.26355
Episode : 2062  steps : 103
Episode : 2063  steps : 23
Episode : 2064  steps : 21
Episode : 2065  steps : 23
Episode : 2066  steps : 39
Episode : 2067  steps : 22
Episode : 2068  steps : 106
Episode : 2069  steps : 39
Episode : 2070  steps : 52
Episode : 2071  steps : 24
Loss :  520.82117
Episode : 2072  steps : 47
Episode : 2073  steps : 45
Episode : 2074  steps : 42
Episode : 2075  steps : 44
Episode : 2076  steps : 32
Episode : 2077  steps : 45
Episode : 2078  steps : 51
Episode : 2079  steps : 55
Episode : 2080  steps : 49
Episode : 2081  steps : 42
Loss :  513.37805
Episode : 2082  steps : 91
Episode : 2083  steps : 48
Episode : 2084  steps : 34
Episode : 2085  steps : 25
Episode

Episode : 2341  steps : 12
Loss :  553.02966
Episode : 2342  steps : 16
Episode : 2343  steps : 28
Episode : 2344  steps : 16
Episode : 2345  steps : 30
Episode : 2346  steps : 59
Episode : 2347  steps : 76
Episode : 2348  steps : 17
Episode : 2349  steps : 23
Episode : 2350  steps : 19
Episode : 2351  steps : 20
Loss :  511.25815
Episode : 2352  steps : 27
Episode : 2353  steps : 56
Episode : 2354  steps : 58
Episode : 2355  steps : 63
Episode : 2356  steps : 42
Episode : 2357  steps : 25
Episode : 2358  steps : 50
Episode : 2359  steps : 24
Episode : 2360  steps : 26
Episode : 2361  steps : 23
Loss :  4.8489356
Episode : 2362  steps : 34
Episode : 2363  steps : 51
Episode : 2364  steps : 60
Episode : 2365  steps : 41
Episode : 2366  steps : 39
Episode : 2367  steps : 53
Episode : 2368  steps : 52
Episode : 2369  steps : 39
Episode : 2370  steps : 70
Episode : 2371  steps : 41
Loss :  1.5065142
Episode : 2372  steps : 30
Episode : 2373  steps : 28
Episode : 2374  steps : 21
Episode : 

Episode : 2627  steps : 19
Episode : 2628  steps : 20
Episode : 2629  steps : 17
Episode : 2630  steps : 20
Episode : 2631  steps : 34
Loss :  1.1736357
Episode : 2632  steps : 21
Episode : 2633  steps : 14
Episode : 2634  steps : 25
Episode : 2635  steps : 16
Episode : 2636  steps : 11
Episode : 2637  steps : 12
Episode : 2638  steps : 17
Episode : 2639  steps : 21
Episode : 2640  steps : 17
Episode : 2641  steps : 15
Loss :  461.9751
Episode : 2642  steps : 30
Episode : 2643  steps : 54
Episode : 2644  steps : 36
Episode : 2645  steps : 32
Episode : 2646  steps : 30
Episode : 2647  steps : 32
Episode : 2648  steps : 34
Episode : 2649  steps : 38
Episode : 2650  steps : 39
Episode : 2651  steps : 26
Loss :  5.4502845
Episode : 2652  steps : 43
Episode : 2653  steps : 20
Episode : 2654  steps : 27
Episode : 2655  steps : 22
Episode : 2656  steps : 27
Episode : 2657  steps : 25
Episode : 2658  steps : 25
Episode : 2659  steps : 35
Episode : 2660  steps : 30
Episode : 2661  steps : 25
Lo

Loss :  0.8790631
Episode : 2922  steps : 57
Episode : 2923  steps : 28
Episode : 2924  steps : 26
Episode : 2925  steps : 21
Episode : 2926  steps : 24
Episode : 2927  steps : 38
Episode : 2928  steps : 22
Episode : 2929  steps : 26
Episode : 2930  steps : 27
Episode : 2931  steps : 24
Loss :  1.3816468
Episode : 2932  steps : 74
Episode : 2933  steps : 32
Episode : 2934  steps : 35
Episode : 2935  steps : 34
Episode : 2936  steps : 39
Episode : 2937  steps : 32
Episode : 2938  steps : 44
Episode : 2939  steps : 36
Episode : 2940  steps : 59
Episode : 2941  steps : 46
Loss :  1028.3279
Episode : 2942  steps : 20
Episode : 2943  steps : 20
Episode : 2944  steps : 30
Episode : 2945  steps : 33
Episode : 2946  steps : 27
Episode : 2947  steps : 30
Episode : 2948  steps : 28
Episode : 2949  steps : 27
Episode : 2950  steps : 26
Episode : 2951  steps : 26
Loss :  521.116
Episode : 2952  steps : 35
Episode : 2953  steps : 52
Episode : 2954  steps : 74
Episode : 2955  steps : 68
Episode : 29

Episode : 3209  steps : 21
Episode : 3210  steps : 19
Episode : 3211  steps : 19
Loss :  492.43628
Episode : 3212  steps : 28
Episode : 3213  steps : 34
Episode : 3214  steps : 58
Episode : 3215  steps : 42
Episode : 3216  steps : 45
Episode : 3217  steps : 31
Episode : 3218  steps : 49
Episode : 3219  steps : 52
Episode : 3220  steps : 39
Episode : 3221  steps : 47
Loss :  1.4707615
Episode : 3222  steps : 65
Episode : 3223  steps : 27
Episode : 3224  steps : 45
Episode : 3225  steps : 40
Episode : 3226  steps : 52
Episode : 3227  steps : 50
Episode : 3228  steps : 50
Episode : 3229  steps : 42
Episode : 3230  steps : 45
Episode : 3231  steps : 42
Loss :  495.95978
Episode : 3232  steps : 53
Episode : 3233  steps : 26
Episode : 3234  steps : 41
Episode : 3235  steps : 26
Episode : 3236  steps : 48
Episode : 3237  steps : 39
Episode : 3238  steps : 38
Episode : 3239  steps : 28
Episode : 3240  steps : 60
Episode : 3241  steps : 26
Loss :  493.40497
Episode : 3242  steps : 59
Episode : 

Episode : 3499  steps : 34
Episode : 3500  steps : 26
Episode : 3501  steps : 55
Loss :  8.135319
Episode : 3502  steps : 21
Episode : 3503  steps : 21
Episode : 3504  steps : 29
Episode : 3505  steps : 23
Episode : 3506  steps : 27
Episode : 3507  steps : 13
Episode : 3508  steps : 17
Episode : 3509  steps : 11
Episode : 3510  steps : 14
Episode : 3511  steps : 63
Loss :  4.8259897
Episode : 3512  steps : 24
Episode : 3513  steps : 11
Episode : 3514  steps : 12
Episode : 3515  steps : 15
Episode : 3516  steps : 24
Episode : 3517  steps : 13
Episode : 3518  steps : 28
Episode : 3519  steps : 13
Episode : 3520  steps : 12
Episode : 3521  steps : 28
Loss :  3.905397
Episode : 3522  steps : 14
Episode : 3523  steps : 17
Episode : 3524  steps : 10
Episode : 3525  steps : 11
Episode : 3526  steps : 17
Episode : 3527  steps : 9
Episode : 3528  steps : 13
Episode : 3529  steps : 14
Episode : 3530  steps : 21
Episode : 3531  steps : 11
Loss :  4.212012
Episode : 3532  steps : 14
Episode : 3533

Episode : 3789  steps : 22
Episode : 3790  steps : 25
Episode : 3791  steps : 29
Loss :  2.05723
Episode : 3792  steps : 40
Episode : 3793  steps : 34
Episode : 3794  steps : 31
Episode : 3795  steps : 32
Episode : 3796  steps : 36
Episode : 3797  steps : 30
Episode : 3798  steps : 52
Episode : 3799  steps : 54
Episode : 3800  steps : 29
Episode : 3801  steps : 22
Loss :  13.760559
Episode : 3802  steps : 15
Episode : 3803  steps : 14
Episode : 3804  steps : 26
Episode : 3805  steps : 23
Episode : 3806  steps : 20
Episode : 3807  steps : 14
Episode : 3808  steps : 14
Episode : 3809  steps : 14
Episode : 3810  steps : 14
Episode : 3811  steps : 13
Loss :  3.123292
Episode : 3812  steps : 26
Episode : 3813  steps : 21
Episode : 3814  steps : 30
Episode : 3815  steps : 56
Episode : 3816  steps : 21
Episode : 3817  steps : 30
Episode : 3818  steps : 24
Episode : 3819  steps : 37
Episode : 3820  steps : 23
Episode : 3821  steps : 43
Loss :  477.633
Episode : 3822  steps : 20
Episode : 3823 

Episode : 4076  steps : 53
Episode : 4077  steps : 31
Episode : 4078  steps : 29
Episode : 4079  steps : 27
Episode : 4080  steps : 22
Episode : 4081  steps : 71
Loss :  5.1825294
Episode : 4082  steps : 27
Episode : 4083  steps : 27
Episode : 4084  steps : 25
Episode : 4085  steps : 54
Episode : 4086  steps : 31
Episode : 4087  steps : 32
Episode : 4088  steps : 59
Episode : 4089  steps : 25
Episode : 4090  steps : 24
Episode : 4091  steps : 52
Loss :  459.73224
Episode : 4092  steps : 29
Episode : 4093  steps : 23
Episode : 4094  steps : 30
Episode : 4095  steps : 23
Episode : 4096  steps : 35
Episode : 4097  steps : 48
Episode : 4098  steps : 23
Episode : 4099  steps : 25
Episode : 4100  steps : 44
Episode : 4101  steps : 19
Loss :  3.8471851
Episode : 4102  steps : 40
Episode : 4103  steps : 35
Episode : 4104  steps : 30
Episode : 4105  steps : 33
Episode : 4106  steps : 42
Episode : 4107  steps : 60
Episode : 4108  steps : 50
Episode : 4109  steps : 26
Episode : 4110  steps : 64
E

Loss :  7.8280654
Episode : 4362  steps : 21
Episode : 4363  steps : 21
Episode : 4364  steps : 29
Episode : 4365  steps : 19
Episode : 4366  steps : 20
Episode : 4367  steps : 23
Episode : 4368  steps : 33
Episode : 4369  steps : 27
Episode : 4370  steps : 19
Episode : 4371  steps : 22
Loss :  13.494006
Episode : 4372  steps : 28
Episode : 4373  steps : 25
Episode : 4374  steps : 24
Episode : 4375  steps : 33
Episode : 4376  steps : 25
Episode : 4377  steps : 28
Episode : 4378  steps : 24
Episode : 4379  steps : 23
Episode : 4380  steps : 31
Episode : 4381  steps : 21
Loss :  10.580297
Episode : 4382  steps : 30
Episode : 4383  steps : 35
Episode : 4384  steps : 33
Episode : 4385  steps : 49
Episode : 4386  steps : 70
Episode : 4387  steps : 50
Episode : 4388  steps : 42
Episode : 4389  steps : 39
Episode : 4390  steps : 52
Episode : 4391  steps : 132
Loss :  3.5707288
Episode : 4392  steps : 30
Episode : 4393  steps : 35
Episode : 4394  steps : 32
Episode : 4395  steps : 40
Episode :

Episode : 4651  steps : 55
Loss :  4.019697
Episode : 4652  steps : 39
Episode : 4653  steps : 26
Episode : 4654  steps : 29
Episode : 4655  steps : 73
Episode : 4656  steps : 27
Episode : 4657  steps : 33
Episode : 4658  steps : 26
Episode : 4659  steps : 30
Episode : 4660  steps : 25
Episode : 4661  steps : 36
Loss :  1040.2594
Episode : 4662  steps : 21
Episode : 4663  steps : 25
Episode : 4664  steps : 26
Episode : 4665  steps : 15
Episode : 4666  steps : 15
Episode : 4667  steps : 15
Episode : 4668  steps : 24
Episode : 4669  steps : 28
Episode : 4670  steps : 26
Episode : 4671  steps : 15
Loss :  494.93195
Episode : 4672  steps : 23
Episode : 4673  steps : 29
Episode : 4674  steps : 29
Episode : 4675  steps : 30
Episode : 4676  steps : 23
Episode : 4677  steps : 27
Episode : 4678  steps : 21
Episode : 4679  steps : 28
Episode : 4680  steps : 26
Episode : 4681  steps : 21
Loss :  483.64874
Episode : 4682  steps : 39
Episode : 4683  steps : 44
Episode : 4684  steps : 43
Episode : 4

Episode : 4941  steps : 19
Loss :  7.7845435
Episode : 4942  steps : 31
Episode : 4943  steps : 32
Episode : 4944  steps : 26
Episode : 4945  steps : 18
Episode : 4946  steps : 18
Episode : 4947  steps : 21
Episode : 4948  steps : 18
Episode : 4949  steps : 20
Episode : 4950  steps : 24
Episode : 4951  steps : 21
Loss :  6.822042
Episode : 4952  steps : 29
Episode : 4953  steps : 29
Episode : 4954  steps : 28
Episode : 4955  steps : 29
Episode : 4956  steps : 19
Episode : 4957  steps : 27
Episode : 4958  steps : 51
Episode : 4959  steps : 63
Episode : 4960  steps : 37
Episode : 4961  steps : 29
Loss :  445.8803
Episode : 4962  steps : 16
Episode : 4963  steps : 29
Episode : 4964  steps : 15
Episode : 4965  steps : 27
Episode : 4966  steps : 27
Episode : 4967  steps : 17
Episode : 4968  steps : 26
Episode : 4969  steps : 20
Episode : 4970  steps : 16
Episode : 4971  steps : 20
Loss :  2.648374
Episode : 4972  steps : 49
Episode : 4973  steps : 47
Episode : 4974  steps : 50
Episode : 497