In [1]:
from __future__ import print_function
import chainer
import chainer.functions as F
import chainer.links as L
import chainerrl
import numpy as np
import gym
from gym import wrappers
import logging

In [2]:
from tqdm import tqdm

In [3]:
def phi(obs):
    """Cast an observation to a float32 NumPy array for the network.

    Args:
        obs: The observation to convert. May be a ``numpy.ndarray`` or any
            array-like object NumPy can convert (nested sequences, objects
            implementing ``__array__``).

    Returns:
        numpy.ndarray: A newly allocated float32 array with the same shape
        and values as ``obs``. The input is never modified.
    """
    # asanyarray hands ndarrays through untouched, so for them this is exactly
    # the previous `obs.astype(np.float32)`; other array-likes (which have no
    # .astype method) are converted first instead of raising AttributeError.
    return np.asanyarray(obs).astype(np.float32)

In [4]:
class A3CLSTMSoftmax(chainer.Chain, chainerrl.agents.a3c.A3CModel, chainerrl.recurrent.RecurrentChainMixin):
    """Convolutional + LSTM actor-critic model with a softmax policy head.

    Three convolution / ReLU / 2x2 max-pooling stages are shared by both
    outputs. Their result feeds two separate branches, each made of an LSTM
    (14976 -> 1024) followed by a fully connected layer (1024 -> 1024):
    one branch ends in the softmax policy, the other in a scalar state value.

    Args:
        n_actions (int): Number of discrete actions the policy head outputs.
            Defaults to 6, the value that was previously hard-coded.
    """

    def __init__(self, n_actions=6):
        super(A3CLSTMSoftmax, self).__init__()
        with self.init_scope():
            # Shared feature extractor; conv1 expects 3 input channels.
            self.conv1 = L.Convolution2D(3, 16, (11, 9), 1, 0)
            self.conv2 = L.Convolution2D(16, 32, (11, 9), 1, 0)
            self.conv3 = L.Convolution2D(32, 64, (10, 9), 1, 0)
            # 14976 = 64 * 18 * 13, which is what the conv/pool stack yields
            # for 210x160 frames -- presumably the SpaceInvaders frame size;
            # this constant must be recomputed if the input resolution changes.
            self.l4p = L.LSTM(14976, 1024)  # policy branch
            self.l4v = L.LSTM(14976, 1024)  # value branch
            self.l5p = L.Linear(1024, 1024)
            self.l5v = L.Linear(1024, 1024)
            self.pi = chainerrl.policies.SoftmaxPolicy(L.Linear(1024, n_actions))
            self.v = L.Linear(1024, 1)

    def pi_and_v(self, state):
        """Compute the policy output and the state value for a batch.

        Args:
            state: 4-D array whose last axis is the channel axis; it is
                transposed with (0, 3, 1, 2) before the first convolution.
                (Assumes layout (batch, height, width, channel) -- confirm
                against the environment's observation format.)

        Returns:
            tuple: ``(pout, vout)`` where ``pout`` is whatever the
            ``SoftmaxPolicy`` head returns for the policy branch and ``vout``
            is the output of the final value layer (one unit per sample).
        """
        # Move the channel axis to position 1, as the convolutions require.
        state = np.asarray(state.transpose(0, 3, 1, 2), dtype=np.float32)
        h1 = F.max_pooling_2d(F.relu(self.conv1(state)), ksize=2, stride=2)
        h2 = F.max_pooling_2d(F.relu(self.conv2(h1)), ksize=2, stride=2)
        h3 = F.max_pooling_2d(F.relu(self.conv3(h2)), ksize=2, stride=2)
        # Both branches read the same convolutional features.
        h4p = self.l4p(h3)
        h4v = self.l4v(h3)
        h5p = F.relu(self.l5p(h4p))
        h5v = F.relu(self.l5v(h4v))
        pout = self.pi(h5p)  # output of the policy network
        vout = self.v(h5v)  # output of the value network
        return pout, vout

In [5]:
def make_env(process_idx, test=False):
    """Create and seed the SpaceInvaders environment for one worker.

    Reads the module-level globals ``process_seeds`` and ``outdir``, which
    must be defined before this function is called.

    Args:
        process_idx (int): Index of the worker; selects its entry in
            ``process_seeds``.
        test (bool): When False (training), rewards are multiplied by 0.01
            and worker 0 is additionally wrapped in a gym ``Monitor`` that
            writes to ``outdir``. When True, neither happens and the seed is
            mirrored to ``2 ** 32 - 1 - seed``.

    Returns:
        The seeded environment (the ``Monitor`` wrapper for training
        worker 0, the plain environment otherwise).
    """
    env = gym.make('SpaceInvaders-v0')
    worker_seed = int(process_seeds[process_idx])

    training = not test
    if training:
        # The helper's return value is ignored, so it is relied upon to
        # install the reward filter on `env` in place.
        chainerrl.misc.env_modifiers.make_reward_filtered(
            env, lambda reward: reward * 0.01)
        if process_idx == 0:
            env = gym.wrappers.Monitor(env, outdir)

    # Training and test environments get different seeds for the same worker.
    if training:
        env_seed = worker_seed
    else:
        env_seed = 2 ** 32 - 1 - worker_seed
    env.seed(env_seed)
    return env

In [6]:
# NOTE(review): despite the name, the training call passes this value as
# `max_episode_len`, not as an episode count -- confirm that is the intent.
num_episodes = 10 ** 5

In [7]:
# Instantiate the network and hand it to the RMSpropAsync optimizer.
model = A3CLSTMSoftmax()
optimizer = chainerrl.optimizers.rmsprop_async.RMSpropAsync(
    lr=0.001,
    eps=0.1,
    alpha=0.99,
)
# Kept as the cell's final expression so the notebook echoes its result.
optimizer.setup(model)

<chainerrl.optimizers.rmsprop_async.RMSpropAsync at 0x7ff45a6da0b8>

In [9]:
# Assemble the A3C agent around the model and its optimizer.
#   phi   -- observation preprocessor defined earlier in this notebook
#   t_max -- the training log shows one parameter update per 8 steps
#   gamma / beta -- presumably the discount factor and the entropy weight;
#                   confirm against the installed ChainerRL A3C signature
agent = chainerrl.agents.a3c.A3C(
    model,
    optimizer,
    phi=phi,
    t_max=8,
    gamma=0.995,
    beta=0.1,
)

In [10]:
# Log at INFO rather than DEBUG. At DEBUG level gym prints one line per
# captured video frame and the A3C agent one line (with full logits and
# probabilities) per step, which floods the notebook output during a long
# training run. Switch both levels back to DEBUG temporarily when
# step-by-step diagnostics are needed.
gym.logger.set_level(gym.logger.INFO)
logging.basicConfig(level=logging.INFO)

In [13]:
# Run-wide settings; `outdir` and `process_seeds` are read as globals by
# make_env, and `n_process` / `outdir` by the training call.
outdir = 'result'
n_process = 3

# One integer seed per worker: 0, 1, ..., n_process - 1.
process_seeds = np.arange(n_process)

# Fix ChainerRL's global random seed for this run.
chainerrl.misc.set_random_seed(0)

In [None]:
# Start asynchronous training with `n_process` workers, each building its own
# environment through make_env; results go to `outdir`.
chainerrl.experiments.train_agent_async(
    agent=agent,
    make_env=make_env,
    processes=n_process,
    outdir=outdir,
    steps=10 ** 6,
    # NOTE(review): `num_episodes` is used here as the per-episode length
    # limit, not as a number of episodes -- confirm this is intended.
    max_episode_len=num_episodes,
    eval_interval=None,
    # NOTE(review): profiling is left enabled for the whole run -- confirm
    # it is still wanted.
    profile=True,
    # NOTE(review): this passes the gym.logger module, not a logging.Logger;
    # it only works while the trainer calls methods that module provides.
    logger=gym.logger,
)

INFO: Making new env: SpaceInvaders-v0
INFO: Making new env: SpaceInvaders-v0
INFO: Making new env: SpaceInvaders-v0
INFO: Creating monitor directory result
INFO: Starting new video recorder writing to /home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4
DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4
DEBUG: Starting ffmpeg with "ffmpeg -nostats -loglevel error -y -r 30 -f rawvideo -s:v 160x210 -pix_fmt rgb24 -i - -vf scale=trunc(iw/2)*2:trunc(ih/2)*2 -vcodec libx264 -pix_fmt yuv420p /home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4"


DEBUG:chainerrl.agents.a3c:t:1 r:0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.60634923  0.13875708  0.2586704   0.20290095  0.21846941 -0.03822245]] probs:[[0.08493027 0.17891966 0.20171386 0.19077232 0.1937656  0.14989834]] entropy:[1.758759]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:2 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.7547961   0.16335626  0.22208175  0.29930896  0.2036087  -0.14062813]] probs:[[0.07415171 0.1857247  0.1969581  0.21277136 0.19335307 0.13704112]] entropy:[1.7449545]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:3 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.7691495   0.16478455  0.20099114  0.2875876   0.22889172 -0.14234364]] probs:[[0.07329032 0.18648717 0.19336297 0.21085392 0.19883388 0.13717182]] entropy:[1.7443295]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:4 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.73730564  0.15099406  0.2197565   0.23901187  0.28193402 -0.10969054]] probs:[[0.07499674 0.18231653 0.19529411 0.19909102 0.20782247 0.14047924]] entropy:[1.7470868]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:5 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.75023735  0.15719682  0.22093913  0.24254379  0.2991786  -0.09720346]] probs:[[0.07355602 0.1822686  0.19426511 0.19850782 0.21007472 0.1413277 ]] entropy:[1.7458229]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:6 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.759686    0.16171262  0.2248088   0.24335058  0.31272942 -0.08712348]] probs:[[0.07247694 0.18212022 0.19398157 0.19761188 0.21180876 0.14200056]] entropy:[1.7448444]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:7 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.799917    0.17442016  0.19864097  0.28456092  0.2951765  -0.11397191]] probs:[[0.06994582 0.18531519 0.18985847 0.2068924  0.20910038 0.13888775]] entropy:[1.7412661]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:8 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.80110663  0.16760615  0.19409892  0.27544212  0.30147058 -0.11087096]] probs:[[0.07002572 0.18448633 0.1894392  0.20549287 0.21091175 0.13964427]] entropy:[1.7414908]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4929854] v_loss:[[0.00039053]]
DEBUG:chainerrl.agents.a3c:grad norm:1340.929549580207
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:9 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07060196  0.3239095  -0.21825756 -0.1231495   0.05475973 -0.37249547]] probs:[[0.16212259 0.24053466 0.13986772 0.15382342 0.18377542 0.11987617]] entropy:[1.7663956]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:10 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-2.5427772e-04  2.6618457e-01 -1.8516220e-01 -1.4514636e-01
   8.5996665e-02 -3.1775403e-01]] probs:[[0.17183161 0.22429313 0.14282314 0.14865422 0.18731017 0.12508771]] entropy:[1.7729874]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:11 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0124065   0.25413674 -0.15417244 -0.13253734  0.10406824 -0.2794628 ]] probs:[[0.17158729 0.21850793 0.14525825 0.14843516 0.18805864 0.12815273]] entropy:[1.7757211]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:12 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02540056  0.22672644 -0.13837816 -0.08272663  0.047015   -0.25300112]] probs:[[0.16678452 0.21461156 0.14896704 0.15749231 0.17931038 0.13283426]] entropy:[1.7800499]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:13 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05310221  0.2091924  -0.1313255  -0.07686409  0.04131866 -0.2247425 ]] probs:[[0.16280043 0.21162581 0.15055099 0.15897758 0.1789213  0.1371239 ]] entropy:[1.7819183]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:14 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0950693   0.18249048 -0.11161964 -0.06294739  0.03489757 -0.18218322]] probs:[[0.15645784 0.20650949 0.15388973 0.16156515 0.17817272 0.14340498]] entropy:[1.784343]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:15 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1033887   0.12423603 -0.05708922 -0.06071892  0.06743551 -0.11780668]] probs:[[0.15340804 0.19262129 0.16067775 0.16009559 0.18198523 0.15121207]] entropy:[1.7876306]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:16 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16464636  0.06301253 -0.00204883  0.05791179  0.0636104  -0.01514949]] probs:[[0.14086202 0.17687435 0.16573304 0.17597446 0.17698014 0.16357598]] entropy:[1.7887442]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-0.44003344] v_loss:[[0.01672273]]
DEBUG:chainerrl.agents.a3c:grad norm:2082.9647788408893
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:17 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.28106025  0.10944391 -0.13720883  0.31766114 -0.20630199  0.08053552]] probs:[[0.12554187 0.18551642 0.14496475 0.22845988 0.13528687 0.18023022]] entropy:[1.7697496]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:18 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2920463   0.08044987 -0.1314371   0.33096007 -0.19341579  0.09568201]] probs:[[0.1239558  0.17990361 0.14555211 0.23111868 0.13680485 0.18266489]] entropy:[1.7691339]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:19 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2895451   0.07637405 -0.13546251  0.337139   -0.19027944  0.0992527 ]] probs:[[0.12407892 0.17890178 0.14474887 0.23220065 0.13702774 0.18304199]] entropy:[1.7687856]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:20 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.26827458  0.07814651 -0.14660841  0.33531833 -0.18325253  0.1075066 ]] probs:[[0.12631172 0.17860451 0.14265355 0.23098336 0.13752076 0.18392609]] entropy:[1.7695459]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:21 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.26131484  0.07651038 -0.1487644   0.33819586 -0.17836495  0.11036806]] probs:[[0.12692107 0.17793007 0.14204101 0.23115213 0.13789815 0.18405752]] entropy:[1.769666]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:22 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2533237   0.07388432 -0.14643252  0.33663097 -0.16632572  0.11101367]] probs:[[0.1276444  0.17705429 0.14204438 0.2302586  0.13924657 0.18375176]] entropy:[1.7704861]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:23 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.25241885  0.06239733 -0.12538366  0.3361805  -0.13851447  0.09988107]] probs:[[0.12739003 0.17452529 0.14464588 0.22948848 0.142759   0.1811913 ]] entropy:[1.7720119]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:24 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2540107   0.03271353 -0.11405039  0.32629794 -0.10850066  0.09427596]] probs:[[0.12751618 0.16985878 0.1466727  0.2278191  0.14748895 0.1806443 ]] entropy:[1.7736987]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.9627005] v_loss:[[0.00326085]]
DEBUG:chainerrl.agents.a3c:grad norm:139.10414523323658
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:25 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.34784582  0.03372277 -0.04765838  0.24944603 -0.03069156  0.15845546]] probs:[[0.11541694 0.16903734 0.15582576 0.2097346  0.1584922  0.19149321]] entropy:[1.7754354]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:26 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.3668994   0.03960061 -0.05303971  0.2453057  -0.0554871   0.16864306]] probs:[[0.11378544 0.17085488 0.15573786 0.20987657 0.15535717 0.19438815]] entropy:[1.7741377]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:27 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.3730586   0.0372207  -0.05441283  0.23443471 -0.05591264  0.17699946]] probs:[[0.11331595 0.17079419 0.15583935 0.20802811 0.15560581 0.19641662]] entropy:[1.7740786]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:28 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.3770372   0.03392748 -0.05231324  0.23181662 -0.04160005  0.19228333]] probs:[[0.11241181 0.1695476  0.15553847 0.20664924 0.15721373 0.19863911]] entropy:[1.7737573]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:29 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.39974317  0.01778124 -0.03671023  0.2363295  -0.00586747  0.2448159 ]] probs:[[0.1083096  0.16443546 0.15571488 0.20460196 0.16059239 0.20634568]] entropy:[1.771179]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:30 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.4161059   0.00535838 -0.01871771  0.24377967  0.02177116  0.29178664]] probs:[[0.10497784 0.16000636 0.15620004 0.20308708 0.16265418 0.21307449]] entropy:[1.7684264]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:31 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.42059425  0.00346304 -0.01297753  0.2454364   0.02737352  0.3048496 ]] probs:[[0.10407288 0.15903887 0.15644556 0.2025774  0.16288741 0.2149779 ]] entropy:[1.767607]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:32 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.42440924  0.000513   -0.0122985   0.24633603  0.02723393  0.30976167]] probs:[[0.10362896 0.15849754 0.1564799  0.20266657 0.16278982 0.21593721]] entropy:[1.767107]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5612917] v_loss:[[0.00028252]]
DEBUG:chainerrl.agents.a3c:grad norm:13.11952696816266
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:33 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.32004738  0.02335688  0.02068262  0.17596489  0.04964086  0.24859168]] probs:[[0.11532672 0.16258053 0.16214633 0.18938491 0.16691045 0.2036511 ]] entropy:[1.777461]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:34 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.3148805   0.02201774  0.01531912  0.18190189  0.06268123  0.24516536]] probs:[[0.11567724 0.16201712 0.16093548 0.19010684 0.1687411  0.20252223]] entropy:[1.7776537]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:35 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.31211758  0.02135813  0.01208758  0.18557519  0.0697659   0.24309641]] probs:[[0.11586118 0.16172032 0.16022801 0.19058257 0.16974142 0.20186652]] entropy:[1.7777339]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:36 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.30938518  0.02051465  0.01626917  0.18976857  0.07036845  0.24468626]] probs:[[0.11593744 0.16124915 0.160566   0.19098684 0.16949177 0.20176874]] entropy:[1.7777282]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:37 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.30893636  0.02042816  0.01595388  0.19015397  0.07121714  0.24448076]] probs:[[0.11597053 0.16120882 0.16048914 0.19102922 0.16960795 0.2016943 ]] entropy:[1.7777421]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:38 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.310816    0.02115512  0.01065693  0.18724103  0.07305801  0.2420743 ]] probs:[[0.11594711 0.16159695 0.15990935 0.19079341 0.17020579 0.20154737]] entropy:[1.7777717]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:39 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.31059313  0.0210673   0.01031876  0.1876102   0.0737377   0.2418418 ]] probs:[[0.11596172 0.1615671  0.1598398  0.19084536 0.17030501 0.201481  ]] entropy:[1.7777767]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:40 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.307281    0.01991567  0.01438034  0.19193979  0.07482928  0.24349417]] probs:[[0.11609144 0.16102745 0.16013858 0.19125333 0.17011735 0.20137188]] entropy:[1.7777872]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5696024] v_loss:[[0.00025034]]
DEBUG:chainerrl.agents.a3c:grad norm:4.152852756239151
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:41 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.20775838  0.05443209  0.01460036  0.18616134  0.13317147  0.13297011]] probs:[[0.12748921 0.16570717 0.1592365  0.18903865 0.17928228 0.17924619]] entropy:[1.784206]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:42 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.20777032  0.05442606  0.01469414  0.18613689  0.13310601  0.13300033]] probs:[[0.1274875  0.16570592 0.15925118 0.18903373 0.17927028 0.17925134]] entropy:[1.7842073]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:43 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.20771311  0.0543985   0.01481929  0.1860321   0.13320243  0.13301094]] probs:[[0.12749198 0.16569771 0.15926762 0.18900977 0.17928362 0.1792493 ]] entropy:[1.7842114]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:44 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.20771864  0.05439894  0.01485393  0.18602434  0.1331793   0.13302375]] probs:[[0.12749109 0.16569753 0.15927288 0.18900801 0.1792792  0.17925131]] entropy:[1.7842119]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:45 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17955005  0.08187295  0.05545888  0.18884443  0.17555773  0.15658298]] probs:[[0.12763293 0.16576669 0.16144544 0.18448217 0.18204723 0.17862548]] entropy:[1.7846699]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:46 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17703915  0.08706215  0.05928162  0.19035637  0.18023758  0.16014361]] probs:[[0.1274984  0.16603608 0.16148698 0.18410373 0.1822502  0.17862463]] entropy:[1.7846568]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:47 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17837338  0.08327791  0.06271922  0.19142203  0.1845699   0.16541228]] probs:[[0.12711355 0.16512984 0.16176964 0.18398905 0.18273263 0.17926523]] entropy:[1.7844777]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:48 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17626044  0.08361863  0.07059471  0.18866965  0.20080805  0.17283039]] probs:[[0.12669514 0.16429487 0.16216898 0.18249337 0.18472205 0.17962559]] entropy:[1.7842866]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2543812] v_loss:[[0.00046001]]
DEBUG:chainerrl.agents.a3c:grad norm:3.6845358836068343
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:49 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.14359494  0.10137572  0.10087998  0.17242199  0.18589914  0.13698614]] probs:[[0.13088405 0.16721533 0.16713247 0.17952755 0.18196344 0.17327723]] entropy:[1.7863133]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:50 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13768503  0.11263974  0.10744017  0.16719405  0.1946237   0.1440674 ]] probs:[[0.13091886 0.16815776 0.16728568 0.17758633 0.18252489 0.17352648]] entropy:[1.786389]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:51 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13471813  0.11785172  0.10825285  0.16519578  0.19861908  0.14477368]] probs:[[0.13105853 0.16871549 0.16710377 0.17689529 0.18290763 0.17331935]] entropy:[1.7864331]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:52 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13477223  0.11835485  0.10877294  0.16486745  0.19954535  0.14569938]] probs:[[0.13099423 0.16872671 0.1671177  0.17676    0.18299718 0.17340413]] entropy:[1.7864136]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:53 r:0.05 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13440304  0.11852445  0.10884858  0.16461267  0.1992535   0.1449022 ]] probs:[[0.13106187 0.16878015 0.16715492 0.17674096 0.1829707  0.17329143]] entropy:[1.786437]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:54 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17037018  0.11201362  0.09430213  0.20578131  0.1698104   0.1635246 ]] probs:[[0.12678197 0.16814935 0.1651974  0.1846792  0.17815417 0.17703785]] entropy:[1.7848969]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:55 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16098326  0.11129253  0.10417514  0.19125931  0.18350145  0.1609422 ]] probs:[[0.12771581 0.16768436 0.16649511 0.18164426 0.18024054 0.17621997]] entropy:[1.7853413]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:56 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13559154  0.11954979  0.09795438  0.17196022  0.19937146  0.1395012 ]] probs:[[0.13109    0.16919051 0.16557595 0.17829435 0.1832492  0.1726    ]] entropy:[1.7863367]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.0737832] v_loss:[[0.00204795]]
DEBUG:chainerrl.agents.a3c:grad norm:14.196384886731272
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:57 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10038325  0.14086027  0.08617508  0.20906143  0.19989528  0.0819251 ]] probs:[[0.1353031  0.17221798 0.16305308 0.18437323 0.18269096 0.16236158]] entropy:[1.7867609]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:58 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10138845  0.14148758  0.08572309  0.20888864  0.19957267  0.0803711 ]] probs:[[0.13522726 0.17240268 0.16305187 0.18442334 0.18271324 0.16218156]] entropy:[1.786727]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:59 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10623926  0.14193793  0.08742945  0.21048395  0.20507093  0.06566653]] probs:[[0.13475667 0.17271589 0.16355339 0.18497005 0.18397151 0.16003244]] entropy:[1.7863781]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:60 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1075504   0.1379606   0.08874558  0.20663913  0.20370872  0.06987131]] probs:[[0.13470575 0.1721909  0.16392168 0.18443227 0.1838926  0.16085678]] entropy:[1.7864863]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:61 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10783983  0.13661247  0.08903867  0.20550548  0.20358089  0.07119745]] probs:[[0.13469936 0.17200054 0.16400942 0.1842679  0.1839136  0.16110924]] entropy:[1.7865156]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:62 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10881498  0.1450924   0.09266563  0.22404636  0.19609682  0.08145912]] probs:[[0.1338096  0.17248757 0.16367757 0.1866582  0.18151343 0.16185355]] entropy:[1.78629]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:63 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12053756  0.14851822  0.09674333  0.23037814  0.19136606  0.0785649 ]] probs:[[0.13230805 0.17315528 0.16441831 0.18792607 0.18073586 0.16145645]] entropy:[1.7858443]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:64 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11705586  0.15155096  0.09612899  0.23826455  0.18557549  0.08207291]] probs:[[0.13251796 0.17335214 0.16400601 0.18905513 0.17935187 0.16171686]] entropy:[1.7858548]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7147512] v_loss:[[0.00261354]]
DEBUG:chainerrl.agents.a3c:grad norm:13.680416637247195
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:65 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08153298  0.16599302  0.07048701  0.14482315  0.15300131  0.08252038]] probs:[[0.14002354 0.17934954 0.16301312 0.17559265 0.17703456 0.16498657]] entropy:[1.7884358]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:66 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07330146  0.16603018  0.0716082   0.14658041  0.14518476  0.08485227]] probs:[[0.14108746 0.17923748 0.16308795 0.17578503 0.17553988 0.16526227]] entropy:[1.7887034]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:67 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06942276  0.16616003  0.07162801  0.1462221   0.13945414  0.0858996 ]] probs:[[0.14168082 0.17931779 0.16314307 0.17577797 0.17459232 0.16548806]] entropy:[1.788845]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:68 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07441457  0.1657419   0.07321732  0.15002912  0.14981729  0.08460367]] probs:[[0.1407285  0.17892899 0.16311646 0.17613949 0.17610218 0.16498438]] entropy:[1.788614]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:69 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0726765   0.16705397  0.07031988  0.14690118  0.14339799  0.08591639]] probs:[[0.14117835 0.17942451 0.16288109 0.17584479 0.17522985 0.16544138]] entropy:[1.788714]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:70 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07347398  0.16715853  0.07008476  0.1466518   0.14410844  0.08583419]] probs:[[0.141075   0.17945494 0.16285339 0.17581238 0.17536579 0.16543853]] entropy:[1.7886889]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:71 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08023237  0.16326524  0.07582331  0.14360239  0.15468672  0.07796919]] probs:[[0.14021966 0.17887866 0.16390154 0.17539576 0.17735071 0.16425362]] entropy:[1.7885028]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:72 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07508678  0.16411622  0.07520173  0.14666407  0.14991131  0.08160216]] probs:[[0.14079314 0.17884058 0.1636255  0.1757465  0.17631812 0.16467613]] entropy:[1.7886472]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4132718] v_loss:[[8.7391236e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.8970892676467966
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:73 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.014888    0.13581026  0.12444028  0.0991144   0.1082377   0.08244622]] probs:[[0.15001146 0.1744102  0.1724384  0.16812608 0.16966695 0.16534694]] entropy:[1.7905837]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:74 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01572322  0.13555437  0.12484401  0.0984      0.10940506  0.08140603]] probs:[[0.14991531 0.1743994  0.1725415  0.1680386  0.1698981  0.16520709]] entropy:[1.790566]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:75 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01646437  0.13525857  0.12951797  0.08661944  0.11542476  0.05654488]] probs:[[0.1504592  0.1751101  0.17410775 0.16679671 0.17167121 0.16185506]] entropy:[1.7904259]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:76 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0216755   0.13307095  0.12716472  0.09020051  0.11925936  0.06749433]] probs:[[0.14945807 0.17447166 0.17344423 0.16715005 0.1720785  0.16339748]] entropy:[1.7904028]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:77 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0133997   0.13593255  0.13006818  0.08525234  0.11401717  0.05168198]] probs:[[0.15100841 0.17532964 0.17430446 0.1666653  0.17152902 0.16116317]] entropy:[1.790444]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:78 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00633421  0.13771355  0.1285765   0.08435587  0.10948382  0.04065647]] probs:[[0.15231812 0.17591818 0.17431812 0.16677761 0.17102149 0.15964648]] entropy:[1.7904918]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:79 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.000544   0.13756712 0.1322832  0.08088248 0.10678054 0.03448231]] probs:[[0.15342306 0.17595395 0.17502667 0.16625747 0.17061947 0.15871935]] entropy:[1.7905189]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:80 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01909529  0.13080454  0.13282542  0.08311467  0.1146046   0.06229756]] probs:[[0.150122   0.1743994  0.1747522  0.16627753 0.17159691 0.1628519 ]] entropy:[1.7904232]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2089198] v_loss:[[0.00114497]]
DEBUG:chainerrl.agents.a3c:grad norm:11.118259200167625
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:81 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00657944  0.10231699  0.11789706  0.08132569  0.08428051  0.13874389]] probs:[[0.15172167 0.16917679 0.17183322 0.16566256 0.16615278 0.17545299]] entropy:[1.7907388]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:82 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0055436   0.10213148  0.11890454  0.08147614  0.08342323  0.14045446]] probs:[[0.15180576 0.16906393 0.17192356 0.16560768 0.16593044 0.1756687 ]] entropy:[1.7907332]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:83 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[5.2448595e-06 1.0047723e-01 1.2300955e-01 8.1367902e-02 7.7445149e-02
  1.4329882e-01]] probs:[[0.15253362 0.1686553  0.17249863 0.16546302 0.1648152  0.17603426]] entropy:[1.790757]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:84 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02117483 0.09261364 0.13335758 0.08144277 0.0480471  0.13323095]] probs:[[0.15623285 0.16780226 0.17478038 0.16593818 0.16048808 0.17475826]] entropy:[1.7909167]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:85 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05553158 0.0769653  0.14590256 0.08635192 0.00907554 0.12244905]] probs:[[0.16203885 0.16554944 0.17736453 0.16711071 0.15468335 0.17325312]] entropy:[1.7907841]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:86 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07621075  0.04320788  0.15218598  0.06286438 -0.05913982  0.01478321]] probs:[[0.17102656 0.16547431 0.18452668 0.16875914 0.14937624 0.16083698]] entropy:[1.789728]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:87 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0743264   0.04001724  0.14431992  0.06015915 -0.06078495  0.01100699]] probs:[[0.1713218  0.16554359 0.18374284 0.16891176 0.14966992 0.16081011]] entropy:[1.789828]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:88 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0796291   0.03895064  0.14556462  0.06917641 -0.06646527  0.01041193]] probs:[[0.17196617 0.16511121 0.183687   0.17017801 0.14859186 0.16046576]] entropy:[1.7896569]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-0.92878246] v_loss:[[0.00254923]]
DEBUG:chainerrl.agents.a3c:grad norm:51.82784661254655
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:89 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10422171  0.00273     0.11176426  0.11400598  0.05844339 -0.03747972]] probs:[[0.17409393 0.15729187 0.175412   0.17580566 0.16630386 0.15109268]] entropy:[1.7901043]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:90 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09500162 -0.00602898  0.09453234  0.09632719  0.0388436  -0.04442834]] probs:[[0.17482498 0.15802525 0.17474295 0.17505687 0.16527775 0.15207219]] entropy:[1.7902703]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:91 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09499725 -0.00598245  0.10456568  0.09591982  0.04872435 -0.04480606]] probs:[[0.17425248 0.15751578 0.1759278  0.17441332 0.16637303 0.15151763]] entropy:[1.790193]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:92 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09553441 -0.00544254  0.09331403  0.09708329  0.03726469 -0.04366208]] probs:[[0.17492491 0.15812404 0.17453694 0.17519605 0.16502336 0.15219465]] entropy:[1.7902822]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:93 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09984008 -0.00406074  0.10745636  0.10274929  0.05078756 -0.04219903]] probs:[[0.17447159 0.1572538  0.17580548 0.1749799  0.16611983 0.15136935]] entropy:[1.790133]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:94 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09926938 -0.00388884  0.08737808  0.10310377  0.03152629 -0.03861895]] probs:[[0.17544778 0.15825114 0.17337382 0.1761218  0.16395605 0.15284939]] entropy:[1.7903079]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:95 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09602902 -0.00291346  0.08988702  0.10619302  0.02325095 -0.03875418]] probs:[[0.175021   0.15853311 0.17394932 0.17680898 0.16273578 0.15295178]] entropy:[1.7902652]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:96 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08028652 -0.00199525  0.1061103   0.10466733  0.02727815 -0.03278801]] probs:[[0.17201905 0.1584317  0.1765191  0.17626455 0.16313808 0.15362749]] entropy:[1.7903492]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5560609] v_loss:[[0.00061023]]
DEBUG:chainerrl.agents.a3c:grad norm:4.2899901364972735
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:97 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05732596  0.03332276  0.10151005  0.08751241  0.03653198 -0.05239759]] probs:[[0.16870214 0.16470096 0.17632322 0.1738723  0.16523038 0.15117092]] entropy:[1.7905575]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:98 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08509845  0.04512141  0.07721282  0.08073656  0.03675561 -0.09359416]] probs:[[0.17428361 0.16745369 0.17291467 0.17352505 0.16605864 0.14576435]] entropy:[1.7899607]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:99 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08255305  0.04952937  0.06822245  0.07100805  0.04536107 -0.10477046]] probs:[[0.17438388 0.16871913 0.17190267 0.17238219 0.16801731 0.14459483]] entropy:[1.7898566]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:100 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08049561  0.05373628  0.06987657  0.08525016  0.05439739 -0.08645088]] probs:[[0.17276283 0.1682011  0.17093796 0.17358619 0.16831234 0.14619966]] entropy:[1.7901268]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:101 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06350028  0.08553223  0.05744328  0.10615855  0.11912814 -0.0636012 ]] probs:[[0.16673149 0.17044567 0.16572465 0.17399785 0.17626922 0.14683113]] entropy:[1.7900537]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:102 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06610184  0.14864506  0.00784914  0.1317771   0.16947915 -0.05985933]] probs:[[0.16426037 0.17839426 0.15496513 0.17541035 0.18214992 0.14482   ]] entropy:[1.788496]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:103 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08512717  0.182915   -0.0269089   0.15695363  0.1624014  -0.05014443]] probs:[[0.16597436 0.1830247  0.14838307 0.17833428 0.17930846 0.14497507]] entropy:[1.7875946]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:104 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09818447  0.1816496  -0.05804572  0.14569034  0.15166046 -0.0551047 ]] probs:[[0.16938029 0.18412438 0.1448815  0.17762104 0.17868462 0.14530823]] entropy:[1.7871567]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.8846614] v_loss:[[0.00245136]]
DEBUG:chainerrl.agents.a3c:grad norm:247.26010663340523
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:105 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04705618  0.17558542 -0.04868085  0.16081798  0.18207552  0.00745117]] probs:[[0.14722477 0.18393861 0.14698577 0.18124227 0.18513627 0.15547232]] entropy:[1.7865107]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:106 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04525408  0.11780501 -0.03542916  0.1512428   0.18604201  0.04700022]] probs:[[0.1479074  0.17410272 0.14936775 0.18002276 0.18639769 0.1622017 ]] entropy:[1.7878616]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:107 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01751514  0.06321443  0.00440012  0.1330205   0.16138257  0.06131594]] probs:[[0.15275078 0.16559371 0.1561353  0.17756617 0.18267442 0.16527963]] entropy:[1.7897184]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:108 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01870153  0.05619752  0.01024365  0.12890412  0.16049065  0.0766487 ]] probs:[[0.15238109 0.16423258 0.15685624 0.17661817 0.18228598 0.16762592]] entropy:[1.7898123]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:109 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02157087  0.05523091  0.00460149  0.12729771  0.16647957  0.06606948]] probs:[[0.15231466 0.16447364 0.15635371 0.17676426 0.1838277  0.166266  ]] entropy:[1.7896276]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:110 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02152195  0.05495711  0.00356864  0.12779103  0.16727631  0.0610637 ]] probs:[[0.15244338 0.16455951 0.15631665 0.1769923  0.1841207  0.16556749]] entropy:[1.7895923]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:111 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02110074  0.0550777   0.00425337  0.12853484  0.16710117  0.06149783]] probs:[[0.15245233 0.16451973 0.15636705 0.17705981 0.18402176 0.16557936]] entropy:[1.7896016]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:112 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0178647   0.05286017  0.00285915  0.12399115  0.16548768  0.06552205]] probs:[[0.15302601 0.16424066 0.1562304  0.17634879 0.1838206  0.16633348]] entropy:[1.7897036]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.9376061] v_loss:[[0.00362368]]
DEBUG:chainerrl.agents.a3c:grad norm:16.640556982458435
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:113 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02967255  0.02132643  0.04016183  0.12695381  0.1934279   0.10661468]] probs:[[0.14947307 0.15729378 0.16028456 0.17481752 0.18683329 0.17129779]] entropy:[1.7890145]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:114 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03033244  0.01908236  0.03727486  0.12405711  0.19688627  0.10623755]] probs:[[0.14949948 0.15707254 0.15995623 0.17445773 0.18763745 0.1713765 ]] entropy:[1.7889123]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:115 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03024578  0.01907497  0.03711461  0.12355148  0.1969494   0.10653654]] probs:[[0.14951827 0.1570775  0.15993685 0.17437634 0.18765661 0.17143445]] entropy:[1.7889136]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:116 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03033313  0.01905734  0.03737278  0.12390289  0.19682509  0.10606823]] probs:[[0.14950772 0.15707737 0.15998082 0.17444055 0.18763644 0.17135705]] entropy:[1.7889158]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:117 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03368714  0.01871686  0.0348634   0.1211271   0.19039313  0.10279394]] probs:[[0.14948572 0.15752825 0.16009244 0.17451577 0.18703227 0.17134549]] entropy:[1.7890115]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:118 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02676985  0.02967354  0.03910904  0.12467363  0.19564681  0.10221184]] probs:[[0.14978048 0.15847775 0.15998013 0.17427148 0.18708956 0.17040065]] entropy:[1.7891176]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:119 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04717048  0.04592541  0.05743043  0.1159986   0.1821677   0.08358158]] probs:[[0.14743778 0.16182284 0.16369537 0.17356902 0.18544242 0.16803266]] entropy:[1.7893406]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:120 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0782398   0.07011852  0.09342715  0.11134696  0.17343578  0.05882503]] probs:[[0.14308113 0.16596387 0.16987771 0.17294931 0.18402791 0.16410011]] entropy:[1.7889624]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4674652] v_loss:[[0.00010922]]
DEBUG:chainerrl.agents.a3c:grad norm:0.35267968162092916
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:121 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07739469  0.07272866  0.11778578  0.12438195  0.16605663  0.02982559]] probs:[[0.14306343 0.1662365  0.17389795 0.1750488  0.18249804 0.15925528]] entropy:[1.7887442]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:122 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07908551  0.0739817   0.12016146  0.12438241  0.16542052  0.02879553]] probs:[[0.1428074  0.16642822 0.17429405 0.1750313  0.18236367 0.15907533]] entropy:[1.7886927]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:123 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07953162  0.07416383  0.12020068  0.12425298  0.16486171  0.02841291]] probs:[[0.14277396 0.16649382 0.17433783 0.17504574 0.18230042 0.15904818]] entropy:[1.7886894]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:124 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08371604  0.07217449  0.11526586  0.12170732  0.15581232  0.02545099]] probs:[[0.14279795 0.16688772 0.17423633 0.17536229 0.18144618 0.1592695 ]] entropy:[1.7887663]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:125 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08020685  0.0738904   0.11951375  0.12393139  0.16354632  0.02796098]] probs:[[0.14276746 0.16655314 0.17432785 0.17509967 0.18217546 0.15907647]] entropy:[1.7886988]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:126 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07895072  0.0743794   0.12100522  0.12458882  0.16611107  0.02881134]] probs:[[0.14276995 0.16642831 0.17437193 0.17499793 0.1824172  0.1590147 ]] entropy:[1.7886772]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:127 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0850941   0.06997673  0.1209363   0.12247365  0.16160233  0.02633673]] probs:[[0.1423512  0.16622928 0.17491981 0.17518893 0.18217972 0.15913105]] entropy:[1.7886033]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:128 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06535503  0.0454141   0.14888455  0.12432344  0.12079682  0.03891148]] probs:[[0.14536113 0.16238828 0.18009071 0.17572136 0.17510276 0.16133575]] entropy:[1.7892158]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7078167] v_loss:[[0.00092044]]
DEBUG:chainerrl.agents.a3c:grad norm:3.0755685309854117
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:129 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03056978  0.04002851  0.13254464  0.10233074  0.08543532  0.07746295]] probs:[[0.15083995 0.16187389 0.17756447 0.17227979 0.16939351 0.16804841]] entropy:[1.7904432]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:130 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02419831  0.04523642  0.13318059  0.10165593  0.09175956  0.07506054]] probs:[[0.15142892 0.16231698 0.17723832 0.17173809 0.17004688 0.16723086]] entropy:[1.7905437]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:131 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03407852  0.02899772  0.14046016  0.11474064  0.09302414  0.09576479]] probs:[[0.14946674 0.15919824 0.17796957 0.17345065 0.16972451 0.1701903 ]] entropy:[1.7900939]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:132 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02611258  0.04263783  0.13432261  0.10349784  0.09167748  0.07892076]] probs:[[0.15107267 0.16182433 0.17736258 0.17197882 0.16995794 0.1678036 ]] entropy:[1.790479]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:133 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03363289  0.03634641  0.13564385  0.10730767  0.08860665  0.08579616]] probs:[[0.15003373 0.16090907 0.17770712 0.17274226 0.16954182 0.169066  ]] entropy:[1.7902907]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:134 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03324802  0.0341621   0.13956003  0.11119034  0.09194535  0.08966928]] probs:[[0.14974721 0.1601897  0.1779952  0.1730165  0.16971862 0.16933277]] entropy:[1.7901975]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:135 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03600911  0.02770921  0.13574101  0.10917951  0.08633549  0.09471088]] probs:[[0.14971802 0.15956828 0.17777234 0.1731126  0.16920283 0.17062593]] entropy:[1.7901626]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:136 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03200003  0.03706355  0.13081768  0.10159722  0.08262209  0.07973335]] probs:[[0.15080856 0.16159202 0.17747486 0.17236398 0.1691242  0.16863634]] entropy:[1.7904327]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7345409] v_loss:[[0.00109582]]
DEBUG:chainerrl.agents.a3c:grad norm:11.711132446399937
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:137 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01434308 0.00429097 0.13311696 0.09887564 0.08770344 0.10465769]] probs:[[0.15686376 0.15529485 0.17664668 0.17070045 0.16880396 0.1716903 ]] entropy:[1.7906382]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:138 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01436319 0.00428819 0.13309601 0.09886166 0.08770105 0.10465354]] probs:[[0.15686762 0.15529512 0.17664377 0.17069884 0.16880432 0.17169037]] entropy:[1.7906388]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:139 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01436417 0.00444121 0.13306245 0.09852798 0.0877354  0.10513804]] probs:[[0.15685993 0.15531112 0.17662902 0.17063335 0.16880168 0.17176498]] entropy:[1.7906396]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:140 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01535511 0.00314148 0.13265806 0.09701183 0.08553187 0.11076694]] probs:[[0.1569806  0.15507497 0.17651843 0.17033704 0.16839275 0.17269623]] entropy:[1.7906175]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:141 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01683965 -0.00087484  0.13280925  0.09839316  0.08105043  0.1133215 ]] probs:[[0.15728262 0.15452097 0.17662236 0.17064713 0.16771315 0.17321372]] entropy:[1.7905674]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:142 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0147933  0.006304   0.13277978 0.09471166 0.08855201 0.11129674]] probs:[[0.15679303 0.1554676  0.17642805 0.169838   0.16879506 0.17267825]] entropy:[1.7906449]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:143 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01450644 0.00549598 0.13289127 0.09633321 0.08813392 0.10857639]] probs:[[0.15681313 0.1554065  0.17652096 0.17018422 0.16879456 0.17228064]] entropy:[1.7906435]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:144 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01445491 0.00518353 0.13295715 0.09702591 0.08799538 0.10749079]] probs:[[0.15682657 0.1553793  0.17655684 0.17032555 0.16879435 0.17211735]] entropy:[1.7906425]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3091135] v_loss:[[0.00021175]]
DEBUG:chainerrl.agents.a3c:grad norm:0.7715512257079404
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:145 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05422442 0.00094154 0.1259651  0.08628634 0.0867179  0.09225252]] probs:[[0.16321582 0.15474683 0.17535527 0.16853362 0.16860637 0.16954212]] entropy:[1.7910178]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:146 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05433273 0.00037315 0.12593682 0.08668289 0.08619515 0.09223314]] probs:[[0.16324976 0.15467432 0.17536779 0.16861728 0.16853505 0.16955574]] entropy:[1.791012]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:147 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05469245 -0.00073507  0.12588605  0.08715888  0.08496136  0.09297532]] probs:[[0.16332863 0.15452205 0.1753805  0.16871835 0.16834798 0.16970254]] entropy:[1.7909998]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:148 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05435242 0.00013735 0.12591878 0.08677186 0.0858939  0.09230918]] probs:[[0.16326267 0.15464704 0.17537503 0.16864228 0.16849428 0.1695787 ]] entropy:[1.7910095]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:149 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05444043 -0.00016778  0.12587894  0.08688205  0.08555847  0.0925366 ]] probs:[[0.16328342 0.1546059  0.1753749  0.16866747 0.16844437 0.1696239 ]] entropy:[1.7910064]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:150 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05418419 0.0009368  0.12608904 0.08624332 0.08688618 0.09171888]] probs:[[0.1632182  0.15475458 0.17538662 0.1685356  0.168644   0.16946097]] entropy:[1.7910177]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:151 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05525118 -0.0019426   0.12593485  0.0876464   0.08381318  0.09389121]] probs:[[0.16342676 0.15434204 0.17539641 0.1688077  0.16816185 0.16986518]] entropy:[1.7909847]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:152 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05725806 0.00420617 0.12457939 0.08820102 0.08456814 0.08895505]] probs:[[0.16368522 0.15522772 0.1750841  0.16882929 0.16821706 0.16895664]] entropy:[1.791085]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2638742] v_loss:[[0.00038154]]
DEBUG:chainerrl.agents.a3c:grad norm:0.8196312773625074
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:153 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09048934 -0.01553715  0.09192096  0.0768159   0.18814921  0.05469808]] probs:[[0.16793507 0.15104094 0.16817567 0.16565445 0.18516314 0.16203076]] entropy:[1.7899328]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:154 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05930931 -0.00351135  0.11256891  0.08050856  0.20432957  0.05773688]] probs:[[0.16208157 0.15221274 0.17094798 0.16555426 0.18737651 0.16182691]] entropy:[1.7896845]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:155 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06000787 -0.00346086  0.11266612  0.08219346  0.20337443  0.05764443]] probs:[[0.16215871 0.15218651 0.17092653 0.1657965  0.18715593 0.1617759 ]] entropy:[1.7897105]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:156 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07862671 -0.01382988  0.0962582   0.07826623  0.18851003  0.05668861]] probs:[[0.16601464 0.15135369 0.1689677  0.16595481 0.1852969  0.16241226]] entropy:[1.7899555]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:157 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06022361 -0.00378082  0.11179466  0.08195217  0.2032928   0.05772187]] probs:[[0.16222699 0.15216905 0.1708127  0.16579051 0.18717906 0.16182165]] entropy:[1.7897122]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:158 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06057458 -0.00351135  0.11280753  0.08238235  0.20414881  0.05756946]] probs:[[0.16220641 0.15213734 0.1709041  0.16578262 0.18724985 0.16171968]] entropy:[1.789695]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:159 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05962619 -0.00362425  0.11270359  0.08169255  0.20345451  0.05786248]] probs:[[0.16211514 0.15217884 0.17095225 0.1657322  0.18719207 0.16182947]] entropy:[1.7897048]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:160 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06087536 -0.00474403  0.11068988  0.08199634  0.20476143  0.05812742]] probs:[[0.1623135  0.15200453 0.17060384 0.16577817 0.18743193 0.16186808]] entropy:[1.7896761]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6637211] v_loss:[[0.00058695]]
DEBUG:chainerrl.agents.a3c:grad norm:9.981183604845723
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:161 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14653246 -0.01294022  0.04472991  0.0498426   0.10952231  0.12290427]] probs:[[0.17844544 0.15214138 0.16117333 0.16199946 0.17196186 0.17427851]] entropy:[1.7902886]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:162 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12531419 -0.00890776  0.04124672  0.03764394  0.12303012  0.11216043]] probs:[[0.17560871 0.15355155 0.16144924 0.16086861 0.17520806 0.17331392]] entropy:[1.7904631]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:163 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12622601 -0.00821102  0.04381759  0.03996825  0.12682241  0.11207644]] probs:[[0.17546913 0.15339652 0.16158877 0.16096798 0.17557381 0.17300381]] entropy:[1.7904584]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:164 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14419611 -0.00618955  0.04507136  0.02938292  0.12601529  0.11868247]] probs:[[0.1781156  0.15324642 0.16130678 0.15879588 0.17490658 0.17362872]] entropy:[1.7901998]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:165 r:0.05 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15012908 -0.01045473  0.05154495  0.03601301  0.13057633  0.11883242]] probs:[[0.17857954 0.15208663 0.16181439 0.15932052 0.17512172 0.17307715]] entropy:[1.790118]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:166 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14727776 -0.00859111  0.04782493  0.03816088  0.12190377  0.12015212]] probs:[[0.17838624 0.15264    0.16149889 0.15994568 0.17391682 0.17361243]] entropy:[1.7902328]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:167 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14709356 -0.01080136  0.04619833  0.04449814  0.11551479  0.12249499]] probs:[[0.17840974 0.15235113 0.16128735 0.16101336 0.1728638  0.17407465]] entropy:[1.7902613]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:168 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13457614 -0.01657853  0.04384185  0.03526966  0.11750142  0.11995285]] probs:[[0.17708538 0.15224291 0.16172506 0.16034463 0.17408736 0.17451465]] entropy:[1.7902573]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4182371] v_loss:[[0.00072166]]
DEBUG:chainerrl.agents.a3c:grad norm:9.523307865584293
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:169 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03449944 0.0031862  0.04167453 0.12419979 0.13847324 0.05293296]] probs:[[0.1613321  0.15635854 0.16249382 0.17647253 0.17900947 0.16433358]] entropy:[1.7905495]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:170 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03460952 0.00337303 0.04185067 0.12427897 0.13896775 0.05315596]] probs:[[0.16131519 0.15635416 0.16248754 0.1764486  0.17905955 0.16433492]] entropy:[1.7905461]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:171 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03439227 0.00304501 0.04151488 0.12416048 0.13813512 0.05278486]] probs:[[0.16134013 0.15636101 0.16249341 0.1764933  0.17897706 0.16433504]] entropy:[1.790551]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:172 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03334354 -0.00416967  0.06453184  0.1423843   0.16044006  0.06285575]] probs:[[0.15934864 0.1534817  0.16439676 0.17770685 0.18094462 0.16412145]] entropy:[1.7900729]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:173 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02845234 -0.01504886  0.07403491  0.1493495   0.16466977  0.0663    ]] probs:[[0.15830094 0.15156227 0.16568369 0.178644   0.18140195 0.16440707]] entropy:[1.789768]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:174 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02752859 -0.01667797  0.07588478  0.15086423  0.16486879  0.06715506]] probs:[[0.15809762 0.1512609  0.16593048 0.17885016 0.1813725  0.16448827]] entropy:[1.7897189]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:175 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04128158 0.00863773 0.03970923 0.1278799  0.14095372 0.06261653]] probs:[[0.16173004 0.15653577 0.16147593 0.17635989 0.17868073 0.1652176 ]] entropy:[1.7905837]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:176 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04165241 0.00869514 0.03913844 0.12758644 0.14048995 0.06253752]] probs:[[0.16181767 0.15657152 0.16141137 0.17633829 0.17862841 0.16523278]] entropy:[1.7905917]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4010534] v_loss:[[9.998472e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:1.284577891135
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:177 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05343334  0.03566357 -0.00888147  0.08328124  0.1378935   0.06488357]] probs:[[0.16523688 0.1623266  0.15525444 0.1702432  0.17979912 0.16713975]] entropy:[1.7907531]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:178 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05134461  0.03912258 -0.0097098   0.08040874  0.13671324  0.06118083]] probs:[[0.165095   0.16308948 0.15531676 0.16996376 0.17980802 0.16672693]] entropy:[1.7907805]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:179 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05674827  0.03627968 -0.00695616  0.08627249  0.1399046   0.06572973]] probs:[[0.16546102 0.16210869 0.15524913 0.17041896 0.17980842 0.16695379]] entropy:[1.7907448]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:180 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05614561  0.03858151 -0.00969623  0.08353081  0.1359321   0.06024121]] probs:[[0.16573328 0.16284774 0.15517257 0.17033464 0.17949839 0.16641344]] entropy:[1.790785]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:181 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06991904  0.04116604 -0.00820055  0.08477423  0.13069268  0.04213067]] probs:[[0.16815966 0.16339342 0.15552312 0.17067637 0.17869627 0.16355112]] entropy:[1.7908381]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:182 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06322791  0.07858243 -0.05083382  0.06263686  0.07698473 -0.01983058]] probs:[[0.17119671 0.17384563 0.15274219 0.17109555 0.1735681  0.15755187]] entropy:[1.7904943]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:183 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07059678  0.08648263 -0.06027242  0.05377275  0.05671751 -0.04970565]] probs:[[0.17392723 0.17671229 0.15259203 0.17102557 0.17152993 0.15421298]] entropy:[1.7900909]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:184 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06985081  0.10267245 -0.05704396  0.06565098  0.04388785 -0.06788058]] probs:[[0.17374064 0.17953771 0.15303533 0.1730125  0.16928788 0.1513859 ]] entropy:[1.7896907]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5448005] v_loss:[[0.0001549]]
DEBUG:chainerrl.agents.a3c:grad norm:1.6018956263302573
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:185 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10201488  0.10378759 -0.08256529  0.03574123  0.0046758  -0.06500369]] probs:[[0.18107629 0.18139757 0.15055647 0.16946472 0.16428116 0.15322384]] entropy:[1.7891183]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:186 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10330803  0.1123082  -0.08907532  0.02510972 -0.00699945 -0.07693031]] probs:[[0.18216787 0.18381481 0.15028678 0.16846538 0.16314203 0.15212314]] entropy:[1.7886728]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:187 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10812292  0.10338037 -0.08965311  0.01363918 -0.01076721 -0.08794723]] probs:[[0.18397488 0.18310443 0.15096125 0.16738817 0.16335228 0.15121898]] entropy:[1.7885671]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:188 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10060079  0.1119824  -0.08283644  0.01720331 -0.00212634 -0.06994977]] probs:[[0.18150765 0.1835853  0.15108772 0.1669844  0.16378763 0.15304732]] entropy:[1.7889369]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:189 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09160971  0.0637393  -0.05618609  0.00438983  0.00461972 -0.09322891]] probs:[[0.181834   0.17683618 0.15685132 0.16664642 0.16668473 0.1511474 ]] entropy:[1.7897441]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:190 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10112954  0.06524304 -0.05944699  0.01293681  0.01228916 -0.10663865]] probs:[[0.1831712  0.17671438 0.15599823 0.1677087  0.16760011 0.14880744]] entropy:[1.7893233]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:191 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09945185  0.08498515 -0.06256911  0.02798321  0.01936167 -0.09777912]] probs:[[0.1814513  0.1788452  0.15431044 0.16893579 0.16748556 0.1489717 ]] entropy:[1.7892262]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:192 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10354378  0.07884398 -0.05620147  0.02430353  0.01851739 -0.10549974]] probs:[[0.18242751 0.17797679 0.15549408 0.16852981 0.16755748 0.14801438]] entropy:[1.7891806]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5107807] v_loss:[[9.520062e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:4.624137779963852
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:193 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17177802 -0.05514157  0.16721088 -0.01175604  0.01018806  0.14393437]] probs:[[0.13752162 0.15453453 0.19301541 0.16138665 0.16496728 0.18857457]] entropy:[1.7851405]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:194 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17087191 -0.02320338  0.15878658  0.01177004  0.02087796  0.1598205 ]] probs:[[0.13599326 0.15763368 0.1890977  0.1632442  0.16473782 0.18929331]] entropy:[1.7855332]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:195 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16906159 -0.02547675  0.15704882  0.0105459   0.01980752  0.1563233 ]] probs:[[0.13644116 0.15750831 0.18904854 0.16328561 0.16480494 0.18891144]] entropy:[1.7856725]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:196 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16676748 -0.02248999  0.15463415  0.0135695   0.02214334  0.15735593]] probs:[[0.13656285 0.15775803 0.18832827 0.16355053 0.16495879 0.18884157]] entropy:[1.7858158]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:197 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15526815 -0.02962084  0.13379435  0.00934345  0.01223184  0.14338692]] probs:[[0.13930716 0.15795791 0.18599947 0.1642341  0.16470917 0.18779226]] entropy:[1.786753]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:198 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15758522 -0.0229784   0.13697237  0.01310074  0.01759221  0.15096122]] probs:[[0.13839605 0.1583371  0.18580116 0.16415407 0.16489302 0.18841857]] entropy:[1.7865531]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:199 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16059408 -0.05104253  0.1440383  -0.00876502  0.0027704   0.13108309]] probs:[[0.139821   0.15600914 0.18961494 0.16274622 0.16463444 0.18717428]] entropy:[1.7863388]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:200 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15348624 -0.03369325  0.1330226   0.00628363  0.01150586  0.13878451]] probs:[[0.13983816 0.15763445 0.18623225 0.16406384 0.16492286 0.18730842]] entropy:[1.7868606]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.1641432] v_loss:[[0.00125346]]
DEBUG:chainerrl.agents.a3c:grad norm:15.517807283038742
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:201 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06063905 -0.03358058  0.06992193 -0.00569055 -0.00762868  0.05010978]] probs:[[0.15637371 0.16066271 0.17818275 0.16520667 0.16488679 0.17468731]] entropy:[1.7907305]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:202 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06054106 -0.03344078  0.07002948 -0.00557144 -0.00772479  0.05000019]] probs:[[0.15638255 0.16067849 0.17819451 0.16521949 0.1648641  0.1746609 ]] entropy:[1.7907319]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:203 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06064345 -0.03360194  0.06991532 -0.00572606 -0.00763519  0.05006544]] probs:[[0.15637615 0.1606625  0.17818515 0.16520411 0.16488902 0.17468306]] entropy:[1.7907307]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:204 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06086615 -0.03211101  0.07421399 -0.00574979 -0.0055766   0.04749095]] probs:[[0.15620711 0.16076407 0.17879912 0.16505836 0.16508695 0.17408434]] entropy:[1.7907091]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:205 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0539271  -0.02903542  0.09090222 -0.00744145  0.00072086  0.04349998]] probs:[[0.15656486 0.16051093 0.18096428 0.1640147  0.1653589  0.1725863 ]] entropy:[1.7906051]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:206 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05647709 -0.0305223   0.08336056 -0.00735868 -0.00185168  0.04557168]] probs:[[0.15648696 0.1606017  0.17997362 0.16436525 0.1652729  0.17329952]] entropy:[1.7906606]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:207 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05495118 -0.03000921  0.08757102 -0.00781357 -0.00033319  0.04424357]] probs:[[0.15656511 0.16051926 0.18054755 0.16412193 0.16535422 0.17289193]] entropy:[1.7906301]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:208 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06042167 -0.03302183  0.07042924 -0.00541416 -0.00746621  0.0496839 ]] probs:[[0.15637451 0.16071841 0.17823535 0.16521728 0.16487859 0.17457588]] entropy:[1.7907343]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6916826] v_loss:[[0.00070385]]
DEBUG:chainerrl.agents.a3c:grad norm:4.706497978587734
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:209 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03880729 -0.00831269  0.08514317 -0.00216059 -0.04387605 -0.00168884]] probs:[[0.16043612 0.16540392 0.18160723 0.16642463 0.15962495 0.16650315]] entropy:[1.7908351]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:210 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04008494 -0.01604075  0.08537463  0.00175646 -0.03985526  0.01004348]] probs:[[0.15993935 0.16383158 0.18131834 0.16677341 0.1599761  0.16816121]] entropy:[1.7908288]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:211 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03851639 -0.01659456  0.09282255  0.00320721 -0.03513952  0.00993441]] probs:[[0.15979162 0.1633332  0.18221904 0.16659974 0.16033211 0.16772425]] entropy:[1.7907525]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:212 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03575192 -0.00901354  0.09003465 -0.00247289 -0.04037249 -0.00104891]] probs:[[0.16062501 0.1649778  0.1821552  0.16606039 0.15988454 0.16629703]] entropy:[1.7908001]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:213 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03641117 -0.01083863  0.09078634 -0.00147325 -0.03900003  0.00144662]] probs:[[0.16043389 0.1645895  0.18219534 0.16613817 0.16001907 0.166624  ]] entropy:[1.7907913]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:214 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04220723 -0.03059905  0.09806014  0.01453331 -0.02713925  0.02774065]] probs:[[0.15852201 0.16037288 0.18239246 0.16777669 0.1609287  0.17000729]] entropy:[1.7905804]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:215 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04354097 -0.03084799  0.09192202  0.01537364 -0.03240287  0.02813851]] probs:[[0.15862851 0.16065481 0.18164025 0.16825484 0.1604052  0.17041636]] entropy:[1.7906308]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:216 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04624643 -0.03275985  0.08469225  0.01639565 -0.03368762  0.03125692]] probs:[[0.15844484 0.16059618 0.18061092 0.16868761 0.16044725 0.17121324]] entropy:[1.7906826]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.8531545] v_loss:[[0.00184169]]
DEBUG:chainerrl.agents.a3c:grad norm:10.536341684051733
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:217 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00348925 -0.06281592 -0.09435579  0.10443337  0.00172845  0.05681155]] probs:[[0.16661501 0.15592586 0.15108472 0.184312   0.16632189 0.17574044]] entropy:[1.7895079]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:218 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00318498 -0.0623581  -0.09434173  0.10463795  0.00079467  0.05725706]] probs:[[0.16656707 0.15599984 0.15108934 0.18435274 0.16616939 0.17582165]] entropy:[1.7895043]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:219 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00337966 -0.06304379 -0.09455228  0.10440023  0.00157523  0.05666183]] probs:[[0.16662033 0.1559124  0.15107642 0.18433197 0.16631995 0.17573899]] entropy:[1.7895042]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:220 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00339233 -0.06303018 -0.09454338  0.10441351  0.00155534  0.05666853]] probs:[[0.16662145 0.15591359 0.15107685 0.18433331 0.16631564 0.17573912]] entropy:[1.789504]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:221 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00347099 -0.06307644 -0.09464656  0.10439213  0.00172584  0.05658879]] probs:[[0.16663443 0.15590626 0.15106116 0.18432923 0.16634388 0.17572498]] entropy:[1.7895032]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:222 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00400968 -0.06321024 -0.095205    0.10433578  0.00244726  0.05638037]] probs:[[0.16671461 0.15587643 0.15096813 0.18430823 0.16645434 0.17567824]] entropy:[1.7894967]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:223 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0079259  -0.06317639 -0.09872426  0.10458778  0.00670229  0.05508174]] probs:[[0.16725884 0.1557793  0.15033896 0.18423359 0.16705431 0.175335  ]] entropy:[1.7894505]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:224 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02271025 -0.06679757 -0.1127789   0.10870438  0.02435148  0.04831482]] probs:[[0.16934665 0.15484737 0.1478885  0.18455397 0.1696248  0.17373867]] entropy:[1.7891073]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6587856] v_loss:[[0.00061389]]
DEBUG:chainerrl.agents.a3c:grad norm:1.9500316197451368
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:225 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1516115  -0.08456328 -0.04516387  0.01013727  0.17546251  0.00815718]] probs:[[0.18623385 0.14705819 0.15296784 0.16166542 0.19072911 0.16134563]] entropy:[1.787052]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:226 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15622811 -0.0892178  -0.03259218  0.02226315  0.18869954  0.00533485]] probs:[[0.18594773 0.14547725 0.15395269 0.16263372 0.1920848  0.1599038 ]] entropy:[1.7867444]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:227 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15804388 -0.0893067  -0.03210234  0.0223785   0.19152363  0.00557115]] probs:[[0.18609953 0.14531896 0.1538742  0.16248997 0.19243556 0.15978177]] entropy:[1.7866409]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:228 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15854733 -0.08909369 -0.03185107  0.02257188  0.19209908  0.00600088]] probs:[[0.1861236  0.14529555 0.1538553  0.16246061 0.19247432 0.15979065]] entropy:[1.7866275]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:229 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15860392 -0.08901171 -0.03182204  0.02220191  0.19233026  0.00618676]] probs:[[0.1861265  0.1453015  0.15385346 0.16239384 0.19251092 0.1598138 ]] entropy:[1.7866219]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:230 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15925153 -0.08702366 -0.03211552  0.01952016  0.1942423   0.0076667 ]] probs:[[0.18614751 0.14551283 0.1537261  0.16187236 0.19277626 0.15996493]] entropy:[1.7865914]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:231 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16157202 -0.08270947 -0.03213202  0.01162084  0.197956    0.01217225]] probs:[[0.18635169 0.14596315 0.15353549 0.16040222 0.19325677 0.16049069]] entropy:[1.7865144]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:232 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16250941 -0.08826745 -0.02935109  0.01401842  0.19597898  0.01282339]] probs:[[0.1865449  0.1451685  0.15397827 0.16080315 0.19289413 0.1606111 ]] entropy:[1.7864938]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5941095] v_loss:[[0.00040713]]
DEBUG:chainerrl.agents.a3c:grad norm:1.284516505410445
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:233 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14767577 -0.0547483  -0.04936412  0.02148505  0.20460124  0.02628323]] probs:[[0.18302897 0.14948863 0.15029567 0.16133028 0.19375023 0.16210623]] entropy:[1.7869928]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:234 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14874348 -0.05579329 -0.04967023  0.02106737  0.20489381  0.02691147]] probs:[[0.183209   0.14931986 0.15023698 0.16124927 0.19379054 0.16219439]] entropy:[1.7869449]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:235 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14795822 -0.05615824 -0.05144707  0.02191498  0.20379773  0.02528287]] probs:[[0.18321256 0.14938556 0.15009099 0.16151594 0.1937341  0.16206081]] entropy:[1.7869501]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:236 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13926071 -0.05290654 -0.0574926   0.03248537  0.19167764  0.02336646]] probs:[[0.18216018 0.15031292 0.14962514 0.16371238 0.19196314 0.16222629]] entropy:[1.7874128]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:237 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1367486  -0.05262996 -0.06138562  0.03372867  0.18893042  0.0177887 ]] probs:[[0.1821078  0.15068933 0.1493757  0.16428109 0.19186282 0.16168322]] entropy:[1.7874364]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:238 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1334607  -0.09097584 -0.05290607  0.06951938  0.15953283 -0.0039086 ]] probs:[[0.18298066 0.14619537 0.1518683  0.17164685 0.18781409 0.15949477]] entropy:[1.7874781]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:239 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12819844 -0.06426878 -0.0634406   0.04753458  0.17526707  0.0052    ]] probs:[[0.18163691 0.14983618 0.14996031 0.16756074 0.19039072 0.16061518]] entropy:[1.7876325]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:240 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12088414 -0.08111315 -0.0659047   0.06792829  0.15498604 -0.01771794]] probs:[[0.18180488 0.14855225 0.15082876 0.17242773 0.1881117  0.15827467]] entropy:[1.7876548]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.513313] v_loss:[[7.578992e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:2.2553406857870133
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:241 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0671284  -0.0530373   0.01235033  0.01269522  0.17641667  0.06112523]] probs:[[0.16977958 0.15055604 0.16072953 0.16078496 0.18938638 0.16876343]] entropy:[1.7892176]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:242 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03388171 -0.05369381  0.02867427  0.03476002  0.16265516  0.08771737]] probs:[[0.16381148 0.15007584 0.16296066 0.16395542 0.18632449 0.17287208]] entropy:[1.7895862]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:243 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04260371 -0.0504711   0.03086928  0.02950303  0.17065205  0.09437966]] probs:[[0.16457677 0.14995006 0.16265684 0.16243476 0.18705925 0.17332233]] entropy:[1.7894449]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:244 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04323906 -0.0517069   0.03327749  0.02641913  0.1699428   0.08532821]] probs:[[0.16499202 0.15004739 0.1633566  0.16224007 0.18727925 0.1720846 ]] entropy:[1.7894887]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:245 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05254942 -0.04876357  0.02630589  0.02241236  0.17433994  0.07616638]] probs:[[0.16662566 0.1505713  0.16230969 0.16167897 0.1882066  0.17060769]] entropy:[1.7894413]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:246 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0522675  -0.04860009  0.02502893  0.02259719  0.1735629   0.07884376]] probs:[[0.16656011 0.15057912 0.16208449 0.16169082 0.18803944 0.17104602]] entropy:[1.7894456]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:247 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05219724 -0.04975249  0.02668978  0.02964338  0.17598033  0.0413065 ]] probs:[[0.16732125 0.15110362 0.16310728 0.16358973 0.18936922 0.16550887]] entropy:[1.7894566]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:248 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05834074 -0.04742962  0.02683789  0.01824094  0.17850861  0.07466883]] probs:[[0.16740714 0.1506047  0.16221555 0.16082695 0.1887827  0.170163  ]] entropy:[1.7893515]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2755363] v_loss:[[0.00025056]]
DEBUG:chainerrl.agents.a3c:grad norm:1.9391352858189252
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:249 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01187583 -0.03453833  0.01689691  0.06337924  0.10552725  0.05857185]] probs:[[0.16237687 0.1550125  0.16319422 0.17095892 0.17831852 0.17013903]] entropy:[1.7907612]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:250 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01205528 -0.03447371  0.0170532   0.06311414  0.10573439  0.0590359 ]] probs:[[0.16238402 0.15500154 0.16319764 0.17089048 0.17833132 0.17019497]] entropy:[1.7907603]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:251 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01205903 -0.03440687  0.01676217  0.06355088  0.10553063  0.05825921]] probs:[[0.1624058  0.1550321  0.16317141 0.17098743 0.17831823 0.170085  ]] entropy:[1.7907633]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:252 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01187361 -0.03444957  0.01696321  0.06340382  0.10551444  0.05879186]] probs:[[0.16236617 0.15501639 0.16319464 0.17095225 0.17830487 0.17016563]] entropy:[1.7907617]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:253 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01204863 -0.03429307  0.01686869  0.06337876  0.10558207  0.0588093 ]] probs:[[0.16238679 0.15503322 0.16317141 0.17093977 0.17830838 0.17016044]] entropy:[1.7907633]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:254 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01188367 -0.03432092  0.01705486  0.06341305  0.10550925  0.05909112]] probs:[[0.1623535  0.15502268 0.16319522 0.17093876 0.17828824 0.17020157]] entropy:[1.7907625]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:255 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01187175 -0.03438079  0.01699656  0.063432    0.10549304  0.05898491]] probs:[[0.1623578  0.15501936 0.16319199 0.17094858 0.17829221 0.17019005]] entropy:[1.7907622]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:256 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01205387 -0.03433211  0.01711969  0.06317194  0.10569108  0.05943979]] probs:[[0.16236699 0.15500745 0.1631916  0.17088267 0.17830515 0.17024611]] entropy:[1.790761]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5145445] v_loss:[[7.792622e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.21697868299697476
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:257 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03933238 0.02561712 0.05546382 0.02979959 0.09619353 0.07361224]] probs:[[0.16429707 0.16205907 0.16696891 0.16273831 0.1739099  0.1700268 ]] entropy:[1.7914426]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:258 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03760973 0.02530517 0.05556637 0.03011166 0.09550182 0.07323238]] probs:[[0.16408814 0.1620815  0.16706124 0.16286242 0.17386791 0.17003876]] entropy:[1.7914443]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:259 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03754674 0.0252722  0.05566055 0.03002542 0.09550671 0.07335036]] probs:[[0.16407669 0.16207504 0.16707583 0.16284725 0.17386757 0.17005765]] entropy:[1.7914431]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:260 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03871226 0.02523907 0.05612263 0.02912237 0.09615778 0.07420424]] probs:[[0.1642065  0.16200896 0.16709043 0.1626393  0.17391564 0.17013918]] entropy:[1.7914345]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:261 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03756698 0.02532561 0.05571169 0.03002951 0.09553383 0.0735461 ]] probs:[[0.16407028 0.16207409 0.16707446 0.16283827 0.17386198 0.17008087]] entropy:[1.7914426]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:262 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0387392  0.02530509 0.05616247 0.02913719 0.09618387 0.0743989 ]] probs:[[0.16420077 0.16200963 0.16708675 0.16263165 0.17390941 0.17016177]] entropy:[1.791434]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:263 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03875592 0.025345   0.05618382 0.02914851 0.09619839 0.07451693]] probs:[[0.1641974  0.16201007 0.16708411 0.16262744 0.17390548 0.17017554]] entropy:[1.7914339]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:264 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0376214  0.02542677 0.05577357 0.03006055 0.09558076 0.07385655]] probs:[[0.16406254 0.16207401 0.16706783 0.16282678 0.17385247 0.1701164 ]] entropy:[1.7914419]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-0.9433603] v_loss:[[0.00331461]]
DEBUG:chainerrl.agents.a3c:grad norm:15.135964252309527
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:265 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16856363 -0.02407066  0.14625363 -0.0165396   0.10360609  0.02236352]] probs:[[0.18399838 0.15175883 0.17993882 0.15290606 0.17242621 0.15897179]] entropy:[1.7888224]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:266 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1685342  -0.0240742   0.14622618 -0.01648592  0.10358112  0.02230238]] probs:[[0.18399602 0.15176082 0.17993687 0.1529168  0.17242478 0.15896472]] entropy:[1.7888236]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:267 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16840842 -0.02415383  0.1460149  -0.01622684  0.10343394  0.02188171]] probs:[[0.18399604 0.15176784 0.1799215  0.15297568 0.1724211  0.15891786]] entropy:[1.7888284]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:268 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16853553 -0.02405587  0.14623718 -0.01647877  0.10359153  0.0223118 ]] probs:[[0.18399453 0.15176217 0.17993717 0.15291646 0.17242494 0.15896472]] entropy:[1.7888236]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:269 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1684095  -0.02413566  0.14602585 -0.01621934  0.10344432  0.021891  ]] probs:[[0.18399452 0.15176918 0.17992179 0.1529754  0.17242128 0.15891786]] entropy:[1.7888286]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:270 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16840988 -0.02412672  0.14603086 -0.01621596  0.10344879  0.02189432]] probs:[[0.18399382 0.1517699  0.17992194 0.15297528 0.17242134 0.15891773]] entropy:[1.7888288]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:271 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16853692 -0.02402865  0.14625305 -0.01646771  0.10360624  0.0223243 ]] probs:[[0.1839923  0.15176426 0.1799376  0.15291609 0.17242515 0.15896456]] entropy:[1.788824]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:272 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16853742 -0.02401929  0.14625904 -0.01646352  0.10361205  0.02233029]] probs:[[0.18399145 0.15176488 0.17993774 0.15291594 0.17242527 0.1589647 ]] entropy:[1.788824]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.67528] v_loss:[[0.00068104]]
DEBUG:chainerrl.agents.a3c:grad norm:1.8348428672963464
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:273 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.273416   -0.11425451  0.06646237 -0.0321455   0.17417146  0.05834727]] probs:[[0.20241235 0.13736428 0.16457285 0.14911912 0.18328868 0.16324273]] entropy:[1.7836235]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:274 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2715063  -0.11365325  0.0685339  -0.03162795  0.17275079  0.05800166]] probs:[[0.20206693 0.13747463 0.16494739 0.14922641 0.1830654  0.16321924]] entropy:[1.7837483]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:275 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.26916066 -0.11315648  0.07100763 -0.03087838  0.1711743   0.0579891 ]] probs:[[0.20162879 0.13756701 0.16538487 0.14936446 0.18280903 0.16324577]] entropy:[1.7838933]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:276 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.26663554 -0.11219505  0.07349405 -0.03000576  0.16927136  0.05753103]] probs:[[0.20117189 0.13773467 0.16583914 0.1495332  0.18250827 0.16321287]] entropy:[1.7840601]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:277 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.26420254 -0.11097998  0.0757327  -0.0289898   0.16734065  0.0568479 ]] probs:[[0.20073546 0.13793814 0.16625422 0.1497243  0.18220383 0.163144  ]] entropy:[1.7842284]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:278 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.26220965 -0.10997362  0.07764672 -0.02806879  0.16581972  0.05635215]] probs:[[0.20036829 0.13809942 0.16659974 0.14988655 0.18195641 0.16308957]] entropy:[1.7843654]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:279 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.26061845 -0.10905753  0.07926985 -0.02733579  0.16458887  0.05591508]] probs:[[0.20007104 0.13824071 0.16688816 0.15001245 0.18175195 0.16303569]] entropy:[1.7844763]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:280 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2593583  -0.10817402  0.08063851 -0.02674054  0.16351774  0.05535397]] probs:[[0.19983864 0.13837644 0.16713308 0.15011646 0.18157516 0.16296019]] entropy:[1.7845678]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6477183] v_loss:[[0.00056588]]
DEBUG:chainerrl.agents.a3c:grad norm:7.716711774030173
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:281 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24662948 -0.09182095  0.12264787  0.03886662  0.09858038  0.00851252]] probs:[[0.19766092 0.14090736 0.1746129  0.16057968 0.17046057 0.15577865]] entropy:[1.7862338]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:282 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24601756 -0.09241132  0.12213555  0.03896415  0.09867945  0.00765204]] probs:[[0.19761804 0.14087982 0.17459238 0.16065876 0.17054479 0.15570614]] entropy:[1.7862333]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:283 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24596895 -0.09250198  0.12219173  0.03892548  0.09874433  0.00770694]] probs:[[0.19760828 0.14086694 0.17460206 0.16065243 0.17055573 0.15571457]] entropy:[1.7862327]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:284 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24592793 -0.0925035   0.12208223  0.03897125  0.09870589  0.00755132]] probs:[[0.19761023 0.1408739  0.17459182 0.16066796 0.17055784 0.15569825]] entropy:[1.7862334]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:285 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24595666 -0.09251183  0.1221871   0.03892785  0.09874886  0.00769504]] probs:[[0.19760692 0.14086631 0.1746022  0.16065367 0.17055741 0.15571356]] entropy:[1.7862327]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:286 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2459263  -0.09250206  0.12208425  0.03897288  0.09870735  0.00755155]] probs:[[0.19760975 0.14087398 0.17459203 0.16066809 0.17055796 0.15569818]] entropy:[1.7862333]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:287 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24595657 -0.09250876  0.12218996  0.03892942  0.09874986  0.00769681]] probs:[[0.19760658 0.14086652 0.17460242 0.16065367 0.1705573  0.15571359]] entropy:[1.7862328]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:288 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24592644 -0.09249873  0.12208723  0.03897446  0.09870824  0.00755358]] probs:[[0.19760942 0.1408742  0.17459226 0.16066806 0.17055781 0.15569822]] entropy:[1.7862334]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6055341] v_loss:[[0.00036342]]
DEBUG:chainerrl.agents.a3c:grad norm:1.0748191896374066
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:289 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.21989997 -0.07171829  0.02954883  0.07491813  0.10263842 -0.03409548]] probs:[[0.19593213 0.14637183 0.16197099 0.16948876 0.17425275 0.15198365]] entropy:[1.7871122]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:290 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.21982475 -0.07170792  0.02952217  0.0747131   0.10269897 -0.03402339]] probs:[[0.1959234  0.14637782 0.16197164 0.1694592  0.17426865 0.15199925]] entropy:[1.7871153]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:291 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.21981615 -0.07171067  0.02953142  0.07469235  0.10270439 -0.03402695]] probs:[[0.19592245 0.14637798 0.16197373 0.1694563  0.17427024 0.1519993 ]] entropy:[1.7871158]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:292 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.21937156 -0.07247657  0.02987207  0.0751463   0.10182588 -0.03623616]] probs:[[0.19594422 0.14634721 0.16211899 0.16962749 0.17421399 0.15174817]] entropy:[1.7870886]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:293 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.21603918 -0.07232812  0.02696417  0.07506107  0.10096046 -0.03824478]] probs:[[0.19559953 0.14659916 0.1619025  0.16987982 0.17433707 0.15168187]] entropy:[1.787154]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:294 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.21393117 -0.07222886  0.02507006  0.07508549  0.10036518 -0.03951849]] probs:[[0.19538233 0.14675994 0.16175732 0.17005342 0.17440711 0.1516399 ]] entropy:[1.7871945]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:295 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2151293  -0.07140579  0.02536975  0.07469977  0.10154064 -0.03703294]] probs:[[0.19543666 0.14674571 0.16165699 0.1698315  0.17445165 0.15187746]] entropy:[1.7872055]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:296 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.21544789 -0.07140531  0.02561078  0.07478341  0.10161592 -0.03682759]] probs:[[0.1954677  0.14672233 0.16167013 0.16981858 0.17443691 0.15188439]] entropy:[1.7871995]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5759052] v_loss:[[0.00024458]]
DEBUG:chainerrl.agents.a3c:grad norm:0.9132217295461302
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:297 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14727828 -0.04809861 -0.0764526   0.23766899  0.03428722 -0.0183829 ]] probs:[[0.18324588 0.15072425 0.14651063 0.20058128 0.16366765 0.15527034]] entropy:[1.785238]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:298 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15285048 -0.04751213 -0.07699403  0.23607355  0.02861563 -0.01925172]] probs:[[0.1843338  0.15086503 0.14648218 0.20033108 0.16279854 0.15518937]] entropy:[1.7851645]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:299 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15359595 -0.04692366 -0.07636063  0.23571923  0.02848643 -0.01899303]] probs:[[0.18442199 0.15091352 0.14653584 0.20020661 0.16273402 0.15518805]] entropy:[1.7851883]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:300 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15634404 -0.04462558 -0.07365059  0.23406644  0.02803274 -0.01718093]] probs:[[0.18472086 0.15109009 0.14676772 0.19965048 0.16247669 0.15529412]] entropy:[1.7853073]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:301 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15684825 -0.04416703 -0.07307268  0.23378542  0.02797438 -0.01677928]] probs:[[0.18476892 0.15112248 0.14681673 0.19954567 0.16242756 0.1553186 ]] entropy:[1.7853311]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:302 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15536119 -0.04544729 -0.07473315  0.23477536  0.02829639 -0.01805129]] probs:[[0.18461603 0.15102866 0.1466698  0.19987503 0.16258702 0.15522344]] entropy:[1.7852567]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:303 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15498969 -0.04576224 -0.07517082  0.23504044  0.02838295 -0.01834744]] probs:[[0.18457684 0.15100515 0.14662898 0.19995986 0.16262698 0.15520221]] entropy:[1.7852373]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:304 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15486185 -0.04585753 -0.0752917   0.23514038  0.02840943 -0.01843383]] probs:[[0.18456152 0.15099753 0.14661781 0.1999888  0.16263859 0.15519574]] entropy:[1.7852311]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.605564] v_loss:[[0.00035442]]
DEBUG:chainerrl.agents.a3c:grad norm:1.3071824802860625
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:305 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15231283 -0.03295115 -0.0550114   0.21719879  0.01952639 -0.01176964]] probs:[[0.18399255 0.15287663 0.14954107 0.19632693 0.16111346 0.15614933]] entropy:[1.7864623]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:306 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15236047 -0.0327431  -0.05506438  0.21726882  0.01950851 -0.01164805]] probs:[[0.18398981 0.15289888 0.1495238  0.19632839 0.1611005  0.15615855]] entropy:[1.7864624]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:307 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15240745 -0.03253506 -0.05511691  0.2173385   0.01949045 -0.01152659]] probs:[[0.18398699 0.15292117 0.14950663 0.19632985 0.16108756 0.15616779]] entropy:[1.7864625]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:308 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15243065 -0.03231753 -0.05514153  0.21737956  0.01942195 -0.0114661 ]] probs:[[0.18398385 0.15294828 0.14949693 0.19633    0.16107003 0.15617095]] entropy:[1.786464]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:309 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15247513 -0.03207965 -0.05521099  0.2174381   0.01937237 -0.01133243]] probs:[[0.18398124 0.1529757  0.14947776 0.19632997 0.16105261 0.15618266]] entropy:[1.7864646]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:310 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15412405 -0.01724882 -0.05547323  0.2303569   0.03254026  0.00913791]] probs:[[0.1823758  0.15365297 0.14789051 0.1968225  0.16149685 0.15776134]] entropy:[1.7865194]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:311 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15537825 -0.01584725 -0.05144345  0.23395814  0.03516033  0.00965409]] probs:[[0.18219353 0.15352201 0.14815333 0.19708781 0.16155595 0.15748738]] entropy:[1.7864985]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:312 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15527542 -0.0163045  -0.05057006  0.23456836  0.03538829  0.00975627]] probs:[[0.18213587 0.15341903 0.14825109 0.19716597 0.16155826 0.15746981]] entropy:[1.7864926]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6596282] v_loss:[[0.00073919]]
DEBUG:chainerrl.agents.a3c:grad norm:1.7711433633335756
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:313 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00384302  0.07573728  0.02226648  0.04896449  0.09609337  0.19087245]] probs:[[0.15423581 0.16701156 0.15831587 0.16259952 0.1704461  0.18739116]] entropy:[1.7897398]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:314 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0020665   0.08675677  0.02372191  0.04780813  0.09830175  0.19876161]] probs:[[0.15389012 0.16818458 0.1579103  0.16175994 0.17013751 0.18811752]] entropy:[1.789582]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:315 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00042001 0.08723854 0.02691173 0.04804537 0.10130022 0.19349815]] probs:[[0.15419143 0.16817641 0.15833083 0.16171254 0.17055796 0.18703076]] entropy:[1.7897458]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:316 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00084017 0.08945006 0.02629755 0.04738218 0.10006718 0.1969325 ]] probs:[[0.15415359 0.16843657 0.1581283  0.16149779 0.17023441 0.1875493 ]] entropy:[1.7896698]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:317 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[1.0089681e-04 1.1130087e-01 1.6788263e-02 4.9671136e-02 9.3370810e-02
  2.0593879e-01]] probs:[[0.15357262 0.17163558 0.15615685 0.16137709 0.16858558 0.18867226]] entropy:[1.7893307]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:318 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00209264  0.11881489  0.01087403  0.05181111  0.0938189   0.21496484]] probs:[[0.15290445 0.17255582 0.15490003 0.16137277 0.16829607 0.18997087]] entropy:[1.7889956]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:319 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00162362  0.12200327  0.01319787  0.05468421  0.10029568  0.20409676]] probs:[[0.15290163 0.17302251 0.15518473 0.1617582  0.16930707 0.18782584]] entropy:[1.7892667]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:320 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-2.0925596e-04  1.2620947e-01  1.1323321e-02  5.3151794e-02
   9.5881179e-02  2.1054196e-01]] probs:[[0.15298402 0.17359972 0.15475854 0.16136914 0.16841377 0.18887486]] entropy:[1.7890913]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5181403] v_loss:[[0.00017981]]
DEBUG:chainerrl.agents.a3c:grad norm:6.887473093532802
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:321 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15337193  0.13169578  0.03149839  0.10622642 -0.02832676  0.25957868]] probs:[[0.17350057 0.16978021 0.15359317 0.16551061 0.14467388 0.1929416 ]] entropy:[1.7875719]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:322 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16670421  0.11470838  0.05336912  0.10732385 -0.01889987  0.2473037 ]] probs:[[0.17546718 0.16657674 0.15666611 0.16535118 0.14574346 0.1901953 ]] entropy:[1.7882599]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:323 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18216431  0.11917521  0.06089165  0.09146488 -0.02398546  0.27289894]] probs:[[0.17709371 0.1662828  0.15686826 0.1617383  0.14410314 0.19391379]] entropy:[1.7873559]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:324 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.17705423  0.12380753  0.05121487  0.09041341 -0.03233845  0.26446173]] probs:[[0.17701223 0.16783345 0.15608169 0.16232136 0.1435705  0.19318075]] entropy:[1.7873551]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:325 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.170973    0.1254615   0.04610807  0.0955736  -0.03255806  0.26930368]] probs:[[0.1759117  0.16808514 0.1552625  0.16313575 0.14351666 0.19408822]] entropy:[1.7872355]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:326 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.154562    0.1330957   0.07108629  0.11408516 -0.04191698  0.18997192]] probs:[[0.174927   0.17121197 0.1609177  0.1679879  0.14372328 0.18123212]] entropy:[1.7891152]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:327 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15232587  0.1450427   0.03850832  0.114001   -0.04232391  0.24433032]] probs:[[0.17339869 0.17214037 0.15474458 0.16687894 0.14272842 0.19010906]] entropy:[1.787715]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:328 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16733554  0.15825088  0.04374425  0.09407478 -0.04504887  0.25801528]] probs:[[0.17520413 0.17361966 0.15483506 0.16282745 0.14167948 0.1918342 ]] entropy:[1.7871408]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7492417] v_loss:[[0.00140244]]
DEBUG:chainerrl.agents.a3c:grad norm:10.807020500468116
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:329 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.21673208  0.16403435  0.03130969  0.05543266 -0.15869226  0.297587  ]] probs:[[0.185128   0.17562477 0.15379569 0.15755081 0.1271825  0.20071828]] entropy:[1.7814114]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:330 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.20366724  0.1676848   0.02753624  0.05032477 -0.15569265  0.28599033]] probs:[[0.18365707 0.1771661  0.15399794 0.15754761 0.12821522 0.19941604]] entropy:[1.7820013]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:331 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22000542  0.16987342  0.02672684  0.05825163 -0.16942061  0.31375468]] probs:[[0.18512563 0.17607372 0.15259023 0.15747723 0.12541255 0.20332058]] entropy:[1.7802902]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:332 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19525628  0.1647445   0.03767451  0.0534101  -0.20040736  0.31713837]] probs:[[0.18199141 0.1765224  0.15545833 0.15792392 0.12252265 0.2055813 ]] entropy:[1.7795013]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:333 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.23468842  0.15985364  0.04032187  0.06466284 -0.15595663  0.3198686 ]] probs:[[0.18655221 0.1731012  0.15359889 0.15738352 0.12622502 0.20313919]] entropy:[1.7806176]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:334 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.20913546  0.16118887  0.04407692  0.07494144 -0.20408821  0.32405207]] probs:[[0.18319233 0.17461613 0.15531853 0.16018711 0.12118435 0.20550148]] entropy:[1.7791843]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:335 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.21016003  0.16240263  0.04359654  0.07683767 -0.1940373   0.319967  ]] probs:[[0.18319407 0.17465083 0.15508644 0.16032833 0.12228432 0.20445606]] entropy:[1.7797335]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:336 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22197951  0.1669396   0.02548878  0.07022941 -0.16329451  0.3179627 ]] probs:[[0.18490222 0.17500022 0.15191732 0.15886854 0.12578236 0.20352937]] entropy:[1.780444]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3997957] v_loss:[[0.00014661]]
DEBUG:chainerrl.agents.a3c:grad norm:7.244260161093262
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:337 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07871978  0.21020846  0.07781012  0.17086923 -0.12984896  0.11974612]] probs:[[0.16420983 0.18728541 0.16406052 0.18006079 0.13329655 0.17108686]] entropy:[1.7863234]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:338 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08652881  0.17801945  0.08996683  0.18645337 -0.12478982  0.11934528]] probs:[[0.16536537 0.18120845 0.16593488 0.18274322 0.13386594 0.1708821 ]] entropy:[1.7868686]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:339 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07859249  0.20718421  0.08225701  0.16905335 -0.12250492  0.10945304]] probs:[[0.16434528 0.18689768 0.16494863 0.17990527 0.13440695 0.16949613]] entropy:[1.7866714]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:340 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08456922  0.21607591  0.07468224  0.17579372 -0.12141038  0.13183683]] probs:[[0.16424477 0.18732864 0.16262889 0.1799326  0.13367055 0.17219463]] entropy:[1.7863505]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:341 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08290368  0.21643351  0.0769603   0.17518532 -0.11999345  0.12497777]] probs:[[0.16412446 0.18757051 0.1631519  0.17999098 0.133985   0.17117716]] entropy:[1.786427]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:342 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08324564  0.16967723  0.10370841  0.1714036  -0.1501906   0.13316792]] probs:[[0.16539563 0.18032701 0.16881496 0.1806386  0.1309616  0.17386214]] entropy:[1.7863412]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:343 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08077632  0.19294839  0.08533081  0.152876   -0.12784128  0.11284426]] probs:[[0.16549918 0.18514481 0.16625465 0.1778723  0.1343366  0.17089242]] entropy:[1.786984]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:344 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08156099  0.18123247  0.09357236  0.16929622 -0.1267583   0.10248697]] probs:[[0.16551676 0.18286422 0.16751683 0.18069448 0.13439097 0.16901685]] entropy:[1.7870506]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6183943] v_loss:[[0.00057597]]
DEBUG:chainerrl.agents.a3c:grad norm:10.6469838745199
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:345 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13741769  0.17408244  0.11347751  0.12948869 -0.0883804   0.06875684]] probs:[[0.17429759 0.18080677 0.17017442 0.17292105 0.13906841 0.16273178]] entropy:[1.7883817]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:346 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16233255  0.17293951  0.12296878  0.13064417 -0.08072864  0.05824041]] probs:[[0.17773864 0.17963393 0.17087808 0.17219469 0.13938682 0.1601678 ]] entropy:[1.7882702]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:347 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.17610814  0.16958779  0.146545    0.10596245 -0.05703001  0.11221004]] probs:[[0.17772177 0.17656673 0.17254466 0.16568254 0.14076349 0.16672088]] entropy:[1.7888808]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:348 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.17011537  0.17244373  0.12963131  0.10523482 -0.06308515  0.10972742]] probs:[[0.17751808 0.17793189 0.17047495 0.16636628 0.14059338 0.16711539]] entropy:[1.7888423]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:349 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15675987  0.17557684  0.0969734   0.12963986 -0.08310109  0.04776595]] probs:[[0.17800948 0.18139079 0.16767882 0.17324674 0.14004669 0.15962748]] entropy:[1.7882187]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:350 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15442163  0.17608337  0.09694925  0.12647922 -0.08202641  0.05250245]] probs:[[0.17758773 0.18147656 0.16766909 0.17269419 0.14019252 0.16037993]] entropy:[1.7883153]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:351 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14787756  0.18103553  0.09117766  0.11635775 -0.07530313  0.06240766]] probs:[[0.17650336 0.18245398 0.16677406 0.17102678 0.14119744 0.16204435]] entropy:[1.7885747]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:352 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13575256  0.17111938  0.12005442  0.09388047 -0.05739873  0.07433572]] probs:[[0.17408848 0.18035561 0.17137696 0.16694954 0.1435111  0.16371824]] entropy:[1.789268]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6402606] v_loss:[[0.00052296]]
DEBUG:chainerrl.agents.a3c:grad norm:5.081507818905284
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:353 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13990945  0.13562849  0.19999461  0.04851    -0.01375396  0.05947494]] probs:[[0.17389537 0.17315254 0.18466419 0.15870614 0.14912583 0.16045593]] entropy:[1.7892741]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:354 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12749515  0.1239315   0.21565564  0.06135249 -0.04670189  0.0760551 ]] probs:[[0.17198102 0.17136922 0.18783136 0.16097376 0.14448664 0.16335797]] entropy:[1.7886419]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:355 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1230726   0.10647119  0.21413104  0.07488804 -0.05728341  0.06977016]] probs:[[0.17197222 0.1691408  0.18836685 0.16388229 0.14359215 0.16304569]] entropy:[1.7885629]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:356 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12398618  0.12012357  0.21322621  0.06554973 -0.05382066  0.0736618 ]] probs:[[0.17179912 0.17113681 0.18783538 0.16204749 0.14381374 0.16336739]] entropy:[1.7885898]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:357 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12333357  0.12054498  0.21160102  0.06717148 -0.05783098  0.07246542]] probs:[[0.17183359 0.1713551  0.18769044 0.16244906 0.14336044 0.16331133]] entropy:[1.788542]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:358 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1246629   0.12380382  0.21101657  0.06594948 -0.05628439  0.07521994]] probs:[[0.17186415 0.17171657 0.18736489 0.16206393 0.14341709 0.16357332]] entropy:[1.7885725]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:359 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12547666  0.10319164  0.20850493  0.0765574  -0.0634032   0.07443695]] probs:[[0.17256497 0.16876188 0.18750435 0.16432638 0.14286412 0.1639783 ]] entropy:[1.7885666]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:360 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12399695  0.12233422  0.21155229  0.06488162 -0.05509993  0.07305925]] probs:[[0.17185667 0.17157117 0.18758202 0.16199176 0.14367646 0.16332191]] entropy:[1.7885833]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5319513] v_loss:[[0.00116429]]
DEBUG:chainerrl.agents.a3c:grad norm:6.591299743242894
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:361 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13120814  0.12027069  0.13161343  0.07858937 -0.00824743  0.07325047]] probs:[[0.17385891 0.1719677  0.17392938 0.16494718 0.15122798 0.1640689 ]] entropy:[1.7905998]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:362 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12717448  0.12035794  0.1455395   0.0827617  -0.02055547  0.08664121]] probs:[[0.17267455 0.1715015  0.17587502 0.1651734  0.14896013 0.16581544]] entropy:[1.7903488]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:363 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12720852  0.12080263  0.14462136  0.08300266 -0.02156664  0.08627204]] probs:[[0.17272381 0.1716209  0.17575777 0.16525471 0.14884697 0.16579588]] entropy:[1.7903376]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:364 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12667291  0.12136339  0.14849104  0.08192538 -0.01836886  0.08837081]] probs:[[0.17240162 0.17148867 0.17620443 0.16485712 0.14912501 0.16592313]] entropy:[1.7903562]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:365 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12690516  0.12178468  0.14580677  0.08258147 -0.02096503  0.08695086]] probs:[[0.1725925  0.171711   0.1758858  0.16510962 0.14886847 0.16583262]] entropy:[1.7903337]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:366 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1271249   0.12116303  0.14472657  0.08292078 -0.02162816  0.08635398]] probs:[[0.17269956 0.171673   0.17576626 0.1652318  0.14882934 0.16580003]] entropy:[1.7903341]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:367 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12681894  0.12184572  0.14614956  0.08239941 -0.02068057  0.0870626 ]] probs:[[0.17256264 0.17170659 0.17593084 0.16506526 0.14889792 0.16583678]] entropy:[1.7903357]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:368 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12671973  0.12216014  0.14656617  0.08225925 -0.02045979  0.0872976 ]] probs:[[0.17251812 0.1717333  0.1759762  0.16501589 0.14890712 0.1658494 ]] entropy:[1.7903343]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.741667] v_loss:[[0.00120845]]
DEBUG:chainerrl.agents.a3c:grad norm:45.793225277210475
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:369 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10868906  0.14392322  0.12253015  0.08146551 -0.02556986  0.11097126]] probs:[[0.16950327 0.17558204 0.17186569 0.16495103 0.1482075  0.16989055]] entropy:[1.790319]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:370 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10814316  0.14578012  0.12340916  0.08066677 -0.02584243  0.11141568]] probs:[[0.1693619  0.17585765 0.17196722 0.1647718  0.14812437 0.16991705]] entropy:[1.7902911]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:371 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10866445  0.14468898  0.12231624  0.08137919 -0.02616957  0.11092949]] probs:[[0.1695019  0.17571944 0.17183177 0.16493952 0.14812109 0.16988626]] entropy:[1.7903024]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:372 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10845108  0.14571181  0.12380251  0.08079959 -0.02584733  0.11146452]] probs:[[0.16939083 0.17582151 0.17201127 0.16477108 0.14810333 0.16990204]] entropy:[1.7902892]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:373 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11100937  0.14345284  0.12519373  0.08231944 -0.02565213  0.11131861]] probs:[[0.16973452 0.17533161 0.17215925 0.16493404 0.14805356 0.16978702]] entropy:[1.790302]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:374 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12184479  0.1384473   0.13895123  0.08608942 -0.01888089  0.11530992]] probs:[[0.17061687 0.1734732  0.17356063 0.16462418 0.14821959 0.16950555]] entropy:[1.7903379]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:375 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13766395  0.12801641  0.16086528  0.09371357 -0.00212416  0.1232549 ]] probs:[[0.17164113 0.16999318 0.17566998 0.1642608  0.14924924 0.16918567]] entropy:[1.7904342]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:376 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14382975 0.11919686 0.16850637 0.09726672 0.00609862 0.12709032]] probs:[[0.17212035 0.16793232 0.17642054 0.16428964 0.14997412 0.16926312]] entropy:[1.790484]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5456223] v_loss:[[0.00016129]]
DEBUG:chainerrl.agents.a3c:grad norm:1.5161781237566545
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:377 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15030396 0.12462649 0.15153475 0.07527707 0.04177914 0.13077737]] probs:[[0.17296793 0.16858308 0.17318095 0.16046555 0.15517932 0.16962323]] entropy:[1.790957]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:378 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15053502 0.12501429 0.1518674  0.0754912  0.04206974 0.13111182]] probs:[[0.17295614 0.16859803 0.17318673 0.1604519  0.15517798 0.1696292 ]] entropy:[1.7909563]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:379 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15055573 0.12507664 0.15193026 0.07550152 0.04211773 0.13116176]] probs:[[0.17295235 0.16860135 0.17319025 0.16044672 0.15517883 0.16963044]] entropy:[1.790956]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:380 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15046315 0.12489348 0.15203907 0.07534193 0.04213385 0.13111798]] probs:[[0.17294648 0.16858035 0.17321925 0.16043052 0.15519042 0.16963297]] entropy:[1.7909558]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:381 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15055318 0.12508689 0.15195428 0.0754941  0.04212518 0.13117237]] probs:[[0.17295069 0.16860189 0.17319317 0.1604444  0.15517887 0.16963103]] entropy:[1.7909559]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:382 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15055694 0.12509711 0.15194736 0.07550075 0.04212768 0.13117449]] probs:[[0.1729508  0.16860308 0.17319144 0.16044497 0.15517879 0.16963087]] entropy:[1.790956]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:383 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1504632  0.12489746 0.1520421  0.07534175 0.04213552 0.13112025]] probs:[[0.17294617 0.16858073 0.17321946 0.1604302  0.1551904  0.16963303]] entropy:[1.7909557]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:384 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1505531  0.12508823 0.15195498 0.07549397 0.04212574 0.13117318]] probs:[[0.17295058 0.16860202 0.1731932  0.16044429 0.15517889 0.16963108]] entropy:[1.790956]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5001597] v_loss:[[5.666515e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.2200122316384363
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:385 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12379647 0.12631099 0.16127257 0.08722223 0.07100904 0.10516133]] probs:[[0.16849454 0.16891874 0.17492886 0.16244331 0.15983082 0.1653837 ]] entropy:[1.7913318]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:386 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12380405 0.12632947 0.16125809 0.08723561 0.07101355 0.10516455]] probs:[[0.16849492 0.16892098 0.1749254  0.16244462 0.15983069 0.16538335]] entropy:[1.791332]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:387 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1240212  0.1268306  0.16096734 0.08761666 0.07104735 0.10527661]] probs:[[0.16850519 0.16897926 0.17484723 0.16248114 0.15981112 0.16537604]] entropy:[1.7913349]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:388 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12382147 0.1263721  0.16122402 0.0872669  0.07102403 0.10517225]] probs:[[0.16849582 0.16892613 0.17491733 0.16244774 0.15983044 0.16538261]] entropy:[1.7913322]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:389 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12402348 0.12683628 0.16096291 0.08762071 0.07104877 0.10527772]] probs:[[0.16850528 0.16897993 0.17484617 0.16248153 0.15981108 0.16537595]] entropy:[1.7913349]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:390 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12382169 0.12637255 0.16122368 0.08726726 0.07102413 0.10517235]] probs:[[0.16849582 0.16892616 0.17491724 0.16244777 0.15983042 0.16538261]] entropy:[1.7913321]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:391 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12402357 0.12683633 0.16096292 0.08762085 0.07104874 0.10527772]] probs:[[0.1685053  0.16897993 0.17484617 0.16248156 0.15981108 0.16537595]] entropy:[1.7913349]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:392 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12382179 0.12637258 0.16122372 0.08726732 0.07102416 0.10517237]] probs:[[0.16849583 0.16892616 0.17491724 0.16244777 0.15983042 0.16538261]] entropy:[1.7913324]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2214221] v_loss:[[0.00059972]]
DEBUG:chainerrl.agents.a3c:grad norm:1.3062783515468972
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:393 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.120977   0.11774702 0.13781495 0.09834491 0.06930546 0.13340649]] probs:[[0.16796769 0.16742603 0.17081988 0.16420893 0.15950897 0.17006847]] entropy:[1.7914927]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:394 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12148511 0.11892168 0.13732599 0.09923901 0.06923664 0.13368881]] probs:[[0.1679888  0.16755873 0.17067109 0.16429296 0.159437   0.17005146]] entropy:[1.791494]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:395 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1216271  0.11926876 0.13704672 0.09949429 0.06932184 0.13375321]] probs:[[0.16799572 0.1676     0.17060623 0.16431835 0.15943451 0.17004526]] entropy:[1.7914956]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:396 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12141699 0.11882273 0.13695057 0.09913596 0.0695682  0.13360606]] probs:[[0.16798915 0.16755389 0.17061898 0.16428757 0.15950106 0.1700493 ]] entropy:[1.7914978]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:397 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12162637 0.11926685 0.13704848 0.09949294 0.06932116 0.13375294]] probs:[[0.16799568 0.16759975 0.1706066  0.1643182  0.15943447 0.17004529]] entropy:[1.7914956]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:398 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1214165  0.11882164 0.13695166 0.09913515 0.06956784 0.13360599]] probs:[[0.16798912 0.16755377 0.17061923 0.1642875  0.15950106 0.17004935]] entropy:[1.791498]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:399 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12134004 0.11863478 0.1371024  0.09899772 0.06952184 0.13357127]] probs:[[0.1679854  0.16753155 0.17065421 0.16427381 0.15950237 0.17005268]] entropy:[1.7914971]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:400 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12188473 0.11951774 0.13594374 0.09973349 0.06964462 0.13367562]] probs:[[0.16804327 0.16764599 0.17042248 0.16436182 0.15949002 0.17003638]] entropy:[1.7915022]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4424858] v_loss:[[2.5192257e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.03597161035065431
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:401 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07041119 0.10494658 0.18037166 0.0918631  0.12520498 0.10789747]] probs:[[0.1595513  0.1651577  0.17809656 0.16301094 0.16853765 0.1656458 ]] entropy:[1.7911613]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:402 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07042217 0.10497402 0.18035032 0.09188354 0.12521261 0.10790246]] probs:[[0.15955178 0.16516094 0.17809136 0.16301298 0.16853762 0.16564532]] entropy:[1.7911618]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:403 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06993215 0.1042197  0.17715088 0.09104533 0.12760438 0.10744409]] probs:[[0.15956624 0.16513225 0.1776256  0.16297102 0.16903934 0.16566557]] entropy:[1.7911851]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:404 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06993355 0.10422316 0.17716102 0.09104845 0.12759694 0.1074455 ]] probs:[[0.15956613 0.1651325  0.17762703 0.16297118 0.16903771 0.16566546]] entropy:[1.791185]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:405 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07042411 0.10497991 0.18034904 0.09188746 0.1252119  0.10790397]] probs:[[0.1595518  0.16516161 0.17809081 0.16301334 0.1685372  0.16564526]] entropy:[1.7911618]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:406 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07042421 0.1049789  0.18034732 0.09188725 0.12521397 0.10790369]] probs:[[0.15955184 0.16516148 0.17809054 0.16301334 0.16853757 0.16564524]] entropy:[1.7911619]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:407 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06993268 0.10422078 0.1771509  0.0910463  0.12760481 0.10744443]] probs:[[0.15956625 0.16513236 0.1776255  0.16297108 0.16903931 0.16566554]] entropy:[1.7911854]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:408 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06993387 0.10422371 0.17716157 0.09104885 0.1275972  0.10744591]] probs:[[0.15956612 0.16513251 0.17762706 0.16297118 0.1690377  0.16566546]] entropy:[1.791185]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4896842] v_loss:[[4.8457827e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.14235457358350873
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:409 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01114485 0.12262332 0.16714746 0.11740445 0.09709396 0.13226043]] probs:[[0.15111731 0.16893852 0.17663033 0.16805914 0.16468021 0.17057447]] entropy:[1.7906396]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:410 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01267339 0.12547636 0.17975521 0.12144492 0.08707768 0.13462341]] probs:[[0.15098648 0.16901597 0.1784435  0.16833596 0.162649   0.17056906]] entropy:[1.7904719]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:411 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01266101 0.12544975 0.17965753 0.12141377 0.08715721 0.1346047 ]] probs:[[0.15098752 0.16901474 0.17842953 0.16833398 0.16266508 0.17056917]] entropy:[1.7904735]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:412 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01115058 0.12263755 0.16720976 0.11742381 0.09704947 0.13227063]] probs:[[0.15111637 0.1689389  0.17663923 0.16806039 0.16467091 0.17057417]] entropy:[1.7906392]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:413 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01115718 0.12265361 0.16728133 0.11744589 0.09699767 0.13228229]] probs:[[0.15111531 0.16893934 0.17664948 0.16806184 0.16466017 0.17057385]] entropy:[1.7906382]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:414 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01115712 0.1226535  0.16728097 0.1174458  0.09699788 0.1322822 ]] probs:[[0.15111533 0.16893932 0.17664944 0.16806184 0.16466022 0.17057385]] entropy:[1.7906382]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:415 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01267351 0.12547643 0.17975487 0.12144484 0.08707871 0.13462356]] probs:[[0.15098648 0.16901596 0.17844342 0.16833593 0.16264915 0.17056906]] entropy:[1.790472]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:416 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01266135 0.12545018 0.17965811 0.12141404 0.08715763 0.13460511]] probs:[[0.1509875  0.16901472 0.17842954 0.16833393 0.16266507 0.17056915]] entropy:[1.7904735]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3794878] v_loss:[[5.643947e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:1.0660851905637183
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:417 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02965514 0.10602093 0.15847187 0.1343696  0.08413227 0.12824422]] probs:[[0.15415843 0.16639203 0.17535238 0.17117651 0.1627895  0.17013119]] entropy:[1.7909133]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:418 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0296529  0.10602574 0.15847361 0.1343688  0.08413382 0.1282438 ]] probs:[[0.15415795 0.16639268 0.17535254 0.17117624 0.16278961 0.17013098]] entropy:[1.7909133]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:419 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02965251 0.10602483 0.15814082 0.13425635 0.08410585 0.12831461]] probs:[[0.15416874 0.16640425 0.17530653 0.17116903 0.16279651 0.170155  ]] entropy:[1.7909166]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:420 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02965251 0.10602486 0.15814102 0.13425642 0.08410576 0.12831466]] probs:[[0.15416873 0.16640423 0.17530654 0.17116903 0.16279648 0.17015499]] entropy:[1.7909164]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:421 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0422869  0.07822479 0.15539221 0.14130823 0.07471909 0.12783785]] probs:[[0.15667321 0.1624061  0.1754348  0.17298129 0.16183776 0.1706668 ]] entropy:[1.7909355]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:422 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04115862 0.07153786 0.16299604 0.13946185 0.07976381 0.12635313]] probs:[[0.1564455  0.16127114 0.17671622 0.1726059  0.16260321 0.17035803]] entropy:[1.790859]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:423 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03546357 0.0802777  0.16660403 0.13579918 0.08435124 0.126751  ]] probs:[[0.15534729 0.16246739 0.17711578 0.17174295 0.16313057 0.17019598]] entropy:[1.7908403]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:424 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03555021 0.08253333 0.16517936 0.1360645  0.08283481 0.12789473]] probs:[[0.15534191 0.16281453 0.17684218 0.17176768 0.16286361 0.1703701 ]] entropy:[1.7908545]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.344637] v_loss:[[0.00034793]]
DEBUG:chainerrl.agents.a3c:grad norm:1.2472053716844358
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:425 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08021203 0.05959456 0.22419837 0.06157974 0.13714513 0.20700362]] probs:[[0.15848444 0.15525036 0.18302867 0.15555885 0.16776925 0.17990844]] entropy:[1.7894821]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:426 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06259011 0.0620634  0.19348074 0.09558375 0.12446114 0.15248352]] probs:[[0.15795933 0.15787615 0.18004885 0.16325791 0.16804111 0.17281662]] entropy:[1.7906154]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:427 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07362599 0.05126798 0.20632    0.08076315 0.1281705  0.178231  ]] probs:[[0.15889952 0.15538627 0.18144745 0.16003767 0.16780734 0.17642169]] entropy:[1.7901306]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:428 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0826893  0.05037537 0.22364669 0.06373336 0.13720272 0.20529646]] probs:[[0.1590516  0.15399417 0.18312813 0.15606503 0.16796273 0.17979833]] entropy:[1.7894471]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:429 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0722022  0.06375631 0.21581282 0.05539462 0.13745955 0.19325897]] probs:[[0.15809079 0.15676118 0.18250546 0.15545587 0.16875143 0.17843533]] entropy:[1.7896998]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:430 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07512733 0.05847619 0.22208716 0.05727802 0.13987237 0.19900952]] probs:[[0.15815595 0.15554428 0.18319322 0.15535803 0.16873452 0.17901397]] entropy:[1.7895133]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:431 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08145048 0.05143721 0.22627503 0.06190894 0.13986152 0.2052922 ]] probs:[[0.15875776 0.15406372 0.18349805 0.15568551 0.16830716 0.17968786]] entropy:[1.7893832]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:432 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07374856 0.05130327 0.21715954 0.06298742 0.13163373 0.19635281]] probs:[[0.15844506 0.15492833 0.18287794 0.15674916 0.16788733 0.17911217]] entropy:[1.7896067]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3961779] v_loss:[[0.00020366]]
DEBUG:chainerrl.agents.a3c:grad norm:1.5727870934398287
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:433 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.016838   0.1256549  0.21876125 0.01806038 0.05977257 0.208625  ]] probs:[[0.15162502 0.16905557 0.18555175 0.15181048 0.15827675 0.18368044]] entropy:[1.7882729]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:434 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01964588 0.12375712 0.22071637 0.0227788  0.06180243 0.21341534]] probs:[[0.15168875 0.16833265 0.18547152 0.15216473 0.15822014 0.18412231]] entropy:[1.7882825]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:435 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02949618 0.12685929 0.2239184  0.02515723 0.06133477 0.21314315]] probs:[[0.15275379 0.16837448 0.18553613 0.15209244 0.1576955  0.18354766]] entropy:[1.7883933]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:436 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02926271 0.13109624 0.23175995 0.02715969 0.04652303 0.21230711]] probs:[[0.15272227 0.16909397 0.18700181 0.15240143 0.15538119 0.18339925]] entropy:[1.7881156]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:437 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03163797 0.13040343 0.22854722 0.02738472 0.05191706 0.21035261]] probs:[[0.15306056 0.16894938 0.18637167 0.15241094 0.15619618 0.18301137]] entropy:[1.788311]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:438 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03573321 0.12193098 0.22956109 0.03218195 0.06468011 0.22502548]] probs:[[0.15294896 0.16671772 0.1856628  0.15240677 0.15744106 0.18482262]] entropy:[1.7882918]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:439 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03299212 0.13209307 0.22829351 0.02817332 0.04095742 0.22502944]] probs:[[0.15302779 0.16896988 0.18603241 0.15229216 0.15425158 0.18542618]] entropy:[1.7879547]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:440 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03461461 0.12837355 0.22258058 0.02767324 0.03872353 0.243383  ]] probs:[[0.15303513 0.16807772 0.18468164 0.15197654 0.15366523 0.1885637 ]] entropy:[1.7876725]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4028746] v_loss:[[0.00011731]]
DEBUG:chainerrl.agents.a3c:grad norm:2.2942967637406264
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:441 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15050294  0.2175774   0.10874686  0.17148745  0.14702688  0.14643927]] probs:[[0.12799095 0.18494156 0.16587083 0.17661107 0.17234346 0.17224222]] entropy:[1.7854302]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:442 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.14731824  0.21621484  0.11235155  0.17380983  0.14774945  0.15307446]] probs:[[0.128087   0.18424067 0.16606505 0.17659126 0.17204869 0.17296728]] entropy:[1.7855132]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:443 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1424916   0.24357516  0.07594682  0.18656196  0.19426681  0.17099227]] probs:[[0.12700748 0.18685122 0.15801412 0.17649624 0.17786135 0.17376952]] entropy:[1.7844088]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:444 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1376751   0.24768063  0.07456843  0.1914748   0.19677174  0.17165592]] probs:[[0.12729038 0.18713434 0.15738806 0.17690642 0.17784597 0.17343484]] entropy:[1.7844088]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:445 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12854902  0.24906342  0.07455394  0.18987864  0.21956016  0.16004248]] probs:[[0.12804271 0.1867884  0.15687777 0.17605415 0.18135804 0.17087896]] entropy:[1.7844882]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:446 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12454775  0.25057945  0.07274406  0.18842463  0.21563236  0.16879623]] probs:[[0.1284214  0.1868758  0.15643004 0.17561418 0.18045783 0.17220077]] entropy:[1.7846091]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:447 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1283826   0.24559474  0.07259726  0.18672618  0.2079015   0.17279233]] probs:[[0.12824303 0.18640178 0.15678994 0.17574532 0.17950647 0.1733135 ]] entropy:[1.7846649]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:448 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12486506  0.22660097  0.0669717   0.17581242  0.20680176  0.17398344]] probs:[[0.12945002 0.18396781 0.15682517 0.17485766 0.18036123 0.17453815]] entropy:[1.7851614]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7312963] v_loss:[[0.00154187]]
DEBUG:chainerrl.agents.a3c:grad norm:107.26217003701424
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:449 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01261751  0.15070798  0.14408572  0.07595136  0.14026414  0.10680039]] probs:[[0.14855076 0.17490661 0.17375216 0.16230798 0.17308941 0.16739304]] entropy:[1.7902069]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:450 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01191316  0.1514812   0.14499776  0.07888021  0.1396642   0.10803588]] probs:[[0.14851023 0.17487094 0.17374085 0.16262504 0.17281666 0.16743629]] entropy:[1.7902225]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:451 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01277423  0.15520306  0.14247829  0.08458055  0.13693786  0.10663062]] probs:[[0.14833626 0.1754684  0.17324975 0.16350384 0.17229252 0.16714914]] entropy:[1.7902309]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:452 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0127117   0.15652636  0.14234793  0.08769318  0.1348173   0.10818804]] probs:[[0.148253   0.17559116 0.17311911 0.16391125 0.17182031 0.16730525]] entropy:[1.7902414]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:453 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00734586 0.17447379 0.16986841 0.07884266 0.13916442 0.11673872]] probs:[[0.14949924 0.17669398 0.1758821  0.16057934 0.17056388 0.16678144]] entropy:[1.7901393]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:454 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00129576  0.16456448  0.15540077  0.09187303  0.14652146  0.10853694]] probs:[[0.14874117 0.17557529 0.17397372 0.1632653  0.17243579 0.16600873]] entropy:[1.7902322]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:455 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00245141  0.16493551  0.15725166  0.08949842  0.1465413   0.11242293]] probs:[[0.14849837 0.1755565  0.17421271 0.16280021 0.17235678 0.16657545]] entropy:[1.7901886]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:456 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00144404  0.16783626  0.15652671  0.08527427  0.1451559   0.11278379]] probs:[[0.1486973  0.17612483 0.17414416 0.16216771 0.17217521 0.1666908 ]] entropy:[1.790174]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.8837653] v_loss:[[0.00259617]]
DEBUG:chainerrl.agents.a3c:grad norm:24.268176281868765
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:457 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06214562  0.07318404  0.14706025  0.18847153  0.21464436  0.14146756]] probs:[[0.13875122 0.15885822 0.17103845 0.17827006 0.1829975  0.17008455]] entropy:[1.7878314]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:458 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05685212  0.08623908  0.16912676  0.1841748   0.22515751  0.17525132]] probs:[[0.13760872 0.15877776 0.1724993  0.17511469 0.18244046 0.17355902]] entropy:[1.787699]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:459 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05583877  0.09034719  0.17233586  0.17727137  0.22563459  0.18179594]] probs:[[0.13755985 0.15921333 0.17281708 0.17367212 0.18227789 0.1744597 ]] entropy:[1.7877402]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:460 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05867301  0.08515145  0.1655142   0.18090658  0.22923674  0.18033676]] probs:[[0.13735643 0.15860291 0.17187482 0.17454085 0.1831836  0.17444141]] entropy:[1.787583]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:461 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0600902   0.08270954  0.16235265  0.18242516  0.23052548  0.17932333]] probs:[[0.1372716  0.15834263 0.1714693  0.17494589 0.18356651 0.17440408]] entropy:[1.7875113]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:462 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05784029  0.08587991  0.16771403  0.18300189  0.22746566  0.17831306]] probs:[[0.13742992 0.15867123 0.17220204 0.17485486 0.182805   0.17403692]] entropy:[1.7876289]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:463 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07028122  0.05859088  0.12772684  0.20725505  0.21896304  0.12918079]] probs:[[0.13826372 0.15728116 0.16853964 0.18249069 0.18463986 0.16878487]] entropy:[1.787231]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:464 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06874175  0.08216666  0.14323641  0.17903668  0.24267496  0.17802761]] probs:[[0.13651077 0.15874703 0.16874382 0.17489433 0.1863861  0.17471795]] entropy:[1.7871315]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2846533] v_loss:[[0.00025819]]
DEBUG:chainerrl.agents.a3c:grad norm:3.225247147516697
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:465 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0207597   0.08424642  0.10985445  0.19257066  0.20940343  0.16493766]] probs:[[0.14386623 0.15979472 0.16393958 0.17807668 0.18109956 0.17322324]] entropy:[1.7888405]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:466 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02065451  0.08623072  0.11886611  0.19674385  0.20370518  0.16023992]] probs:[[0.14377777 0.15999684 0.16530454 0.17869264 0.17994091 0.17228729]] entropy:[1.7889371]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:467 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01854024  0.07937437  0.11403143  0.19214706  0.20422335  0.15862522]] probs:[[0.1444563  0.1593163  0.16493455 0.17833509 0.18050176 0.17245606]] entropy:[1.788978]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:468 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00493921  0.04956869  0.09856158  0.18907978  0.18473145  0.14060491]] probs:[[0.14826442 0.15657032 0.16443215 0.18001069 0.17922966 0.17149283]] entropy:[1.7893251]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:469 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02579249  0.09399275  0.11422814  0.18793862  0.2004939   0.14631535]] probs:[[0.14372809 0.16201818 0.16533008 0.17797701 0.18022566 0.17072108]] entropy:[1.7890708]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:470 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02806524  0.09508083  0.11169756  0.19630867  0.19812004  0.14896551]] probs:[[0.14326513 0.16203998 0.16475505 0.17930189 0.17962697 0.17101097]] entropy:[1.788943]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:471 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02495501  0.09108651  0.11169717  0.20192385  0.20120434  0.15839618]] probs:[[0.14328368 0.16091366 0.16426462 0.17977487 0.17964557 0.17211755]] entropy:[1.7888353]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:472 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02727799  0.09714571  0.10582666  0.19215104  0.19954859  0.14942868]] probs:[[0.14351065 0.16252519 0.1639422  0.17872322 0.18005024 0.17124854]] entropy:[1.7889845]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5565461] v_loss:[[0.00022075]]
DEBUG:chainerrl.agents.a3c:grad norm:5.733192323130563
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:473 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03098785 0.10927179 0.17294091 0.1161301  0.1736079  0.1249978 ]] probs:[[0.15209852 0.16448386 0.17529696 0.16561581 0.17541394 0.16709097]] entropy:[1.79064]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:474 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03242895 0.1083978  0.18414125 0.11881095 0.1706336  0.12641856]] probs:[[0.1519818  0.16397755 0.17688026 0.165694   0.1745071  0.16695935]] entropy:[1.7905794]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:475 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03270933 0.10859133 0.17521963 0.11412349 0.17520441 0.12570357]] probs:[[0.15226673 0.16427071 0.17558865 0.165182   0.17558599 0.16710594]] entropy:[1.7906251]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:476 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00608257  0.09949473  0.12541723  0.14200558  0.19136874  0.11509008]] probs:[[0.14795923 0.16443479 0.16875307 0.17157577 0.18025781 0.16701931]] entropy:[1.7900281]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:477 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01110056 0.11404399 0.1461402  0.11912182 0.17883794 0.10990067]] probs:[[0.15029562 0.16659197 0.17202568 0.16744006 0.1777435  0.16590317]] entropy:[1.7904787]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:478 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0141198  0.11735719 0.15177676 0.11212289 0.17455252 0.10719785]] probs:[[0.15080981 0.16721107 0.1730666  0.16633812 0.17705354 0.1655209 ]] entropy:[1.7905364]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:479 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02033192 0.12352529 0.16171972 0.10232221 0.16843036 0.10341627]] probs:[[0.15169252 0.16818237 0.17473026 0.16465393 0.17590675 0.16483417]] entropy:[1.7905931]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:480 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02529062 0.1225859  0.16154344 0.10546952 0.16938905 0.1060869 ]] probs:[[0.1521888  0.16774033 0.17440404 0.16489366 0.17577772 0.16499548]] entropy:[1.7906688]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5224829] v_loss:[[0.00024817]]
DEBUG:chainerrl.agents.a3c:grad norm:47.68296978691835
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:481 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0665271  0.09639941 0.14500752 0.02689173 0.13243039 0.12026446]] probs:[[0.16138309 0.16627671 0.17455877 0.15511173 0.17237706 0.17029265]] entropy:[1.7909445]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:482 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06697063 0.09665021 0.14473191 0.02746244 0.13261916 0.12080531]] probs:[[0.16140975 0.16627212 0.17446208 0.15515707 0.17236161 0.17033735]] entropy:[1.7909524]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:483 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06575605 0.09570447 0.1457414  0.02645584 0.13178399 0.11910444]] probs:[[0.16133745 0.16624233 0.17477222 0.15511984 0.1723498  0.17017826]] entropy:[1.7909367]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:484 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06643047 0.09620395 0.14503771 0.0267508  0.13225502 0.12013433]] probs:[[0.1613864  0.16626367 0.17458448 0.15510802 0.17236702 0.17029043]] entropy:[1.7909433]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:485 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0638056  0.09479838 0.14783151 0.02490997 0.13082065 0.11578703]] probs:[[0.16119532 0.16626945 0.17532524 0.1550459  0.17236803 0.1697961 ]] entropy:[1.7909065]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:486 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06269336 0.09762697 0.14413609 0.0343035  0.10669927 0.11093741]] probs:[[0.16163374 0.16737999 0.17534855 0.15710951 0.16890542 0.16962278]] entropy:[1.7911384]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:487 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06384903 0.10293058 0.14534356 0.04220213 0.11168152 0.11754254]] probs:[[0.16109534 0.16751583 0.1747735  0.1576456  0.16898818 0.16998154]] entropy:[1.7911716]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:488 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06160716 0.10098758 0.12748548 0.05615495 0.12223019 0.12975612]] probs:[[0.16036311 0.16680425 0.1712833  0.15949115 0.17038552 0.17167267]] entropy:[1.7913029]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.9247608] v_loss:[[0.00320297]]
DEBUG:chainerrl.agents.a3c:grad norm:21.836923938607985
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:489 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0965163   0.13476717  0.07998666 -0.00089193  0.14573972  0.11294018]] probs:[[0.16675566 0.17325778 0.16402191 0.15127833 0.17516932 0.16951706]] entropy:[1.7906389]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:490 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1038815   0.14106336  0.09747508 -0.01688028  0.13823204  0.1058485 ]] probs:[[0.16793579 0.17429748 0.16686335 0.14883225 0.17380469 0.16826643]] entropy:[1.7904339]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:491 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10627881  0.14223018  0.10092747 -0.02203358  0.13672115  0.10510811]] probs:[[0.16833353 0.17449544 0.16743512 0.14806256 0.17353678 0.16813657]] entropy:[1.7903429]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:492 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10684064  0.14243136  0.10135558 -0.02302255  0.13649131  0.10499   ]] probs:[[0.16842893 0.17453139 0.1675076  0.1479169  0.17349774 0.16811751]] entropy:[1.7903244]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:493 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09959126  0.1378505   0.08721747 -0.00866943  0.14245506  0.1095257 ]] probs:[[0.16728348 0.17380764 0.1652263  0.15011913 0.17460978 0.16895363]] entropy:[1.790549]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:494 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09907648  0.13664058  0.08314869 -0.0072506   0.14403118  0.11136989]] probs:[[0.16722529 0.17362641 0.16458285 0.15035735 0.17491437 0.16929373]] entropy:[1.7905554]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:495 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10491139  0.14147022  0.09836581 -0.01935127  0.13758366  0.10561651]] probs:[[0.16813007 0.17439045 0.16703315 0.14848371 0.17371398 0.16824865]] entropy:[1.7903919]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:496 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09894019  0.13976637  0.09739888 -0.00880781  0.13914715  0.1057533 ]] probs:[[0.16706073 0.17402232 0.16680343 0.14999612 0.17391458 0.16820282]] entropy:[1.7905736]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2765344] v_loss:[[0.00024255]]
DEBUG:chainerrl.agents.a3c:grad norm:0.7551251355451827
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:497 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06660281 0.0935646  0.14524381 0.02608587 0.09201135 0.10725494]] probs:[[0.16295505 0.16740838 0.17628737 0.15648457 0.16714855 0.16971602]] entropy:[1.7910976]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:498 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06552265 0.09290197 0.1483076  0.03006044 0.09534873 0.10901909]] probs:[[0.16249709 0.16700761 0.17652191 0.15683556 0.16741674 0.16972111]] entropy:[1.7910954]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:499 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05758486 0.09480584 0.13565145 0.02984993 0.0938316  0.09974346]] probs:[[0.16202901 0.16817352 0.17518489 0.15759689 0.16800976 0.16900596]] entropy:[1.7912003]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:500 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05620706 0.09316285 0.13484015 0.03382601 0.09493881 0.10072821]] probs:[[0.16175105 0.16784051 0.17498347 0.1581711  0.16813886 0.1691151 ]] entropy:[1.7912334]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:501 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05166435 0.08953573 0.12019732 0.04471352 0.10023725 0.10478219]] probs:[[0.16111018 0.16732864 0.17253867 0.1599942  0.16912894 0.16989936]] entropy:[1.7913775]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:502 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07289211 0.09547005 0.15355119 0.0338512  0.08515128 0.10956857]] probs:[[0.16344571 0.16717796 0.17717537 0.15718761 0.16546176 0.16955161]] entropy:[1.7910988]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:503 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05863388 0.09854917 0.13423656 0.03635976 0.08923048 0.10431787]] probs:[[0.16194221 0.16853693 0.17466019 0.15837498 0.16697368 0.16951197]] entropy:[1.7912586]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:504 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07718183 0.10682037 0.19472681 0.03542982 0.06763683 0.11537595]] probs:[[0.16277748 0.1676742  0.18308109 0.15612113 0.16123118 0.16911489]] entropy:[1.7904794]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5835117] v_loss:[[0.00023643]]
DEBUG:chainerrl.agents.a3c:grad norm:1.4845125749963515
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:505 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06177086 0.16108586 0.20602214 0.05508983 0.05495547 0.0739196 ]] probs:[[0.15978625 0.17647019 0.18458098 0.15872227 0.15870094 0.16173929]] entropy:[1.789943]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:506 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06258331 0.16320711 0.2153817  0.05762976 0.05756195 0.07662398]] probs:[[0.15935898 0.1762288  0.18566756 0.15857153 0.15856078 0.16161226]] entropy:[1.7898059]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:507 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06386249 0.16405687 0.21908122 0.05798233 0.05714478 0.07739566]] probs:[[0.15937865 0.17617488 0.18614046 0.15844423 0.15831158 0.16155021]] entropy:[1.7897372]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:508 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06101767 0.17441913 0.21210799 0.08980415 0.07768665 0.09674801]] probs:[[0.15709504 0.17595926 0.18271752 0.16168298 0.1597356  0.1628096 ]] entropy:[1.7902093]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:509 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05595256 0.17128165 0.20473506 0.08664969 0.07915087 0.09215482]] probs:[[0.15688314 0.17606096 0.18205042 0.16177368 0.1605651  0.16266672]] entropy:[1.7902836]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:510 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05404821 0.16741107 0.19779007 0.08644979 0.07822807 0.08771335]] probs:[[0.15707788 0.17593324 0.18135992 0.1622508  0.16092229 0.16245595]] entropy:[1.790383]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:511 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05349517 0.16441771 0.19209443 0.08922194 0.08190635 0.09464756]] probs:[[0.15690768 0.17531426 0.18023415 0.16261482 0.16142954 0.16349952]] entropy:[1.7905465]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:512 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05602839 0.16213456 0.18785334 0.09508374 0.08458851 0.10113988]] probs:[[0.1570403  0.17461938 0.17916861 0.1632949  0.16159004 0.16428684]] entropy:[1.7907021]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4771132] v_loss:[[0.0002402]]
DEBUG:chainerrl.agents.a3c:grad norm:2.1715412488350996
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:513 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08953168 0.17078075 0.16655916 0.0592862  0.12845881 0.0947488 ]] probs:[[0.16181634 0.17551264 0.17477328 0.1569954  0.1682396  0.16266277]] entropy:[1.7909214]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:514 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08937266 0.18003768 0.16867168 0.06906316 0.13789728 0.09501123]] probs:[[0.16096057 0.1762361  0.17424433 0.15772451 0.16896373 0.16187073]] entropy:[1.7908915]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:515 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0889351  0.16702014 0.1656693  0.05491635 0.12494104 0.09448574]] probs:[[0.1620811  0.17524444 0.17500788 0.15666004 0.1680233  0.16298324]] entropy:[1.7909204]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:516 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08817299 0.16346818 0.16505668 0.05211386 0.12207246 0.09402049]] probs:[[0.1622574  0.17494632 0.17522444 0.15651077 0.16785213 0.16320899]] entropy:[1.7909261]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:517 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08535631 0.17106467 0.16886035 0.0647832  0.13399492 0.09430691]] probs:[[0.16089983 0.17529853 0.17491253 0.15762344 0.16891922 0.16234644]] entropy:[1.7909164]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:518 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08446287 0.1612498  0.16675602 0.05573924 0.12543638 0.09358352]] probs:[[0.16159636 0.17449369 0.17545713 0.15702076 0.16835505 0.16307697]] entropy:[1.7909406]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:519 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08435269 0.16354643 0.16746995 0.05844451 0.12798353 0.09368178]] probs:[[0.16135597 0.17465396 0.17534058 0.15722923 0.16855192 0.16286832]] entropy:[1.7909371]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:520 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08315291 0.16879997 0.17150049 0.08380459 0.13321313 0.09620187]] probs:[[0.16007914 0.1743937  0.17486529 0.16018349 0.1682967  0.16218169]] entropy:[1.791056]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[0.7816461] v_loss:[[0.04774295]]
DEBUG:chainerrl.agents.a3c:grad norm:785.7925522060194
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:521 r:0.15 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04448035  0.1823267   0.22755778  0.22899301  0.11239722  0.07628149]] probs:[[0.13927662 0.17473474 0.18281965 0.18308222 0.16293308 0.15715362]] entropy:[1.7873213]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:522 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04482257  0.18239409  0.22298533  0.22434789  0.1157721   0.07997248]] probs:[[0.13931066 0.17484906 0.18209243 0.1823407  0.16357982 0.15782732]] entropy:[1.7875074]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:523 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04794302  0.18402866  0.2167797   0.22366127  0.11287889  0.07626545]] probs:[[0.13921857 0.17556633 0.1814115  0.1826642  0.16350885 0.1576305 ]] entropy:[1.7874725]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:524 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04826277  0.18422946  0.21377687  0.22207396  0.11337808  0.07605242]] probs:[[0.13928479 0.17574127 0.18101145 0.18251957 0.16372062 0.1577223 ]] entropy:[1.7875311]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:525 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04832588  0.18388204  0.21082862  0.22000757  0.11439864  0.07628658]] probs:[[0.13938408 0.17581655 0.18061861 0.18228413 0.16401498 0.15788165]] entropy:[1.7876117]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:526 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0482089   0.18364544  0.21085344  0.22001228  0.11434507  0.07634585]] probs:[[0.13940307 0.17577836 0.1806266  0.18228853 0.16400936 0.15789406]] entropy:[1.7876167]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:527 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04822968  0.18370387  0.21089152  0.22002871  0.11434743  0.07633635]] probs:[[0.13939793 0.17578581 0.18063056 0.18228859 0.16400711 0.15789002]] entropy:[1.7876148]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:528 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0482275   0.18370675  0.21089168  0.22002701  0.11434938  0.07633679]] probs:[[0.13939811 0.17578615 0.18063043 0.18228811 0.16400729 0.15788995]] entropy:[1.787615]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4031435] v_loss:[[7.849329e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.29336936239355393
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:529 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01776729 0.11790359 0.23259152 0.13310415 0.1278768  0.11097136]] probs:[[0.1496714  0.16543503 0.18553925 0.16796894 0.1670932  0.16429216]] entropy:[1.7898043]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:530 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0183132  0.11749451 0.23551224 0.13357584 0.12753193 0.11169304]] probs:[[0.14964883 0.16525218 0.18595235 0.16793114 0.16691925 0.16429625]] entropy:[1.7897563]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:531 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01827774 0.11748184 0.2353121  0.13352409 0.12753963 0.11171541]] probs:[[0.14965075 0.1652581  0.18592413 0.16793057 0.1669286  0.16430788]] entropy:[1.7897601]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:532 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01831122 0.1174935  0.23548833 0.13356878 0.12754126 0.11169765]] probs:[[0.14964908 0.16525264 0.1859486  0.16793059 0.16692142 0.16429763]] entropy:[1.7897569]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:533 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01827789 0.1174819  0.23531236 0.13352412 0.12753956 0.11171541]] probs:[[0.14965077 0.16525808 0.18592416 0.16793056 0.16692857 0.16430786]] entropy:[1.78976]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:534 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01831121 0.11749351 0.23548836 0.13356876 0.12754126 0.11169774]] probs:[[0.14964908 0.16525263 0.1859486  0.16793057 0.16692142 0.16429764]] entropy:[1.789757]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:535 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01831107 0.11749339 0.23548622 0.13356827 0.12754191 0.11169786]] probs:[[0.14964913 0.16525269 0.18594828 0.16793057 0.1669216  0.16429773]] entropy:[1.789757]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:536 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01827794 0.11748189 0.2353124  0.1335242  0.12753959 0.1117155 ]] probs:[[0.14965077 0.16525806 0.18592414 0.16793056 0.16692856 0.16430786]] entropy:[1.7897599]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5638562] v_loss:[[0.00021101]]
DEBUG:chainerrl.agents.a3c:grad norm:1.1873896042715248
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:537 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03766846 0.10977208 0.21460307 0.11057891 0.15770465 0.11269359]] probs:[[0.15268567 0.16410147 0.18223843 0.16423392 0.17215884 0.16458158]] entropy:[1.7903098]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:538 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03768777 0.10963745 0.21458237 0.11077423 0.15775016 0.11287434]] probs:[[0.15268148 0.16407172 0.18222617 0.16425835 0.17215864 0.16460367]] entropy:[1.7903106]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:539 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03768852 0.10963548 0.21458167 0.11077876 0.15775436 0.11287716]] probs:[[0.15268137 0.16407114 0.18222575 0.16425884 0.17215909 0.16460387]] entropy:[1.7903107]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:540 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03766532 0.10978043 0.21460605 0.11055966 0.15768786 0.11268181]] probs:[[0.15268621 0.16410393 0.18224019 0.16423185 0.1721571  0.16458075]] entropy:[1.7903098]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:541 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03768767 0.10963766 0.21458243 0.11077367 0.15774986 0.11287411]] probs:[[0.1526815  0.16407177 0.1822262  0.16425827 0.1721586  0.16460364]] entropy:[1.7903109]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:542 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03768852 0.10963553 0.21458162 0.11077872 0.15775439 0.11287718]] probs:[[0.15268137 0.16407114 0.18222573 0.16425882 0.17215909 0.16460386]] entropy:[1.7903107]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:543 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03766531 0.10978044 0.21460606 0.11055966 0.15768789 0.11268186]] probs:[[0.1526862  0.16410393 0.18224019 0.16423185 0.1721571  0.16458075]] entropy:[1.7903098]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:544 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03768766 0.10963767 0.21458243 0.1107737  0.15774986 0.11287418]] probs:[[0.1526815  0.16407177 0.1822262  0.16425827 0.1721586  0.16460367]] entropy:[1.7903109]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5892242] v_loss:[[0.00031357]]
DEBUG:chainerrl.agents.a3c:grad norm:1.0026084420423351
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:545 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06894904 0.11477974 0.16590856 0.12970297 0.12907587 0.13520677]] probs:[[0.15768334 0.16507825 0.173738   0.16756023 0.16745518 0.16848499]] entropy:[1.7913432]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:546 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06896741 0.1146924  0.1658923  0.1298472  0.12915082 0.13532375]] probs:[[0.15767962 0.1650569  0.17372787 0.16757736 0.1674607  0.1684976 ]] entropy:[1.791343]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:547 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0689494  0.11477855 0.16590816 0.12970555 0.12907834 0.13520843]] probs:[[0.15768327 0.1650779  0.17373778 0.1675605  0.16745543 0.16848512]] entropy:[1.7913431]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:548 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06894879 0.11478042 0.16590877 0.12970138 0.12907453 0.13520598]] probs:[[0.1576834  0.16507845 0.17373814 0.16756004 0.16745505 0.16848494]] entropy:[1.7913433]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:549 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06896739 0.11469242 0.16589226 0.12984718 0.12915085 0.13532376]] probs:[[0.15767962 0.1650569  0.17372787 0.16757736 0.1674607  0.16849762]] entropy:[1.7913429]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:550 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06894948 0.11477856 0.16590819 0.1297055  0.1290783  0.13520847]] probs:[[0.15768327 0.1650779  0.17373778 0.16756049 0.16745542 0.16848512]] entropy:[1.7913431]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:551 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06896743 0.11469236 0.16589223 0.12984726 0.12915091 0.13532378]] probs:[[0.15767962 0.16505688 0.17372786 0.16757736 0.16746071 0.16849762]] entropy:[1.791343]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:552 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06894949 0.11477856 0.16590817 0.12970552 0.12907834 0.13520843]] probs:[[0.15768327 0.1650779  0.17373778 0.16756049 0.16745543 0.16848512]] entropy:[1.7913432]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5731843] v_loss:[[0.00023808]]
DEBUG:chainerrl.agents.a3c:grad norm:0.4084194077092616
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:553 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10914945 0.13548993 0.21232566 0.07772513 0.09432885 0.12446156]] probs:[[0.16379586 0.16816765 0.1815983  0.15872872 0.1613862  0.16632321]] entropy:[1.7908012]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:554 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10920987 0.13551968 0.21243349 0.07766704 0.0944654  0.12436593]] probs:[[0.16380061 0.16816737 0.18161218 0.15871452 0.1614032  0.1663021 ]] entropy:[1.7907999]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:555 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10921077 0.13552068 0.21243767 0.07766044 0.0944552  0.12435975]] probs:[[0.16380121 0.16816801 0.18161345 0.1587139  0.16140199 0.16630153]] entropy:[1.7907997]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:556 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10912824 0.13552237 0.21231888 0.07770884 0.0942501  0.12438498]] probs:[[0.16379686 0.1681777  0.18160203 0.15873046 0.1613779  0.16631503]] entropy:[1.7908005]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:557 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10921047 0.13552031 0.21243614 0.07766266 0.09445845 0.12436173]] probs:[[0.16380101 0.1681678  0.181613   0.15871412 0.16140236 0.16630171]] entropy:[1.7907996]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:558 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1091282  0.13552237 0.21231869 0.07770906 0.09425047 0.12438516]] probs:[[0.16379686 0.1681777  0.181602   0.1587305  0.16137797 0.16631505]] entropy:[1.7908007]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:559 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10912774 0.13552204 0.21231695 0.07771164 0.09425434 0.12438752]] probs:[[0.16379662 0.16817747 0.1816015  0.15873075 0.16137843 0.16631527]] entropy:[1.7908006]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:560 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10921032 0.13552025 0.21243593 0.07766302 0.09445897 0.12436202]] probs:[[0.16380097 0.16816776 0.18161294 0.15871416 0.16140242 0.16630174]] entropy:[1.7907996]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5458714] v_loss:[[0.00015261]]
DEBUG:chainerrl.agents.a3c:grad norm:0.5493415133052889
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:561 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04414487 0.16368768 0.21263734 0.04566839 0.04548488 0.10666418]] probs:[[0.15679187 0.17670155 0.1855662  0.15703094 0.15700212 0.16690731]] entropy:[1.7895614]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:562 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04415557 0.16395175 0.21243978 0.04556516 0.04513959 0.10651372]] probs:[[0.15680669 0.17676301 0.18554509 0.15702787 0.15696107 0.16689616]] entropy:[1.7895583]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:563 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04416192 0.16390482 0.21240778 0.04557678 0.0450039  0.10658112]] probs:[[0.15681106 0.17675853 0.18554315 0.15703309 0.15694314 0.166911  ]] entropy:[1.7895584]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:564 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04416186 0.16390482 0.21240775 0.04557692 0.04500399 0.1065811 ]] probs:[[0.15681106 0.17675853 0.18554315 0.1570331  0.15694317 0.16691102]] entropy:[1.7895584]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:565 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04415547 0.163952   0.21243961 0.04556517 0.04513974 0.10651367]] probs:[[0.15680668 0.17676307 0.18554507 0.15702789 0.1569611  0.16689618]] entropy:[1.7895584]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:t:566 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04416195 0.16390486 0.21240777 0.04557689 0.04500386 0.10658113]] probs:[[0.15681107 0.17675853 0.18554315 0.1570331  0.15694314 0.16691102]] entropy:[1.7895585]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-3.6233706] v_loss:[[0.08777954]]
DEBUG:chainerrl.agents.a3c:grad norm:170.01311230130904
DEBUG:chainerrl.agents.a3c:update


INFO: outdir:result global_step:1158 local_step:566 R:0.25
INFO: statistics:[('average_value', 0.18676570081024457), ('average_entropy', 0.7730909341807232)]
DEBUG: Closing video encoder: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000000.mp4
INFO: Starting new video recorder writing to /home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4
DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4
DEBUG: Starting ffmpeg with "ffmpeg -nostats -loglevel error -y -r 30 -f rawvideo -s:v 160x210 -pix_fmt rgb24 -i - -vf scale=trunc(iw/2)*2:trunc(ih/2)*2 -vcodec libx264 -pix_fmt yuv420p /home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4"


DEBUG:chainerrl.agents.a3c:t:567 r:0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09309492  0.05113813  0.09102289  0.19298878  0.2720712   0.44822404]] probs:[[0.12742849 0.14719942 0.1531891  0.16963328 0.183593   0.21895675]] entropy:[1.7766675]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:568 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1489963   0.00358015  0.10685686  0.18885222  0.34640613  0.51361394]] probs:[[0.11849281 0.13802417 0.15304093 0.1661184  0.19446549 0.22985817]] entropy:[1.7679198]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:569 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15817365 -0.00283927  0.11369969  0.18605006  0.35759345  0.52529293]] probs:[[0.11699985 0.1366615  0.15355304 0.16507442 0.19596574 0.23174538]] entropy:[1.7663219]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:570 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15802298 -0.00270892  0.11638615  0.1833121   0.36756513  0.5093001 ]] probs:[[0.11721861 0.13691424 0.15423073 0.16490602 0.1982698  0.2284606 ]] entropy:[1.7671741]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:571 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15914959 -0.00304222  0.11805345  0.18295413  0.36832222  0.5110942 ]] probs:[[0.11701863 0.13678911 0.15439837 0.16475125 0.19830473 0.22873792]] entropy:[1.7669964]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:572 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16000229 -0.00332189  0.1191818   0.18261355  0.36858186  0.5124733 ]] probs:[[0.11687828 0.13670337 0.154519   0.16463795 0.19828732 0.22897401]] entropy:[1.7668655]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:573 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1639637  -0.00423038  0.12091812  0.18410826  0.36000618  0.53367746]] probs:[[0.1160515  0.13615139 0.15430264 0.1643677  0.1959783  0.23314841]] entropy:[1.7654628]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:574 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16359048 -0.0044631   0.12152583  0.18304536  0.3591818   0.5345144 ]] probs:[[0.11609895 0.13612454 0.15440194 0.16419894 0.19582377 0.23335193]] entropy:[1.7654363]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.408028] v_loss:[[0.00016105]]
DEBUG:chainerrl.agents.a3c:grad norm:16.61926025454351
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:575 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08183816  0.04049822  0.12458498  0.13966875  0.35840937  0.45844367]] probs:[[0.12695277 0.14347367 0.15605964 0.15843146 0.19716927 0.21791323]] entropy:[1.7745423]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:576 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08145884  0.04019291  0.12446586  0.1393431   0.35821593  0.45862132]] probs:[[0.12700921 0.14343922 0.15605122 0.1583902  0.197144   0.21796615]] entropy:[1.77454]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:577 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07740735  0.04144498  0.12535584  0.14210567  0.36194023  0.4617189 ]] probs:[[0.12718345 0.14323446 0.15577205 0.15840317 0.19734986 0.21805707]] entropy:[1.7744794]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:578 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07709862  0.04125613  0.1253602   0.1417912   0.36177844  0.46192244]] probs:[[0.12722583 0.1432109  0.15577653 0.15835723 0.19732276 0.21810678]] entropy:[1.7744763]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:579 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08068941  0.03969286  0.12429292  0.13856946  0.3577926   0.45907602]] probs:[[0.12712067 0.14338295 0.15604106 0.15828475 0.19708179 0.21808879]] entropy:[1.7745332]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:580 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08100488  0.03990111  0.12437368  0.138868    0.3579466   0.45891038]] probs:[[0.127075   0.14340653 0.15604681 0.15832508 0.1971035  0.21804309]] entropy:[1.7745354]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:581 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08114771  0.04000702  0.124415    0.13901418  0.35802653  0.45883974]] probs:[[0.12705342 0.14341785 0.15604906 0.15834396 0.19711393 0.21802181]] entropy:[1.7745365]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:582 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07732498  0.04142315  0.12539063  0.14201532  0.36187753  0.46182615]] probs:[[0.12719272 0.14322998 0.155776   0.15838736 0.19733562 0.21807837]] entropy:[1.7744772]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.380722] v_loss:[[2.1745242e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:1.0152998387839984
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:583 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03376742  0.0312422   0.10977118  0.13735205  0.3847369   0.3830786 ]] probs:[[0.13431817 0.14334023 0.15505037 0.1593863  0.20412156 0.20378335]] entropy:[1.7783235]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:584 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0351226   0.03265456  0.11048556  0.13707554  0.38408923  0.38154414]] probs:[[0.1341842  0.1435941  0.1552166  0.15939918 0.20406228 0.20354359]] entropy:[1.7784052]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:585 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03549445  0.03295128  0.11055145  0.137454    0.3843611   0.38139358]] probs:[[0.1341225  0.14362407 0.15521318 0.15944548 0.2040998  0.20349503]] entropy:[1.7784002]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:586 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03565719  0.03308143  0.11058226  0.13762267  0.38447958  0.38132355]] probs:[[0.13409552 0.14363724 0.15521197 0.15946622 0.20411614 0.20347294]] entropy:[1.7783982]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:587 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03411926  0.03148686  0.10986701  0.13767911  0.3849294   0.38295493]] probs:[[0.13426167 0.14336541 0.15505454 0.15942746 0.20414679 0.20374411]] entropy:[1.77832]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:588 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03384127  0.03129501  0.1098      0.13739967  0.38475502  0.38308915]] probs:[[0.13430616 0.14334556 0.15505242 0.15939142 0.2041221  0.20378233]] entropy:[1.7783221]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:589 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03372676  0.03121604  0.10977421  0.13728592  0.38467616  0.38315058]] probs:[[0.13432446 0.14333735 0.15505178 0.15937674 0.20411041 0.20379926]] entropy:[1.778323]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:590 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03508526  0.03264501  0.11050491  0.13701978  0.3840364   0.3816092 ]] probs:[[0.13418917 0.14359272 0.15521957 0.15939026 0.20405146 0.20355679]] entropy:[1.7784054]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3775072] v_loss:[[2.4233166e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:1.030913950867166
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:591 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03015718 0.06737447 0.15088914 0.13872181 0.34623677 0.3396326 ]] probs:[[0.1425442  0.14794926 0.16083583 0.15889074 0.19553351 0.19424643]] entropy:[1.7840102]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:592 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03025523 0.06734173 0.1508452  0.13842478 0.34606656 0.3396962 ]] probs:[[0.1425676  0.1479542  0.16083938 0.15885405 0.19551314 0.19427162]] entropy:[1.7840123]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:593 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03511954 0.06104106 0.15225375 0.14088853 0.346239   0.34146926]] probs:[[0.14315356 0.14691284 0.16094331 0.15912451 0.1953978  0.19446802]] entropy:[1.7839794]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:594 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03512113 0.06104538 0.1522551  0.14087968 0.34622362 0.34147766]] probs:[[0.14315403 0.14691372 0.16094379 0.15912336 0.1953951  0.19446997]] entropy:[1.7839798]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:595 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03024383 0.06735541 0.15082814 0.1383122  0.3459602  0.33971366]] probs:[[0.14257134 0.1479618  0.1608427  0.15884215 0.19549972 0.19428232]] entropy:[1.7840137]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:596 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03024372 0.06735724 0.15082958 0.13830616 0.3459587  0.3397191 ]] probs:[[0.1425713  0.14796203 0.16084288 0.15884113 0.19549936 0.19428332]] entropy:[1.7840136]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:597 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03024161 0.06735992 0.15083109 0.13830532 0.34595704 0.33972088]] probs:[[0.14257094 0.14796238 0.16084307 0.15884097 0.19549899 0.1942836 ]] entropy:[1.7840136]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:598 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03511249 0.06105834 0.15226443 0.14087313 0.34621668 0.34149727]] probs:[[0.14315228 0.1469151  0.1609447  0.15912174 0.19539304 0.19447307]] entropy:[1.7839794]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4313986] v_loss:[[3.8747945e-07]]
DEBUG:chainerrl.agents.a3c:grad norm:0.4901385059082587
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:599 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03300799 0.0848907  0.1810637  0.12355618 0.3033485  0.32749948]] probs:[[0.1436687  0.15131937 0.166595   0.1572848  0.188265   0.19286712]] entropy:[1.7857939]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:600 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03279508 0.08493743 0.18102622 0.12351376 0.30321127 0.32755363]] probs:[[0.14364555 0.15133429 0.16659738 0.15728627 0.1882489  0.19288756]] entropy:[1.7857909]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:601 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03233506 0.08491657 0.18117651 0.12326874 0.3027604  0.32752302]] probs:[[0.1436044  0.15135738 0.16665132 0.15727502 0.18819669 0.19291511]] entropy:[1.7857887]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:602 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03231496 0.08492213 0.18117331 0.12326543 0.30274698 0.32752815]] probs:[[0.14360219 0.15135895 0.16665158 0.15727524 0.18819505 0.192917  ]] entropy:[1.7857884]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:603 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03230661 0.0849255  0.18117261 0.12326485 0.30274194 0.32753143]] probs:[[0.14360116 0.15135963 0.16665167 0.15727535 0.18819433 0.19291788]] entropy:[1.7857884]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:604 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.032704   0.0849685  0.18101448 0.12350966 0.30315575 0.3275853 ]] probs:[[0.14363469 0.15134132 0.16659799 0.15728804 0.18824136 0.19289663]] entropy:[1.7857896]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:605 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03269727 0.08497632 0.18101764 0.12351647 0.30315378 0.3275946 ]] probs:[[0.14363325 0.151342   0.16659796 0.1572886  0.18824035 0.1928978 ]] entropy:[1.7857895]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:606 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03228915 0.08494361 0.18118022 0.12327777 0.30273512 0.32755294]] probs:[[0.14359774 0.1513614  0.16665186 0.15727636 0.18819183 0.19292079]] entropy:[1.7857879]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4907169] v_loss:[[6.895076e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.6870622307624776
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:607 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02330064 0.12427735 0.17066915 0.12170599 0.27157345 0.3034937 ]] probs:[[0.1433972  0.15863329 0.16616595 0.15822591 0.18380791 0.18976976]] entropy:[1.787254]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:608 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01464153 0.12547912 0.13377485 0.1532092  0.2979993  0.3470948 ]] probs:[[0.14056146 0.15703715 0.15834531 0.16145274 0.1866066  0.19599676]] entropy:[1.7854323]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:609 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00086486 0.13290966 0.15238957 0.1667626  0.29183412 0.35727996]] probs:[[0.1379093  0.1573765  0.16047223 0.16279536 0.18448451 0.1969621 ]] entropy:[1.7851758]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:610 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00404922  0.13792595  0.16243127  0.1712872   0.28945926  0.3642659 ]] probs:[[0.13676602 0.15762942 0.16153988 0.16297683 0.18342027 0.19766758]] entropy:[1.7849959]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:611 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00407703  0.14162493  0.16765428  0.17663081  0.28667444  0.3663784 ]] probs:[[0.13646099 0.15786509 0.16202815 0.16348915 0.18250732 0.1976493 ]] entropy:[1.7850616]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:612 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01313412  0.13900754  0.16930854  0.18240413  0.28401884  0.3690214 ]] probs:[[0.13528328 0.15751372 0.1623596  0.16449977 0.18209416 0.19824952]] entropy:[1.7847608]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:613 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.008316   0.12735257 0.17104048 0.19332318 0.27478817 0.35965246]] probs:[[0.13826159 0.15573938 0.16269413 0.16636008 0.18047993 0.19646491]] entropy:[1.7856989]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:614 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02427135  0.1366046   0.16557221  0.19676654  0.28969073  0.36582407]] probs:[[0.13374473 0.15708843 0.16170546 0.16682926 0.18307485 0.19755726]] entropy:[1.7844316]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4332541] v_loss:[[0.0001407]]
DEBUG:chainerrl.agents.a3c:grad norm:2.8784565504618422
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:615 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03096641 0.14195088 0.16377519 0.1941646  0.2763562  0.3310933 ]] probs:[[0.14154623 0.1581606  0.16165029 0.16663815 0.180913   0.19109169]] entropy:[1.7871658]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:616 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03120413 0.14134315 0.16473678 0.192514   0.27517727 0.32965812]] probs:[[0.14167468 0.15817033 0.16191413 0.1664747  0.18082082 0.19094539]] entropy:[1.7872226]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:617 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03745382 0.14052007 0.17071708 0.18999708 0.2696723  0.32565805]] probs:[[0.14262623 0.15811041 0.1629577  0.16613    0.17990804 0.19026762]] entropy:[1.7875599]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:618 r:0.05 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00785085 0.13845152 0.20428798 0.17041677 0.26619747 0.34973976]] probs:[[0.13820766 0.15748939 0.16820686 0.1626049  0.17894956 0.19454165]] entropy:[1.786211]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:619 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01377002 0.13493457 0.19560075 0.15817994 0.2794105  0.36278823]] probs:[[0.13878247 0.15665914 0.16645725 0.1603434  0.18100928 0.19674845]] entropy:[1.7856914]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:620 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0164694  0.13972528 0.19489445 0.15295944 0.28359434 0.36432675]] probs:[[0.13898571 0.157217   0.16613425 0.15931146 0.18154363 0.19680797]] entropy:[1.7856634]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:621 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05451971 0.12164254 0.22213887 0.10841139 0.23122539 0.36616474]] probs:[[0.14564027 0.15575162 0.17221761 0.1537044  0.1737896  0.19889653]] entropy:[1.786316]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:622 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04900014 0.12477709 0.21419291 0.10676597 0.23786864 0.36658737]] probs:[[0.14493804 0.15634784 0.17097187 0.15355703 0.17506805 0.19911718]] entropy:[1.7861859]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-0.90956014] v_loss:[[0.00332653]]
DEBUG:chainerrl.agents.a3c:grad norm:75.23815322698401
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:623 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07464907 0.16098702 0.16151902 0.13365586 0.28125542 0.329842  ]] probs:[[0.14788698 0.16122264 0.16130844 0.15687591 0.18182682 0.19087929]] entropy:[1.7878402]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:624 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07452171 0.16209024 0.1603104  0.13997543 0.2841391  0.33106947]] probs:[[0.14761448 0.16112372 0.1608372  0.15759961 0.18203908 0.19078588]] entropy:[1.7878243]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:625 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07725724 0.16447492 0.15917435 0.13907544 0.2845901  0.33088002]] probs:[[0.14794329 0.16142598 0.16057259 0.15737748 0.18202825 0.19065239]] entropy:[1.7878704]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:626 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0752453  0.16323611 0.16014257 0.1381156  0.28311998 0.33012435]] probs:[[0.14777954 0.16137202 0.16087358 0.15736876 0.18192531 0.19068076]] entropy:[1.7878646]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:627 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07456378 0.16282733 0.16326645 0.1417068  0.2812121  0.32870558]] probs:[[0.14763668 0.16126    0.16133082 0.15788981 0.18152669 0.19035603]] entropy:[1.7879664]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:628 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07277111 0.16060236 0.16618536 0.13696876 0.27789322 0.3270889 ]] probs:[[0.14763896 0.16119279 0.16209525 0.15742788 0.18125266 0.19039248]] entropy:[1.7879803]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:629 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05787382 0.16548851 0.17884359 0.11547903 0.27411395 0.34611893]] probs:[[0.14541243 0.16193399 0.16411114 0.15403491 0.18051507 0.19399244]] entropy:[1.787077]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:630 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08453701 0.14158961 0.16716294 0.15143816 0.2755862  0.34259525]] probs:[[0.14883234 0.1575705  0.16165207 0.15913    0.18016437 0.19265069]] entropy:[1.7878071]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7929275] v_loss:[[0.00150316]]
DEBUG:chainerrl.agents.a3c:grad norm:10.010918553405887
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:631 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10604595 0.14689521 0.17511986 0.1570242  0.18032289 0.3065806 ]] probs:[[0.15468691 0.16113658 0.1657494  0.16277704 0.16661404 0.18903606]] entropy:[1.789739]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:632 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0990092  0.14068519 0.16960736 0.15740113 0.16861358 0.29934967]] probs:[[0.15456572 0.16114353 0.16587219 0.16385981 0.16570744 0.18885128]] entropy:[1.7897731]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:633 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09712376 0.14430912 0.16099901 0.15984386 0.1627947  0.29379314]] probs:[[0.15469767 0.16217208 0.16490144 0.16471107 0.16519782 0.18831989]] entropy:[1.789881]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:634 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09544566 0.1406305  0.16051464 0.1597775  0.15888111 0.29191908]] probs:[[0.15473904 0.16189127 0.16514255 0.16502087 0.164873   0.18833333]] entropy:[1.7898773]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:635 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09794773 0.12842293 0.17813219 0.14999847 0.16625859 0.30137545]] probs:[[0.15469925 0.15948632 0.16761461 0.1629647  0.1656362  0.18959887]] entropy:[1.7896049]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:636 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0994837  0.12434257 0.18281801 0.14960584 0.16698594 0.3025064 ]] probs:[[0.15483692 0.15873423 0.16829303 0.16279545 0.1656496  0.18969074]] entropy:[1.7895594]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:637 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0989994  0.12519877 0.18349609 0.14596123 0.16893557 0.30470502]] probs:[[0.15471186 0.1588188  0.1683527  0.16215073 0.16591915 0.19004674]] entropy:[1.7894924]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:638 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09849437 0.12552837 0.18083967 0.15013625 0.16582726 0.3011878 ]] probs:[[0.15478486 0.15902638 0.16807014 0.16298823 0.16556585 0.18956454]] entropy:[1.7895919]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3869222] v_loss:[[0.00015491]]
DEBUG:chainerrl.agents.a3c:grad norm:3.3028158709308792
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:639 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09437755 0.14548667 0.14313589 0.1806275  0.13587077 0.252056  ]] probs:[[0.15611245 0.16429864 0.16391286 0.17017487 0.16272633 0.18277484]] entropy:[1.7905401]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:640 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13507774 0.12854682 0.17126161 0.1657834  0.15098864 0.280186  ]] probs:[[0.16041781 0.15937355 0.16632864 0.16541995 0.16299061 0.18546942]] entropy:[1.7904099]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:641 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13568756 0.1312837  0.1763271  0.17232212 0.15568988 0.28768897]] probs:[[0.15977602 0.15907392 0.16640298 0.16573787 0.16300409 0.18600515]] entropy:[1.790315]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:642 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13660574 0.13020907 0.17672713 0.1712799  0.15710135 0.28692985]] probs:[[0.15992935 0.15890959 0.1664764  0.16557203 0.16324103 0.18587163]] entropy:[1.7903326]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:643 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13442057 0.13097665 0.17596072 0.17153893 0.15798868 0.2858078 ]] probs:[[0.15964016 0.15909131 0.16641128 0.16567709 0.16344726 0.18573287]] entropy:[1.7903486]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:644 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13331264 0.13090101 0.17700222 0.17215212 0.15886466 0.28515628]] probs:[[0.15944609 0.15906203 0.16656664 0.16576073 0.16357276 0.18559177]] entropy:[1.7903572]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:645 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13278812 0.13082539 0.17690855 0.17199603 0.15934177 0.28471085]] probs:[[0.15938506 0.15907253 0.16657464 0.16575833 0.16367401 0.18553542]] entropy:[1.7903628]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:646 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13333373 0.12863609 0.17829795 0.16997986 0.16090025 0.28511733]] probs:[[0.15947722 0.1587298  0.16681163 0.16542985 0.1639346  0.18561687]] entropy:[1.7903442]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5349276] v_loss:[[0.00017064]]
DEBUG:chainerrl.agents.a3c:grad norm:1.2758240813208777
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:647 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10974034 0.16423866 0.16687354 0.16950196 0.17118518 0.26832378]] probs:[[0.15596701 0.16470283 0.16513737 0.165572   0.16585092 0.1827699 ]] entropy:[1.7906302]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:648 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10722589 0.16704516 0.16161755 0.1689026  0.17047833 0.26538095]] probs:[[0.15581653 0.16542178 0.16452636 0.16572933 0.16599068 0.18251534]] entropy:[1.7906451]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:649 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1026194  0.16797425 0.15805058 0.16683233 0.17325109 0.26046473]] probs:[[0.15539958 0.16589491 0.16425678 0.16570559 0.16677263 0.18197058]] entropy:[1.7906657]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:650 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10003103 0.16693372 0.1583124  0.16545078 0.17607538 0.2581258 ]] probs:[[0.15510851 0.1658407  0.16441709 0.16559495 0.1673637  0.18167508]] entropy:[1.7906706]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:651 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09878992 0.16918084 0.15490559 0.16602668 0.17582446 0.2558304 ]] probs:[[0.1550311  0.16633713 0.1639795  0.16581331 0.1674459  0.18139306]] entropy:[1.7906852]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:652 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09802171 0.16885602 0.1550233  0.16560896 0.17673248 0.255119  ]] probs:[[0.15494303 0.16631636 0.1640316  0.16577719 0.16763152 0.1813003 ]] entropy:[1.7906861]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:653 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09743422 0.16584015 0.15862438 0.163935   0.17904031 0.2557061 ]] probs:[[0.15481852 0.16577964 0.16458772 0.1654641  0.16798247 0.18136752]] entropy:[1.7906729]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:654 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09713379 0.16846365 0.15517966 0.16511635 0.17779179 0.25430408]] probs:[[0.15484093 0.16628915 0.16409479 0.16573347 0.16784759 0.18119405]] entropy:[1.7906868]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5866725] v_loss:[[0.00036551]]
DEBUG:chainerrl.agents.a3c:grad norm:5.651345077773161
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:655 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08130393 0.1592227  0.18980137 0.15851346 0.19789967 0.24499576]] probs:[[0.15203525 0.16435541 0.1694588  0.16423889 0.1708367  0.17907493]] entropy:[1.7905376]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:656 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08088834 0.15889691 0.19017227 0.1581798  0.19813402 0.24475332]] probs:[[0.15198912 0.16432029 0.16954067 0.1642025  0.1708959  0.17905158]] entropy:[1.7905315]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:657 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08124072 0.1591904  0.18982618 0.1584673  0.1979769  0.24494182]] probs:[[0.15202789 0.16435254 0.16946553 0.16423373 0.17085242 0.17906791]] entropy:[1.7905372]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:658 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08124005 0.15918952 0.18982768 0.1584671  0.19797835 0.24494113]] probs:[[0.15202777 0.16435237 0.16946575 0.16423368 0.17085266 0.17906778]] entropy:[1.790537]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:659 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08087944 0.15889338 0.19017509 0.15817435 0.19814467 0.24474548]] probs:[[0.15198804 0.16432002 0.16954146 0.16420192 0.17089805 0.1790505 ]] entropy:[1.7905312]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:660 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08123965 0.1591899  0.18982664 0.1584666  0.19797827 0.2449407 ]] probs:[[0.15202777 0.16435249 0.16946565 0.16423365 0.1708527  0.17906776]] entropy:[1.7905372]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:661 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08087938 0.15889332 0.19017512 0.15817438 0.19814464 0.24474552]] probs:[[0.15198804 0.16431999 0.16954146 0.16420192 0.17089802 0.1790505 ]] entropy:[1.7905312]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:662 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08123966 0.15918992 0.18982671 0.15846674 0.19797838 0.2449407 ]] probs:[[0.15202776 0.16435248 0.16946565 0.16423367 0.1708527  0.17906775]] entropy:[1.7905369]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4895751] v_loss:[[3.926863e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.13973404679614626
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:663 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07720531 0.1579896  0.19278541 0.1626254  0.18998879 0.21783982]] probs:[[0.15229441 0.16510801 0.1709542  0.1658752  0.17047676 0.17529146]] entropy:[1.7907947]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:664 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07846237 0.15819533 0.19328319 0.16316575 0.19055957 0.21934903]] probs:[[0.1523698  0.16501616 0.170909   0.1658384  0.17044415 0.17542246]] entropy:[1.7907956]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:665 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07719304 0.15799025 0.1927865  0.162625   0.18998727 0.21783268]] probs:[[0.15229303 0.16510864 0.17095491 0.16587564 0.17047705 0.17529076]] entropy:[1.7907945]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:666 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07846233 0.15819532 0.19328317 0.16316602 0.1905596  0.21934894]] probs:[[0.1523698  0.16501616 0.170909   0.16583845 0.17044415 0.17542244]] entropy:[1.7907956]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:667 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07846301 0.15819603 0.19328588 0.16316748 0.19056126 0.21935284]] probs:[[0.15236959 0.16501595 0.17090914 0.16583836 0.1704441  0.17542279]] entropy:[1.7907954]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:668 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07719313 0.15799017 0.19278656 0.162625   0.18998738 0.21783271]] probs:[[0.15229301 0.1651086  0.17095491 0.16587563 0.17047705 0.17529075]] entropy:[1.7907945]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:669 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07846244 0.15819533 0.19328324 0.16316605 0.19055958 0.21934894]] probs:[[0.15236981 0.16501616 0.17090902 0.16583845 0.17044415 0.17542244]] entropy:[1.7907957]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:670 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07719325 0.15799016 0.19278662 0.16262518 0.18998738 0.21783265]] probs:[[0.15229303 0.16510859 0.17095491 0.16587566 0.17047705 0.17529073]] entropy:[1.7907946]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4835093] v_loss:[[3.2422526e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.5816237293636635
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:671 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08888424 0.14960718 0.17846993 0.16939363 0.17702602 0.21350974]] probs:[[0.15467829 0.16436183 0.1691749  0.16764635 0.1689308  0.17520782]] entropy:[1.7910495]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:672 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08884688 0.1496175  0.17843272 0.16933991 0.17705809 0.21353328]] probs:[[0.15467402 0.16436516 0.16917028 0.167639   0.16893788 0.17521368]] entropy:[1.791049]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:673 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08888412 0.1496076  0.17846894 0.16939405 0.17702599 0.21350819]] probs:[[0.1546783  0.16436195 0.16917478 0.16764648 0.16893084 0.1752076 ]] entropy:[1.7910494]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:674 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0888845  0.14960727 0.17846948 0.16939433 0.17702559 0.21350801]] probs:[[0.15467834 0.1643619  0.16917485 0.16764651 0.16893075 0.17520756]] entropy:[1.7910492]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:675 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08884695 0.14961748 0.1784328  0.16933997 0.1770582  0.21353327]] probs:[[0.15467402 0.16436514 0.16917026 0.167639   0.16893789 0.17521366]] entropy:[1.7910488]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:676 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0888842  0.14960767 0.17846894 0.16939412 0.17702599 0.2135083 ]] probs:[[0.15467831 0.16436197 0.16917478 0.16764648 0.16893084 0.17520761]] entropy:[1.7910494]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:677 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08884699 0.14961752 0.17843279 0.16934003 0.17705822 0.21353328]] probs:[[0.15467404 0.16436514 0.16917026 0.16763902 0.16893789 0.17521366]] entropy:[1.7910488]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:678 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08884646 0.14961803 0.17843193 0.16933964 0.17705879 0.21353343]] probs:[[0.15467398 0.16436525 0.16917013 0.16763896 0.168938   0.17521371]] entropy:[1.7910489]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4794668] v_loss:[[2.5788093e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.13185400783633858
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:679 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09951895 0.151755   0.16657962 0.17317884 0.17567794 0.21058135]] probs:[[0.15634713 0.16473114 0.1671914  0.1682984  0.16871952 0.17471237]] entropy:[1.7912095]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:680 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09948381 0.15175802 0.16654761 0.17312914 0.17571224 0.21060276]] probs:[[0.15634307 0.16473317 0.16718762 0.16829158 0.16872686 0.17471774]] entropy:[1.7912089]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:681 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09951902 0.15175495 0.16657971 0.17317888 0.17567804 0.2105813 ]] probs:[[0.15634714 0.16473114 0.16719145 0.16829841 0.16871955 0.17471237]] entropy:[1.7912095]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:682 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09951925 0.15175463 0.16658024 0.17317937 0.1756777  0.21058112]] probs:[[0.15634717 0.16473109 0.1671915  0.16829848 0.16871947 0.17471233]] entropy:[1.7912095]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:683 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09948381 0.15175804 0.16654761 0.17312923 0.1757123  0.21060282]] probs:[[0.15634306 0.16473316 0.1671876  0.1682916  0.16872686 0.17471774]] entropy:[1.7912089]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:684 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09951907 0.15175495 0.16657975 0.173179   0.17567803 0.21058124]] probs:[[0.15634714 0.16473114 0.16719143 0.16829842 0.16871953 0.17471236]] entropy:[1.7912095]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:685 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09948387 0.15175794 0.16654766 0.17312926 0.17571233 0.21060264]] probs:[[0.15634309 0.16473314 0.1671876  0.1682916  0.16872688 0.17471771]] entropy:[1.7912089]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:686 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09288602 0.14638354 0.16407031 0.1714236  0.171863   0.21278697]] probs:[[0.15576318 0.16432305 0.16725524 0.16848965 0.16856371 0.1756051 ]] entropy:[1.791121]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.8403697] v_loss:[[0.00165812]]
DEBUG:chainerrl.agents.a3c:grad norm:3.8529588358059605
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:687 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11307155 0.1246625  0.16328165 0.12586951 0.19435517 0.25218123]] probs:[[0.15847963 0.16032726 0.16664007 0.16052088 0.17189945 0.18213275]] entropy:[1.7905405]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:688 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11265478 0.12494744 0.1628772  0.1256538  0.19477282 0.25287592]] probs:[[0.15840155 0.16036074 0.16656001 0.16047406 0.17195818 0.18224545]] entropy:[1.7905241]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:689 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11262146 0.12431154 0.16234443 0.12557615 0.19521448 0.253984  ]] probs:[[0.15838523 0.16024764 0.1664597  0.16045041 0.17202216 0.1824348 ]] entropy:[1.7904989]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:690 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11261652 0.12432244 0.16233039 0.12558104 0.19524492 0.25401652]] probs:[[0.15838279 0.1602477  0.16645561 0.16044952 0.17202559 0.1824388 ]] entropy:[1.7904983]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:691 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11265072 0.12432009 0.16233231 0.12559354 0.19523932 0.25401103]] probs:[[0.15838736 0.16024646 0.16645503 0.16045067 0.1720237  0.18243682]] entropy:[1.7904989]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:692 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11264867 0.12513678 0.16271189 0.12554435 0.19487098 0.25324228]] probs:[[0.1583898  0.16038021 0.16652116 0.16044559 0.17196338 0.18229984]] entropy:[1.790518]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:693 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11345072 0.12550989 0.16250116 0.12517197 0.19418626 0.25363722]] probs:[[0.15850955 0.16043262 0.16647835 0.16037841 0.17183769 0.18236339]] entropy:[1.7905219]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:694 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11904602 0.12724912 0.16081464 0.12272966 0.19016314 0.2571906 ]] probs:[[0.15932627 0.16063862 0.16612205 0.15991426 0.17106973 0.18292911]] entropy:[1.790519]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5021868] v_loss:[[8.364812e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.6440020973141909
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:695 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1444552  0.17695008 0.1807366  0.1198641  0.16708985 0.21468726]] probs:[[0.16283105 0.16820914 0.16884726 0.15887569 0.16655871 0.17467816]] entropy:[1.7913191]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:696 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17629844 0.18886065 0.17262511 0.11755543 0.14974149 0.23239759]] probs:[[0.16712852 0.16924126 0.16651572 0.15759368 0.16274852 0.17677225]] entropy:[1.7911406]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:697 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1861263  0.19484712 0.16805387 0.11827711 0.14229466 0.23822746]] probs:[[0.16846794 0.16994354 0.16545066 0.15741667 0.1612432  0.17747799]] entropy:[1.7910192]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:698 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18792675 0.19581617 0.16745675 0.11849957 0.14101918 0.23902154]] probs:[[0.16871415 0.17005047 0.16529568 0.15739816 0.16098292 0.17755859]] entropy:[1.7909981]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:699 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18888636 0.19556251 0.16790497 0.11849416 0.14023092 0.23895997]] probs:[[0.16886696 0.16999811 0.16536081 0.15738876 0.16084734 0.17753801]] entropy:[1.7909937]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:700 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1934699  0.19324152 0.17073724 0.11739942 0.13599613 0.23836097]] probs:[[0.16966099 0.16962226 0.16584766 0.15723348 0.16018486 0.17745079]] entropy:[1.7909633]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:701 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21618769 0.18206884 0.18724528 0.11320025 0.11525626 0.23442113]] probs:[[0.17353691 0.16771589 0.16858633 0.1565543  0.1568765  0.17673011]] entropy:[1.7907012]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:702 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2522307  0.16513486 0.21584511 0.11447375 0.08437759 0.22806592]] probs:[[0.17941175 0.16444688 0.17300108 0.15632333 0.15168868 0.17512827]] entropy:[1.7899171]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4163401] v_loss:[[3.7258706e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.04994155748412821
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:703 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.27590552 0.17058492 0.19392094 0.15470073 0.03726816 0.19435218]] probs:[[0.18461908 0.1661638  0.17008701 0.16354528 0.14542453 0.17016037]] entropy:[1.789309]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:704 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.27549052 0.1695682  0.17957275 0.15412402 0.02406776 0.1729247 ]] probs:[[0.18608269 0.16738036 0.16906333 0.16481517 0.14471531 0.16794312]] entropy:[1.7891226]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:705 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.26683003  0.18045585  0.14053132  0.14983709 -0.00342688  0.11723129]] probs:[[0.18823774 0.17266124 0.16590363 0.16745469 0.14365992 0.16208275]] entropy:[1.7885797]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:706 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25946856  0.18669128  0.12001891  0.14957976 -0.02120039  0.08782634]] probs:[[0.18892387 0.1756629  0.16433293 0.16926326 0.1426901  0.15912688]] entropy:[1.7880731]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:707 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22900571  0.17276742  0.13344732  0.17037588 -0.00656802  0.12761858]] probs:[[0.18211834 0.17215899 0.16552103 0.17174776 0.14389487 0.16455907]] entropy:[1.7892491]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:708 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23648712 0.16285525 0.16734791 0.17722341 0.03074586 0.18194653]] probs:[[0.17967412 0.16691971 0.16767131 0.16933535 0.14626259 0.17013703]] entropy:[1.7899013]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:709 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24334508 0.16251041 0.18913935 0.17498243 0.03713226 0.19972   ]] probs:[[0.17938901 0.16545878 0.16992395 0.16753529 0.14596166 0.17173141]] entropy:[1.7898284]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:710 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24329634 0.16033854 0.18969697 0.1762422  0.04071981 0.20389043]] probs:[[0.17916875 0.16490512 0.16981824 0.16754869 0.14631349 0.17224573]] entropy:[1.7898716]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.465918] v_loss:[[0.00016822]]
DEBUG:chainerrl.agents.a3c:grad norm:0.7139032872458543
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:711 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22131835 0.15241666 0.21243842 0.16090952 0.03789369 0.22246633]] probs:[[0.17545335 0.16377139 0.17390224 0.16516821 0.14604993 0.17565489]] entropy:[1.7897756]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:712 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21751799 0.15225436 0.20783012 0.1578168  0.03220038 0.2272124 ]] probs:[[0.17513725 0.16407216 0.17344873 0.16498734 0.1455111  0.17684336]] entropy:[1.7896748]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:713 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20486422 0.14788206 0.19526279 0.161538   0.00294916 0.2346196 ]] probs:[[0.17421539 0.16456576 0.17255068 0.16682847 0.1423626  0.17947713]] entropy:[1.7891296]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:714 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2033796   0.134324    0.1659091   0.17757484 -0.01600834  0.23343582]] probs:[[0.1752988  0.16360195 0.16885181 0.17083313 0.14076671 0.1806476 ]] entropy:[1.7887492]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:715 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19421604  0.12228858  0.14558098  0.1882237  -0.02807381  0.23628962]] probs:[[0.17480052 0.16266908 0.16650249 0.17375617 0.13995984 0.18231189]] entropy:[1.7883906]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:716 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18507981  0.12153438  0.14188002  0.19164246 -0.04062157  0.24149725]] probs:[[0.17364936 0.16295803 0.16630746 0.1747927  0.1385646  0.18372783]] entropy:[1.7880185]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:717 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18154079  0.12396631  0.14330368  0.20075914 -0.04160561  0.24447048]] probs:[[0.17268503 0.16302358 0.16620669 0.17603584 0.13814762 0.18390125]] entropy:[1.7878983]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:718 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2079911   0.1117266   0.14247896  0.19576086 -0.0120904   0.2319904 ]] probs:[[0.17669721 0.1604806  0.16549243 0.17454933 0.14179127 0.18098912]] entropy:[1.7886047]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.0966862] v_loss:[[0.00120919]]
DEBUG:chainerrl.agents.a3c:grad norm:6.489162283485683
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:719 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15060058  0.12320976  0.19298947  0.19460173 -0.05675483  0.25018662]] probs:[[0.16726018 0.16274096 0.17450258 0.17478414 0.1359375  0.18477459]] entropy:[1.7873611]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:720 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15255207  0.13340215  0.22242711  0.17494091 -0.04716105  0.2594123 ]] probs:[[0.1664505  0.16329332 0.17849721 0.17021917 0.13631724 0.18522257]] entropy:[1.7873323]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:721 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15950067  0.1460162   0.24373478  0.16125347 -0.02991883  0.25302354]] probs:[[0.16661893 0.16438724 0.18126199 0.16691123 0.13786705 0.18295352]] entropy:[1.7877059]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:722 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16261192  0.15075521  0.2513744   0.15666112 -0.0230324   0.24929227]] probs:[[0.1667718  0.1648061  0.18225172 0.1657823  0.13851547 0.18187265]] entropy:[1.7878424]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:723 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16435099  0.15817027  0.25957814  0.15235871 -0.02540163  0.25983337]] probs:[[0.16641206 0.16538668 0.18303807 0.16442832 0.13765001 0.18308479]] entropy:[1.78749]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:724 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16414005  0.16168022  0.2579424   0.14430416 -0.02961222  0.25878924]] probs:[[0.16668388 0.16627437 0.18307602 0.16341014 0.13732451 0.18323112]] entropy:[1.7873979]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:725 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15770276  0.15726532  0.25758082  0.1460208  -0.03251977  0.25374496]] probs:[[0.16609734 0.1660247  0.18354358 0.1641683  0.13732515 0.18284087]] entropy:[1.7874014]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:726 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15161407  0.15630002  0.2557041   0.1440325  -0.03889588  0.25296128]] probs:[[0.16556174 0.16633938 0.18372393 0.16431126 0.136843   0.18322068]] entropy:[1.7872543]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5610759] v_loss:[[0.00018517]]
DEBUG:chainerrl.agents.a3c:grad norm:3.9370909487975343
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:727 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17484674 0.08201664 0.27760246 0.19352713 0.00949623 0.24041758]] probs:[[0.16796024 0.15307029 0.186137   0.17112729 0.14236252 0.17934263]] entropy:[1.7876941]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:728 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19512907  0.05975929  0.30325258  0.19084574 -0.00717897  0.25272992]] probs:[[0.17066479 0.14905742 0.19015221 0.16993535 0.13940638 0.18078387]] entropy:[1.7861897]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:729 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19703701  0.06096785  0.30074993  0.18734011 -0.00695059  0.2524552 ]] probs:[[0.17109023 0.14932452 0.1897873  0.16943921 0.13951938 0.18083939]] entropy:[1.7862809]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:730 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.20294118  0.06463351  0.29584023  0.17967097 -0.00550992  0.252479  ]] probs:[[0.17218204 0.14994141 0.18894413 0.1682216  0.13978441 0.18092637]] entropy:[1.7864783]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:731 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.20276284  0.06251749  0.30582985  0.19044071 -0.0053222   0.24905218]] probs:[[0.17167443 0.14920996 0.19031237 0.16957201 0.13942333 0.17980792]] entropy:[1.7862449]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:732 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.23775741  0.06298687  0.28041878  0.16366176 -0.00630727  0.24616118]] probs:[[0.17845376 0.14983863 0.18623157 0.16570911 0.13980727 0.17995976]] entropy:[1.7865565]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:733 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.23403615  0.06815067  0.28024048  0.16074014 -0.00244389  0.25239107]] probs:[[0.17756681 0.15042453 0.18596366 0.16501741 0.14017153 0.18085614]] entropy:[1.7866926]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:734 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24659915  0.05637316  0.27629405  0.16294578 -0.01241515  0.23970364]] probs:[[0.18045321 0.1491937  0.1858921  0.16597185 0.13927595 0.17921317]] entropy:[1.7863384]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.919086] v_loss:[[0.00278201]]
DEBUG:chainerrl.agents.a3c:grad norm:58.10143638927764
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:735 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0509519   0.08443562  0.29923707  0.17871827 -0.06302559  0.2441938 ]] probs:[[0.1524944  0.15768692 0.1954712  0.17327748 0.1360674  0.18500258]] entropy:[1.7844341]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:736 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05097679  0.08443436  0.2992475   0.17875664 -0.06300283  0.24426778]] probs:[[0.15249376 0.15768217 0.19546756 0.17327909 0.13606656 0.18501091]] entropy:[1.7844334]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:737 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05095005  0.08444335  0.29923683  0.17872852 -0.06302232  0.2442114 ]] probs:[[0.15249313 0.15768714 0.1954699  0.17327815 0.13606699 0.18500465]] entropy:[1.7844341]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:738 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05093973  0.08444851  0.29922837  0.17871228 -0.06303438  0.24418963]] probs:[[0.15249324 0.15768968 0.1954704  0.17327723 0.13606684 0.18500265]] entropy:[1.7844344]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:739 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0511981   0.08443207  0.29922867  0.17889196 -0.06295443  0.24466978]] probs:[[0.15250705 0.15766063 0.19543765 0.1732793  0.13605489 0.18506046]] entropy:[1.7844306]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:740 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0423078   0.08029868  0.29119197  0.17868802 -0.07422062  0.23726456]] probs:[[0.15214458 0.15803587 0.19513963 0.17437558 0.1354094  0.18489498]] entropy:[1.7843041]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:741 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03553652  0.08058571  0.28504598  0.17930551 -0.079986    0.23020777]] probs:[[0.15174729 0.1587397  0.19475181 0.17521004 0.13519174 0.18435952]] entropy:[1.7843337]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:742 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02973927  0.08127134  0.2882951   0.18609644 -0.08737815  0.23461361]] probs:[[0.15073779 0.15870926 0.19521426 0.17624924 0.13407837 0.18501118]] entropy:[1.7838022]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3435935] v_loss:[[8.454636e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.48811328171400853
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:743 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09542005  0.09166042  0.25416458  0.18956749 -0.02314625  0.24101157]] probs:[[0.1584303  0.15783577 0.1856864  0.1740708  0.14071667 0.18326005]] entropy:[1.7871721]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:744 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09541442  0.09043715  0.2555171   0.18855432 -0.0209606   0.24319303]] probs:[[0.1583361  0.15754998 0.18582821 0.17379212 0.1409415  0.1835521 ]] entropy:[1.787158]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:745 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09645013  0.09392121  0.25208905  0.18847    -0.01816419  0.23997489]] probs:[[0.15852104 0.15812066 0.18521667 0.17380036 0.1413548  0.18298645]] entropy:[1.787386]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:746 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09453055  0.09348128  0.2543574   0.18658233 -0.01964209  0.2425483 ]] probs:[[0.15821989 0.15805396 0.18564062 0.17347571 0.14114858 0.18346126]] entropy:[1.787256]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:747 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09576895  0.09396502  0.2532012   0.18699013 -0.01982242  0.24148807]] probs:[[0.15843037 0.15814483 0.18544298 0.17356226 0.14113599 0.18328354]] entropy:[1.7873043]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:748 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09525242  0.09279982  0.2536966   0.18857084 -0.02167589  0.2415286 ]] probs:[[0.15837286 0.15798491 0.18556336 0.17386352 0.14089626 0.1833191 ]] entropy:[1.7872242]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:749 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08960161  0.11185116  0.24459098  0.1697755  -0.03146554  0.22764859]] probs:[[0.1585383  0.16210525 0.18511654 0.17177236 0.1404609  0.18200664]] entropy:[1.7875693]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:750 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08987956  0.11154073  0.24335034  0.16756694 -0.03235961  0.22799307]] probs:[[0.1586899  0.1621648  0.18501239 0.17150962 0.14043051 0.1821928 ]] entropy:[1.7875755]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4232886] v_loss:[[6.7753e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.5392485454074725
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:751 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11550958 0.02321517 0.21001452 0.17185067 0.03715662 0.21580411]] probs:[[0.16396011 0.14950484 0.18021095 0.173463   0.15160376 0.18125732]] entropy:[1.7888261]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:752 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12184668 0.02791514 0.19860469 0.17013235 0.04277585 0.20901656]] probs:[[0.16516224 0.15035462 0.178339   0.17333288 0.1526057  0.18020555]] entropy:[1.7892497]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:753 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12190074 0.03007945 0.19582038 0.16916838 0.0423195  0.20699383]] probs:[[0.16529712 0.1507953  0.17797877 0.17329793 0.15265238 0.17997855]] entropy:[1.7893429]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:754 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12512727 0.02788014 0.19757828 0.17442942 0.04393727 0.20833233]] probs:[[0.1655138  0.15017593 0.17795053 0.17387848 0.15260679 0.17987454]] entropy:[1.7892634]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:755 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12299401 0.02573657 0.19890255 0.17226554 0.04510639 0.21261428]] probs:[[0.1651386  0.14983396 0.17816207 0.17347902 0.1527645  0.18062182]] entropy:[1.789182]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:756 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12458864 0.03034509 0.1946377  0.17040697 0.04318313 0.20623115]] probs:[[0.16566183 0.15076238 0.17768238 0.17342874 0.15271035 0.17975433]] entropy:[1.7893784]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:757 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09980951 0.02823935 0.2024494  0.15897372 0.03175134 0.23387535]] probs:[[0.16187394 0.15069346 0.17937127 0.17174007 0.15122363 0.1850977 ]] entropy:[1.7886279]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:758 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09787138 0.02923703 0.1955949  0.15689506 0.02647193 0.23374034]] probs:[[0.16197617 0.15123197 0.1786043  0.17182438 0.15081438 0.18554886]] entropy:[1.7886479]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4283006] v_loss:[[1.2677901e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.2859620545034779
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:759 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13042192 0.04912704 0.18303762 0.14423187 0.02620918 0.19799946]] probs:[[0.16776276 0.15466413 0.17682606 0.17009562 0.15115987 0.17949158]] entropy:[1.7897482]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:760 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15051465 0.05968057 0.21686187 0.14219691 0.0483116  0.22275868]] probs:[[0.168032   0.15344164 0.1795586  0.16664013 0.15170705 0.18062055]] entropy:[1.7894677]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:761 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.145954   0.05891047 0.22038479 0.13823725 0.04554801 0.21878633]] probs:[[0.16760972 0.1536373  0.18056105 0.16632129 0.15159799 0.18027267]] entropy:[1.7894261]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:762 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13705741 0.05480146 0.2033864  0.13719596 0.04365023 0.21548644]] probs:[[0.16716364 0.15396376 0.17862742 0.1671868  0.15225641 0.18080194]] entropy:[1.7896166]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:763 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13983886 0.05705344 0.21460421 0.13309272 0.04933587 0.2216683 ]] probs:[[0.16693938 0.15367581 0.1798991  0.16581696 0.15249437 0.18117441]] entropy:[1.7894909]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:764 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14870848 0.06037767 0.22261223 0.13891485 0.04808327 0.22105187]] probs:[[0.16773707 0.15355624 0.18060304 0.16610233 0.15167992 0.18032146]] entropy:[1.7894187]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:765 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14838244 0.04637839 0.1890995  0.13241567 0.05638262 0.21596475]] probs:[[0.16918279 0.15277644 0.17621359 0.16650294 0.15431252 0.18101177]] entropy:[1.7898066]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:766 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14687252 0.05851367 0.22060964 0.13853964 0.04573943 0.21660104]] probs:[[0.16779405 0.15360409 0.18063428 0.16640164 0.15165439 0.17991164]] entropy:[1.7894498]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4540282] v_loss:[[0.00011965]]
DEBUG:chainerrl.agents.a3c:grad norm:1.1799689498291457
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:767 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13742948 0.07328245 0.2010811  0.12203206 0.06059741 0.15773372]] probs:[[0.1684958  0.15802668 0.17956953 0.16592129 0.15603477 0.17195196]] entropy:[1.7906034]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:768 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13968971 0.07593297 0.21126585 0.12215954 0.06148591 0.16073376]] probs:[[0.16831903 0.1579225  0.18080829 0.16539408 0.1556574  0.17189868]] entropy:[1.7904761]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:769 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14162417 0.07506942 0.2109664  0.12252828 0.06323931 0.1577182 ]] probs:[[0.16865303 0.15779375 0.18076281 0.165463   0.15593803 0.1713893 ]] entropy:[1.7905034]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:770 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14878565 0.07993518 0.21247128 0.1258741  0.06440692 0.15846409]] probs:[[0.16933708 0.15807045 0.18047222 0.1655014  0.15563485 0.17098396]] entropy:[1.790523]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:771 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15611999 0.07349636 0.26205704 0.13476186 0.08954781 0.16736226]] probs:[[0.1678354  0.15452565 0.18659131 0.16428874 0.15702602 0.1697329 ]] entropy:[1.7898266]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:772 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15734152 0.07133674 0.25491324 0.12154119 0.10371555 0.17130633]] probs:[[0.16815874 0.15430075 0.18539341 0.16224508 0.15937857 0.17052351]] entropy:[1.7900023]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:773 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14349522 0.07931072 0.22400793 0.13775598 0.1145229  0.1598487 ]] probs:[[0.16655888 0.15620424 0.1805236  0.1656057  0.16180252 0.16930509]] entropy:[1.7907673]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:774 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1468717  0.0685365  0.23148057 0.13774075 0.10097883 0.15595211]] probs:[[0.1675569  0.15493222 0.18235071 0.1660339  0.160041   0.16908531]] entropy:[1.790466]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.1518154] v_loss:[[0.00091545]]
DEBUG:chainerrl.agents.a3c:grad norm:17.38570221237228
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:775 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.25015372 0.174786   0.33148435 0.01352456 0.19478524 0.00302193]] probs:[[0.18087795 0.16774666 0.19620365 0.14276406 0.17113523 0.14127252]] entropy:[1.7847888]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:776 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.25181934 0.17523216 0.33140534 0.01331283 0.19226462 0.00237012]] probs:[[0.18121429 0.16785376 0.19622585 0.14276128 0.1707372  0.14120759]] entropy:[1.7847555]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:777 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25987968  0.21454117  0.2933633   0.00481674  0.12475244 -0.00117664]] probs:[[0.1848964  0.17670067 0.19119221 0.14327025 0.16152637 0.14241414]] entropy:[1.7851169]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:778 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25512755  0.2153818   0.29563943  0.00264889  0.13749656 -0.02598999]] probs:[[0.18439251 0.17720743 0.19201598 0.14324953 0.16392937 0.13920522]] entropy:[1.7845366]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:779 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25383464  0.22828896  0.25323573  0.00089861  0.09918452 -0.06587421]] probs:[[0.18748458 0.18275581 0.18737233 0.14558506 0.16062082 0.13618138]] entropy:[1.7840416]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:780 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24700476  0.22411524  0.24103436 -0.0004095   0.09745967 -0.10939541]] probs:[[0.18819891 0.18394005 0.18707864 0.14694893 0.16205801 0.13177544]] entropy:[1.7831401]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:781 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18688734  0.2211012   0.20010808 -0.0097428   0.0842608  -0.12280498]] probs:[[0.18162845 0.1879502  0.18404566 0.14920676 0.16391313 0.13325584]] entropy:[1.7843572]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:782 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19440694  0.24527913  0.1762471  -0.00868587  0.05532948 -0.10460249]] probs:[[0.1830795  0.19263412 0.1797848  0.14942993 0.15930855 0.13576305]] entropy:[1.7843987]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5152379] v_loss:[[0.00011113]]
DEBUG:chainerrl.agents.a3c:grad norm:2.1453201635032784
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:783 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2281313   0.21766278  0.10178253  0.01391458  0.04537646 -0.03166863]] probs:[[0.18931344 0.18734196 0.16684334 0.15280879 0.15769286 0.14599963]] entropy:[1.786877]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:784 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.23812181  0.22450697  0.11794712  0.02159513  0.04724347 -0.019633  ]] probs:[[0.18948108 0.18691881 0.1680253  0.15259121 0.15655555 0.14642808]] entropy:[1.7868692]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:785 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2378886   0.22300006  0.11556795  0.02280642  0.04749122 -0.02003454]] probs:[[0.18954304 0.18674193 0.16771993 0.15286176 0.15668207 0.14645131]] entropy:[1.7869179]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:786 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2421942   0.22629358  0.12419051  0.02518026  0.04614773 -0.01623536]] probs:[[0.18967867 0.1866865  0.16856606 0.15267591 0.15591094 0.14648189]] entropy:[1.7868373]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:787 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24055418  0.2253909   0.12088212  0.02454318  0.04703595 -0.01734032]] probs:[[0.18958712 0.18673404 0.16820383 0.15275535 0.15623018 0.14648955]] entropy:[1.7868768]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:788 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24164557  0.22884594  0.12491465  0.02715905  0.04762208 -0.01452544]] probs:[[0.18933262 0.18692467 0.16847281 0.15278304 0.15594165 0.14654526]] entropy:[1.7868755]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:789 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.23609504  0.22803195  0.11819464  0.02800871  0.0494641  -0.01815154]] probs:[[0.18874542 0.18722968 0.16775402 0.15328714 0.1566115  0.14637218]] entropy:[1.7869841]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:790 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.21285626  0.22601877  0.13489586 -0.0087871   0.0447095  -0.01451457]] probs:[[0.18582848 0.18829061 0.17189154 0.14888597 0.15706776 0.14803568]] entropy:[1.7869227]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6249589] v_loss:[[0.00048892]]
DEBUG:chainerrl.agents.a3c:grad norm:4.275223748759739
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:791 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19674136 0.16442668 0.10774026 0.04271905 0.07603902 0.0086712 ]] probs:[[0.1833129  0.17748389 0.16770281 0.15714552 0.16246982 0.1518851 ]] entropy:[1.7896013]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:792 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19723131 0.16312626 0.10510772 0.04093547 0.07710742 0.00981631]] probs:[[0.18349707 0.17734441 0.16734794 0.15694617 0.16272713 0.15213734]] entropy:[1.7896119]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:793 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20428693 0.1683036  0.11496663 0.03997158 0.07539143 0.01672323]] probs:[[0.18396808 0.17746598 0.16824847 0.1560922  0.16172005 0.15250517]] entropy:[1.7895045]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:794 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20792435 0.17036985 0.12509665 0.04439187 0.07753608 0.02059187]] probs:[[0.18383323 0.17705749 0.16922028 0.15609995 0.16136046 0.15242863]] entropy:[1.7895136]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:795 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19272992 0.16445847 0.10848115 0.03888771 0.07868602 0.00767027]] probs:[[0.18274827 0.17765407 0.16798268 0.1566897  0.16305144 0.15187381]] entropy:[1.7896265]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:796 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19910869 0.1609728  0.10089071 0.03969228 0.08020543 0.01291053]] probs:[[0.18383443 0.17695574 0.16663696 0.15674482 0.16322543 0.15260263]] entropy:[1.789645]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:797 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20199585 0.1624446  0.10229234 0.03754507 0.08079614 0.01623171]] probs:[[0.18412772 0.17698738 0.16665505 0.1562065  0.16311084 0.15291245]] entropy:[1.7896045]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:798 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2070158  0.16663155 0.11067953 0.03695576 0.08024643 0.02189101]] probs:[[0.18436135 0.17706439 0.16742934 0.15552984 0.16241069 0.15320438]] entropy:[1.7895377]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4138248] v_loss:[[3.054e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.09898799387547053
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:799 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14310347 0.16100308 0.14611056 0.01246906 0.14512378 0.0256881 ]] probs:[[0.17271608 0.17583548 0.17323624 0.15156502 0.17306538 0.15358186]] entropy:[1.7899209]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:800 r:0.3 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14365897 0.16050678 0.14485669 0.01142342 0.14579654 0.02696699]] probs:[[0.17282133 0.17575766 0.17302845 0.15141475 0.17319115 0.15378666]] entropy:[1.7899263]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:801 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1434     0.16070771 0.14524892 0.01183876 0.14545983 0.02645368]] probs:[[0.17277932 0.17579578 0.17309909 0.15148006 0.1731356  0.15371019]] entropy:[1.7899256]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:802 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14370468 0.16044225 0.14497977 0.01152186 0.14572309 0.02701265]] probs:[[0.17282455 0.17574157 0.17304507 0.15142556 0.17317374 0.15378952]] entropy:[1.7899287]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:803 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14366758 0.16062343 0.14499813 0.01110392 0.14587238 0.02694046]] probs:[[0.17282158 0.1757769  0.17305167 0.1513653  0.17320304 0.15378147]] entropy:[1.789919]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:804 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14368227 0.16058356 0.14497468 0.01119421 0.14586322 0.0269594 ]] probs:[[0.17282301 0.17576876 0.17304651 0.15137799 0.17320035 0.1537834 ]] entropy:[1.7899208]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:805 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14351273 0.16168909 0.14592049 0.00823103 0.14632905 0.02634791]] probs:[[0.17281657 0.17598648 0.17323318 0.15095007 0.17330396 0.15370972]] entropy:[1.7898505]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:806 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14352185 0.1615652  0.14578713 0.00863461 0.1462764  0.02642963]] probs:[[0.17281452 0.17596097 0.17320643 0.15100783 0.1732912  0.15371907]] entropy:[1.7898601]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-0.8911315] v_loss:[[0.0220117]]
DEBUG:chainerrl.agents.a3c:grad norm:32.12254075770325
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:807 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09790782  0.11434771  0.10704352 -0.00677078  0.09786288  0.22415784]] probs:[[0.16499232 0.16772719 0.16650654 0.14859438 0.1649849  0.18719463]] entropy:[1.7895066]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:808 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09809579  0.11326274  0.10610797 -0.00386962  0.09741423  0.22468212]] probs:[[0.16499871 0.1675203  0.16632602 0.14900386 0.16488628 0.18726486]] entropy:[1.7895447]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:809 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0981009   0.11323942  0.10608353 -0.00380926  0.09740468  0.22469056]] probs:[[0.16499923 0.16751608 0.16632164 0.14901257 0.1648844  0.18726608]] entropy:[1.7895455]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:810 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09810133  0.11323662  0.10609347 -0.0038043   0.09739528  0.22469175]] probs:[[0.16499919 0.1675155  0.16632317 0.14901322 0.16488273 0.18726619]] entropy:[1.7895455]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:811 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09814198  0.11291617  0.10659103 -0.00320429  0.09674075  0.22481035]] probs:[[0.16499938 0.16745521 0.16639937 0.14909674 0.16476834 0.18728098]] entropy:[1.7895523]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:812 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09813014  0.11319429  0.10595492 -0.00351446  0.09693361  0.22480768]] probs:[[0.16500998 0.16751453 0.16630621 0.14906186 0.16481265 0.18729472]] entropy:[1.789547]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:813 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09918331  0.11307406  0.10677544 -0.0033059   0.0965186   0.22639525]] probs:[[0.16509297 0.16740224 0.16635114 0.14901091 0.16465363 0.18748909]] entropy:[1.7895181]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:814 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09566364  0.11219893  0.09999625 -0.00206931  0.09092944  0.22293979]] probs:[[0.1650461  0.16779788 0.16576274 0.14967883 0.16426659 0.1874479 ]] entropy:[1.7895863]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5516158] v_loss:[[0.00018346]]
DEBUG:chainerrl.agents.a3c:grad norm:0.7473214066756166
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:815 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07833242 0.11867778 0.07240189 0.00556285 0.11060131 0.19408083]] probs:[[0.16338317 0.17010972 0.1624171  0.15191615 0.16874136 0.18343247]] entropy:[1.7901349]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:816 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06886559 0.11985181 0.06692128 0.00981885 0.10564152 0.19171855]] probs:[[0.16230586 0.17079581 0.1619906  0.15299968 0.16838592 0.18352218]] entropy:[1.790175]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:817 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0705452  0.12195532 0.06811828 0.00986961 0.10968251 0.19287753]] probs:[[0.1622981  0.17086005 0.16190468 0.15274337 0.16877595 0.18341789]] entropy:[1.790154]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:818 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07094859 0.12227519 0.06915291 0.01049198 0.11075898 0.19344045]] probs:[[0.1622552  0.17080064 0.1619641  0.15273644 0.16884494 0.18339866]] entropy:[1.7901564]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:819 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07249539 0.12344477 0.07045801 0.00973698 0.11449774 0.19442257]] probs:[[0.16228567 0.17076828 0.16195537 0.15241389 0.16924722 0.18332955]] entropy:[1.7901301]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:820 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07207067 0.12319611 0.07035483 0.00987242 0.1138742  0.19425084]] probs:[[0.16225642 0.17076756 0.16197826 0.1524718  0.16918308 0.18334289]] entropy:[1.7901349]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:821 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07339206 0.12406161 0.07189845 0.0097518  0.11710276 0.19523662]] probs:[[0.16225643 0.17068976 0.16201428 0.15225211 0.16950607 0.18328139]] entropy:[1.7901186]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:822 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07329802 0.12400231 0.07189927 0.0097087  0.1169868  0.19516963]] probs:[[0.16225152 0.1706905  0.16202472 0.15225525 0.1694972  0.18328078]] entropy:[1.7901194]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3880568] v_loss:[[0.00012295]]
DEBUG:chainerrl.agents.a3c:grad norm:0.5827461968762147
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:823 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07392529 0.12030756 0.07046668 0.03052592 0.12595734 0.16850352]] probs:[[0.16249183 0.17020608 0.1619308  0.15559062 0.17117043 0.17861024]] entropy:[1.7907467]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:824 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07433853 0.12324643 0.06978242 0.02996854 0.12496263 0.16857313]] probs:[[0.16252439 0.1706707  0.1617856  0.15547083 0.17096387 0.17858465]] entropy:[1.7907319]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:825 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07418366 0.12568466 0.07147515 0.02878686 0.12379713 0.1685863 ]] probs:[[0.1624529  0.17103857 0.16201349 0.15524295 0.17071603 0.17853609]] entropy:[1.7907208]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:826 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07020862 0.12819543 0.08182687 0.02497865 0.11795421 0.16951908]] probs:[[0.1617998  0.1714594  0.1636906  0.15464464 0.16971241 0.17869318]] entropy:[1.7906967]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:827 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05248091 0.13113195 0.10064973 0.01790597 0.10070838 0.16679929]] probs:[[0.15954792 0.17260322 0.16742127 0.15412584 0.16743109 0.17887062]] entropy:[1.7905768]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:828 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04473183 0.13516085 0.10708099 0.01507484 0.09193027 0.16461992]] probs:[[0.15859279 0.17360263 0.16879569 0.15395847 0.16625759 0.17879286]] entropy:[1.7904762]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:829 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04320259 0.1347041  0.10908483 0.01477002 0.09093805 0.16425388]] probs:[[0.15839167 0.1735685  0.16917829 0.15395162 0.16613594 0.17877395]] entropy:[1.7904625]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:830 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04285553 0.13452297 0.10943322 0.0147159  0.0907775  0.16409071]] probs:[[0.15835123 0.17355299 0.16925275 0.15395738 0.1661245  0.17876117]] entropy:[1.7904614]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-0.657596] v_loss:[[0.00708557]]
DEBUG:chainerrl.agents.a3c:grad norm:17.848793387573924
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:831 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10950463 0.12757389 0.10617462 0.01879718 0.09205976 0.10577783]] probs:[[0.16928549 0.17237216 0.16872272 0.15460588 0.16635795 0.16865578]] entropy:[1.7911702]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:832 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10950057 0.12758105 0.10618521 0.01879303 0.0920653  0.10577369]] probs:[[0.1692845  0.17237307 0.16872418 0.15460496 0.16635856 0.16865477]] entropy:[1.7911702]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:833 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10952023 0.12758526 0.10618752 0.01879429 0.0920734  0.10579462]] probs:[[0.1692862  0.17237215 0.16872296 0.15460366 0.1663583  0.16865668]] entropy:[1.7911701]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:834 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13652411 0.11837353 0.1229154  0.02741466 0.0892576  0.10680369]] probs:[[0.17272115 0.16961443 0.17038655 0.15486735 0.16474716 0.16766334]] entropy:[1.7911416]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:835 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13100092 0.11817673 0.12152171 0.02665378 0.08866863 0.10607077]] probs:[[0.17203812 0.16984595 0.17041504 0.1549913  0.16490735 0.16780221]] entropy:[1.7911698]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:836 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14601235 0.10995291 0.12968628 0.02953551 0.1019619  0.10958873]] probs:[[0.17362384 0.1674746  0.17081226 0.15453403 0.16614163 0.1674136 ]] entropy:[1.7911079]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:837 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14371352 0.11355631 0.12791067 0.02710184 0.10081726 0.10928694]] probs:[[0.17334881 0.16819914 0.17063095 0.15426844 0.16607004 0.16748257]] entropy:[1.7910974]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:838 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13101365 0.11848272 0.12226741 0.02307886 0.09778771 0.10988663]] probs:[[0.17173456 0.169596   0.17023908 0.15416373 0.16612227 0.16814438]] entropy:[1.791131]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.604188] v_loss:[[0.00065954]]
DEBUG:chainerrl.agents.a3c:grad norm:2.1761601566290274
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:839 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13948728 0.12406383 0.08523885 0.04369394 0.08355648 0.12810025]] probs:[[0.17316541 0.17051509 0.16402172 0.15734705 0.163746   0.17120475]] entropy:[1.7912166]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:840 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13827562 0.12223836 0.08571299 0.04649932 0.07721741 0.12542503]] probs:[[0.17321432 0.17045859 0.16434486 0.15802503 0.16295457 0.17100266]] entropy:[1.7912478]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:841 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13616128 0.12137423 0.0853957  0.04794489 0.0731863  0.1238903 ]] probs:[[0.17306557 0.17052527 0.16449907 0.15845239 0.16250286 0.17095487]] entropy:[1.7912667]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:842 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13824385 0.1260105  0.08036236 0.03926264 0.08714249 0.12915301]] probs:[[0.17305541 0.17095126 0.16332309 0.15674663 0.16443421 0.17148933]] entropy:[1.7911649]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:843 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14755882 0.11889336 0.08819601 0.04619723 0.08155439 0.11946743]] probs:[[0.17463775 0.16970274 0.16457246 0.15780376 0.16348305 0.16980019]] entropy:[1.7912313]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:844 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14781359 0.13123389 0.09197458 0.03732044 0.09982202 0.1290259 ]] probs:[[0.17363612 0.17078103 0.16420619 0.15547249 0.16549985 0.17040437]] entropy:[1.7911167]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:845 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15476447 0.12347291 0.09495541 0.04356933 0.09390613 0.12174364]] probs:[[0.17499723 0.16960607 0.16483764 0.15658124 0.16466478 0.16931303]] entropy:[1.7911744]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:846 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15022404 0.12994763 0.09515078 0.03194032 0.10067224 0.1307269 ]] probs:[[0.17399974 0.17050718 0.1646761  0.15458901 0.16558787 0.1706401 ]] entropy:[1.7910457]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7620777] v_loss:[[0.00139613]]
DEBUG:chainerrl.agents.a3c:grad norm:6.212914302935652
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:847 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11575611  0.0568914  -0.00937326  0.09919406  0.12950705  0.13617675]] probs:[[0.17113711 0.16135393 0.15100843 0.16832608 0.17350666 0.17466776]] entropy:[1.7905189]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:848 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09635119  0.06114699 -0.02865431  0.09135799  0.13766816  0.1427023 ]] probs:[[0.16856174 0.1627309  0.1487544  0.16772218 0.17567207 0.17655866]] entropy:[1.7901726]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:849 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08104445  0.06652982 -0.04343757  0.0866673   0.144986    0.15296243]] probs:[[0.16625711 0.16386138 0.1467974  0.16719459 0.17723508 0.17865445]] entropy:[1.7897506]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:850 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07717984  0.06616607 -0.04765506  0.08824082  0.14574765  0.15624803]] probs:[[0.1656709  0.16385625 0.1462282  0.16751356 0.1774291  0.17930199]] entropy:[1.7896152]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:851 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07497074  0.06438886 -0.05057324  0.09061994  0.14507088  0.15573585]] probs:[[0.16545348 0.1637119  0.14593278 0.16806306 0.17746799 0.1793708 ]] entropy:[1.7895612]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:852 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07491071  0.06389321 -0.05098699  0.09300032  0.14437568  0.15613246]] probs:[[0.16541097 0.16359855 0.14584368 0.16843042 0.17730974 0.17940663]] entropy:[1.7895505]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:853 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07687418  0.06490683 -0.04856737  0.09153646  0.14487648  0.15802012]] probs:[[0.16556624 0.16359666 0.14604719 0.1680117  0.17721677 0.17956142]] entropy:[1.7895769]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:854 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07659284  0.0646732  -0.04888678  0.09168321  0.14485763  0.15784898]] probs:[[0.16554299 0.16358149 0.14602113 0.16806003 0.1772384  0.17955598]] entropy:[1.7895716]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6262672] v_loss:[[0.00079318]]
DEBUG:chainerrl.agents.a3c:grad norm:2.4711541494728713
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:855 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07463155  0.10485337 -0.00466826  0.11756449  0.0113558   0.12090096]] probs:[[0.16710238 0.1722296  0.15436299 0.17443281 0.15685642 0.17501576]] entropy:[1.7905227]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:856 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09392849  0.13019106 -0.02038459  0.10560247  0.03725257  0.13896209]] probs:[[0.16858822 0.17481387 0.15037711 0.17056786 0.15929906 0.1763539 ]] entropy:[1.7902422]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:857 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09291366  0.14481421 -0.03162348  0.10204709  0.04819244  0.13969108]] probs:[[0.16808124 0.1770351  0.14839984 0.16962342 0.16073003 0.17613043]] entropy:[1.7900009]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:858 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11372263  0.13987944 -0.03251334  0.11103852  0.04543027  0.14577143]] probs:[[0.17081374 0.17534062 0.14757518 0.17035587 0.15953788 0.17637677]] entropy:[1.789876]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:859 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11951608  0.1386064  -0.03222091  0.11238894  0.04252669  0.14350371]] probs:[[0.17177516 0.17508592 0.14759167 0.17055525 0.15904656 0.17594546]] entropy:[1.7898614]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:860 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13983183  0.13436806 -0.0214978   0.11840422  0.03188988  0.13489416]] probs:[[0.17491333 0.17396024 0.14885326 0.17120522 0.15701614 0.17405179]] entropy:[1.7899061]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:861 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1657421   0.12870613 -0.00935716  0.12736997  0.01725062  0.12499845]] probs:[[0.17897242 0.17246525 0.15022475 0.17223497 0.15427555 0.17182699]] entropy:[1.7897336]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:862 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.17248002  0.12643854 -0.00710575  0.13198763  0.01256644  0.12326305]] probs:[[0.1800139  0.1719137  0.15042254 0.17287031 0.15341099 0.17136866]] entropy:[1.7896171]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4782362] v_loss:[[4.5107412e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:2.7473033289053346
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:863 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17649673 0.1387265  0.02675596 0.1182921  0.05252211 0.13650869]] probs:[[0.17820466 0.17159936 0.15342194 0.16812842 0.1574264  0.1712192 ]] entropy:[1.7904269]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:864 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17703038 0.13854757 0.02721401 0.11831484 0.05247758 0.13621633]] probs:[[0.17828526 0.17155468 0.15347971 0.16811854 0.15740657 0.1711552 ]] entropy:[1.7904282]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:865 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17680626 0.13927446 0.02702389 0.1180779  0.05264951 0.13673298]] probs:[[0.1782219  0.17165688 0.1534304  0.16805664 0.15741296 0.17122117]] entropy:[1.7904246]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:866 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17549303 0.13998643 0.0257596  0.11733805 0.05151212 0.13766706]] probs:[[0.17806792 0.17185627 0.15330535 0.16800775 0.15730461 0.17145813]] entropy:[1.7904061]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:867 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17684159 0.13963366 0.02706164 0.11794082 0.05281025 0.13720348]] probs:[[0.1782003  0.17169167 0.15341218 0.1680073  0.15741362 0.17127495]] entropy:[1.7904224]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:868 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17608275 0.13977578 0.02621452 0.11763222 0.05198874 0.13741761]] probs:[[0.17813371 0.17178221 0.15334132 0.16802016 0.15734494 0.17137761]] entropy:[1.7904114]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:869 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17774962 0.13974646 0.02691448 0.11778889 0.05349448 0.13762894]] probs:[[0.17830624 0.17165717 0.15334149 0.16792908 0.15747195 0.17129408]] entropy:[1.7904139]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:870 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17610464 0.1398294  0.02610792 0.11767744 0.05190997 0.13766545]] probs:[[0.17813149 0.17178553 0.1533197  0.16802198 0.15732715 0.1714142 ]] entropy:[1.7904077]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5172755] v_loss:[[0.00011759]]
DEBUG:chainerrl.agents.a3c:grad norm:0.20639025529636293
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:871 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17578539 0.12417478 0.05316762 0.08174767 0.08108176 0.14309749]] probs:[[0.17787164 0.16892444 0.15734556 0.16190737 0.1617996  0.17215139]] entropy:[1.7908806]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:872 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17792606 0.12602495 0.05337149 0.08119944 0.08433195 0.1432118 ]] probs:[[0.17804205 0.16903716 0.15719156 0.16162731 0.16213441 0.17196749]] entropy:[1.7908659]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:873 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17491704 0.12291016 0.05169204 0.08134635 0.08135625 0.14484099]] probs:[[0.17777415 0.16876496 0.15716386 0.16189423 0.16189584 0.172507  ]] entropy:[1.7908689]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:874 r:0.1 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17372087 0.12465329 0.05175775 0.08125822 0.08047958 0.14299078]] probs:[[0.1776296  0.16912413 0.15723436 0.16194195 0.1618159  0.17225406]] entropy:[1.7908847]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:875 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1703438  0.12538667 0.04839736 0.07998563 0.07725451 0.14517002]] probs:[[0.17727068 0.16947758 0.15691926 0.16195518 0.16151346 0.17286381]] entropy:[1.7908531]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:876 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17391072 0.12454184 0.05159263 0.08119604 0.08016936 0.14387569]] probs:[[0.17764893 0.16909157 0.15719564 0.16191874 0.16175258 0.17239258]] entropy:[1.790875]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:877 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17288548 0.12528096 0.05096591 0.08065018 0.079729   0.14405161]] probs:[[0.17751744 0.16926481 0.15714191 0.16187648 0.16172743 0.17247202]] entropy:[1.7908729]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:878 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17181551 0.12517872 0.04988462 0.08041021 0.07855297 0.1445261 ]] probs:[[0.17742056 0.16933621 0.15705438 0.16192247 0.16162202 0.17264432]] entropy:[1.7908646]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.0164764] v_loss:[[0.00643327]]
DEBUG:chainerrl.agents.a3c:grad norm:9.483239459575756
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:879 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24742663 0.06709474 0.00203752 0.11839337 0.11695044 0.2555516 ]] probs:[[0.18579856 0.1551405  0.1453688  0.16330667 0.1630712  0.18731432]] entropy:[1.787568]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:880 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24749793 0.06708469 0.00204899 0.1184063  0.11700037 0.2554882 ]] probs:[[0.18580961 0.15513712 0.14536875 0.16330685 0.16307741 0.18730024]] entropy:[1.7875682]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:881 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24733518 0.06705073 0.0017809  0.11814565 0.1167227  0.25556627]] probs:[[0.18580683 0.15515476 0.14535126 0.16328841 0.16305622 0.18734252]] entropy:[1.7875615]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:882 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2475958  0.0667102  0.0020819  0.11834555 0.11687867 0.2552895 ]] probs:[[0.18584678 0.15509486 0.14538838 0.1633136  0.16307421 0.18728213]] entropy:[1.787566]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:883 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2475298  0.06700838 0.00197961 0.11836863 0.11695388 0.2553994 ]] probs:[[0.18582416 0.15513247 0.14536542 0.16330828 0.1630774  0.1872923 ]] entropy:[1.787567]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:884 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24748443 0.0631199  0.00457807 0.11981394 0.11567974 0.2547042 ]] probs:[[0.1858778  0.15458202 0.14579232 0.1635991  0.16292416 0.18722466]] entropy:[1.7875884]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:885 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2484344  0.06213669 0.00497433 0.1198641  0.11585727 0.25362653]] probs:[[0.18606971 0.15444277 0.14586204 0.16362073 0.16296645 0.18703832]] entropy:[1.7875891]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:886 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24746336 0.06313456 0.0045538  0.1197926  0.11566767 0.25472224]] probs:[[0.18587522 0.15458542 0.14578983 0.1635968  0.16292337 0.18722938]] entropy:[1.7875879]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7841905] v_loss:[[0.00177373]]
DEBUG:chainerrl.agents.a3c:grad norm:8.128150699771073
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:887 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2514569   0.08998332 -0.00742491  0.1073799   0.13242356  0.23642665]] probs:[[0.18651551 0.158704   0.14397398 0.16148905 0.1655844  0.18373309]] entropy:[1.7878783]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:888 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25061092  0.09049156 -0.00753253  0.1074459   0.1321814   0.23730977]] probs:[[0.18635027 0.15877827 0.14395268 0.1614932  0.16553763 0.183888  ]] entropy:[1.7878817]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:889 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25145546  0.08997715 -0.00743033  0.10737168  0.13242002  0.23642458]] probs:[[0.18651603 0.1587037  0.14397381 0.16148843 0.16558452 0.18373351]] entropy:[1.7878779]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:890 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2514535   0.08997581 -0.00743292  0.10736778  0.13241796  0.2364253 ]] probs:[[0.186516   0.15870376 0.14397371 0.16148809 0.16558447 0.18373397]] entropy:[1.787878]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:891 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25060573  0.09048792 -0.00754283  0.1074319   0.13217543  0.23731083]] probs:[[0.18635044 0.15877865 0.14395207 0.16149193 0.16553764 0.18388933]] entropy:[1.7878815]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:892 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25060442  0.09048666 -0.00754762  0.10742544  0.13217339  0.23731048]] probs:[[0.18635064 0.15877885 0.14395174 0.16149127 0.16553771 0.18388972]] entropy:[1.7878815]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:893 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25144792  0.08997198 -0.00744507  0.1073517   0.13241148  0.23642619]] probs:[[0.18651624 0.15870424 0.14397293 0.16148658 0.16558453 0.18373539]] entropy:[1.7878776]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:894 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.251446    0.08997055 -0.00744757  0.107348    0.13240948  0.23642687]] probs:[[0.18651623 0.15870431 0.14397284 0.16148628 0.1655845  0.18373585]] entropy:[1.7878777]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4554241] v_loss:[[8.025611e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.2017120593521861
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:895 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2518866  0.07994292 0.01025708 0.11139155 0.13734314 0.21325366]] probs:[[0.18690965 0.15738289 0.14678895 0.16241102 0.166681   0.17982648]] entropy:[1.788517]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:896 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.25369042 0.07892762 0.01035075 0.11147568 0.137729   0.21128596]] probs:[[0.18726283 0.1572364  0.14681503 0.16243833 0.16675936 0.17948806]] entropy:[1.7884971]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:897 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2536873  0.0789253  0.01034942 0.11147315 0.13772526 0.211289  ]] probs:[[0.18726255 0.15723628 0.14681508 0.16243817 0.16675898 0.1794889 ]] entropy:[1.7884967]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:898 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2518226  0.07998849 0.01023456 0.11135616 0.13733321 0.21332602]] probs:[[0.18689816 0.15739046 0.146786   0.16240568 0.16667977 0.17983995]] entropy:[1.7885171]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:899 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.25368568 0.07892299 0.01034167 0.11146068 0.13772158 0.21128999]] probs:[[0.18726306 0.15723659 0.14681457 0.16243684 0.1667591  0.17948985]] entropy:[1.7884967]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:900 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.25181922 0.0799851  0.01022809 0.11134539 0.13732778 0.2133289 ]] probs:[[0.18689832 0.1573906  0.14678568 0.16240461 0.16667958 0.17984122]] entropy:[1.7885169]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:901 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2536823  0.07891978 0.01033525 0.11144986 0.1377161  0.21129279]] probs:[[0.1872632  0.15723675 0.14681424 0.16243577 0.16675888 0.1794911 ]] entropy:[1.7884965]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:902 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.25181568 0.07998183 0.01022164 0.11133451 0.13732247 0.21333177]] probs:[[0.18689844 0.15739074 0.14678535 0.16240352 0.16667938 0.17984249]] entropy:[1.7885168]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4543382] v_loss:[[6.7230094e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.2527706772691934
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:903 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2039734  0.07975634 0.05083364 0.14596362 0.12974209 0.22245039]] probs:[[0.17755859 0.15681763 0.152347   0.16755152 0.1648555  0.18086983]] entropy:[1.7898798]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:904 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20279509 0.08017893 0.05101903 0.14603628 0.12946303 0.22347042]] probs:[[0.17734306 0.15687822 0.15236972 0.16755761 0.16480352 0.18104784]] entropy:[1.7898836]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:905 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2027936  0.08017805 0.05101532 0.14603065 0.12946126 0.22347166]] probs:[[0.17734316 0.1568784  0.15236947 0.167557   0.16480356 0.18104844]] entropy:[1.7898835]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:906 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20395164 0.07978448 0.05082875 0.14596592 0.12979825 0.222499  ]] probs:[[0.17755148 0.15681918 0.15234348 0.16754884 0.16486174 0.18087532]] entropy:[1.7898797]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:907 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20279087 0.0801752  0.05100997 0.146022   0.12945533 0.22347327]] probs:[[0.17734335 0.15687856 0.15236923 0.1675562  0.16480322 0.18104942]] entropy:[1.7898835]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:908 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20278922 0.08017445 0.05100669 0.14601725 0.12945418 0.22347513]] probs:[[0.17734334 0.15687868 0.15236898 0.16755567 0.1648033  0.18105005]] entropy:[1.7898834]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:909 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20394714 0.07978072 0.05082002 0.14595251 0.12979114 0.22250247]] probs:[[0.17755166 0.15681945 0.15234299 0.16754751 0.16486149 0.18087694]] entropy:[1.7898796]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:910 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20278633 0.08017144 0.05100131 0.14600879 0.12944831 0.22347668]] probs:[[0.17734352 0.15687883 0.15236875 0.1675549  0.16480295 0.18105103]] entropy:[1.7898831]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4464713] v_loss:[[3.3421213e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.10873470046735668
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:911 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23621128 0.06400844 0.04924718 0.15671937 0.09813916 0.20204277]] probs:[[0.18408369 0.15496318 0.15269254 0.17001702 0.1603435  0.17790006]] entropy:[1.7893304]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:912 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23703948 0.06342033 0.04932562 0.15701492 0.09817051 0.20108813]] probs:[[0.18424375 0.15487844 0.15271078 0.17007425 0.16035509 0.1777376 ]] entropy:[1.7893198]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:913 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23619805 0.06401709 0.04923834 0.15670441 0.09814101 0.2020587 ]] probs:[[0.1840816  0.15496482 0.15269147 0.1700148  0.1603441  0.17790323]] entropy:[1.7893307]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:914 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.236197   0.06401522 0.04923528 0.15669963 0.09813838 0.20205916]] probs:[[0.18408178 0.15496485 0.15269133 0.17001434 0.16034399 0.17790368]] entropy:[1.7893305]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:915 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23703486 0.06341667 0.04931752 0.15700231 0.09816357 0.2010916 ]] probs:[[0.1842439  0.1548787  0.15271036 0.17007302 0.16035485 0.17773917]] entropy:[1.7893196]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:916 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23619348 0.06401331 0.04923028 0.15669172 0.09813412 0.20206213]] probs:[[0.18408173 0.15496506 0.15269105 0.17001355 0.16034384 0.17790478]] entropy:[1.7893304]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:917 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23619248 0.06401155 0.0492271  0.15668696 0.09813153 0.20206259]] probs:[[0.18408193 0.1549651  0.15269089 0.17001307 0.16034375 0.17790523]] entropy:[1.7893305]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:918 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2370304  0.06341292 0.04930937 0.15698957 0.09815674 0.20109501]] probs:[[0.18424405 0.15487893 0.15270993 0.17007175 0.16035458 0.17774071]] entropy:[1.7893193]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5636276] v_loss:[[0.00017188]]
DEBUG:chainerrl.agents.a3c:grad norm:1.1316925084893903
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:919 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20162676 0.12592895 0.01889035 0.18995039 0.13491172 0.18465409]] probs:[[0.17645685 0.1635925  0.14698638 0.17440847 0.16506863 0.17348719]] entropy:[1.7899158]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:920 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19440964 0.12856857 0.01227948 0.19852342 0.14192839 0.18443651]] probs:[[0.17504416 0.1638903  0.14589806 0.17576575 0.16609453 0.1733071 ]] entropy:[1.7898021]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:921 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1932375  0.12899075 0.01130446 0.19989093 0.14297764 0.18436289]] probs:[[0.17481743 0.16393916 0.1457378  0.17598444 0.16624828 0.17327285]] entropy:[1.7897825]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:922 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19489044 0.12806442 0.01005356 0.19921428 0.14401771 0.18185021]] probs:[[0.1751812  0.16385713 0.14561759 0.17594029 0.16649215 0.17291163]] entropy:[1.7897639]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:923 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19485968 0.12807548 0.01001919 0.19922896 0.14403011 0.1818501 ]] probs:[[0.1751765  0.16385958 0.14561316 0.17594357 0.16649486 0.17291228]] entropy:[1.7897635]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:924 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19485562 0.1280766  0.01001551 0.19923095 0.14403176 0.18185066]] probs:[[0.17517585 0.16385981 0.14561267 0.17594399 0.1664952  0.17291245]] entropy:[1.7897632]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:925 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19304813 0.1290574  0.01114228 0.20010026 0.14314163 0.18435058]] probs:[[0.17478152 0.16394746 0.14571182 0.17601846 0.16627286 0.17326793]] entropy:[1.7897794]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:926 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19304849 0.1290564  0.01114623 0.2001028  0.14314193 0.18435246]] probs:[[0.17478134 0.16394706 0.1457122  0.17601866 0.16627268 0.17326802]] entropy:[1.7897794]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4944593] v_loss:[[4.7328143e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.11827776018325675
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:927 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13779593 0.1773357  0.23195016 0.11529078 0.07129373 0.24618791]] probs:[[0.16215464 0.16869465 0.17816405 0.15854609 0.15172176 0.18071884]] entropy:[1.7898293]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:928 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13772482 0.17728195 0.2317933  0.11529202 0.0711474  0.24610664]] probs:[[0.16215694 0.16869996 0.17815128 0.1585598  0.15171249 0.18071955]] entropy:[1.78983]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:929 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13772023 0.17727822 0.23178253 0.1152921  0.07113703 0.24610122]] probs:[[0.16215713 0.16870031 0.17815039 0.15856074 0.15171179 0.18071963]] entropy:[1.78983]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:930 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13771981 0.17727777 0.231782   0.11529209 0.07113605 0.24610105]] probs:[[0.16215713 0.16870031 0.17815037 0.1585608  0.1517117  0.18071967]] entropy:[1.78983]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:931 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13829318 0.17766595 0.2329729  0.11533208 0.07226179 0.24669784]] probs:[[0.16214378 0.16865519 0.17824575 0.15846321 0.15178303 0.18070903]] entropy:[1.7898264]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:932 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13830069 0.17767176 0.23299201 0.11533177 0.07227815 0.2467077 ]] probs:[[0.16214342 0.16865452 0.17824742 0.15846162 0.15178403 0.18070905]] entropy:[1.7898263]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:933 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13830057 0.17767172 0.23299225 0.11533152 0.07227822 0.24670796]] probs:[[0.16214338 0.1686545  0.17824744 0.15846156 0.15178403 0.18070908]] entropy:[1.789826]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:934 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13775119 0.17730215 0.23186234 0.11529094 0.07120528 0.24614255]] probs:[[0.16215552 0.16869746 0.17815733 0.15855408 0.15171595 0.18071972]] entropy:[1.7898295]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5458428] v_loss:[[0.00016704]]
DEBUG:chainerrl.agents.a3c:grad norm:0.47039650845825165
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:935 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14038517 0.17659271 0.22793712 0.11904134 0.07380389 0.22183508]] probs:[[0.16319342 0.16921054 0.17812546 0.15974715 0.15268162 0.17704183]] entropy:[1.7902524]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:936 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.141158   0.17718464 0.22916664 0.11896036 0.0750297  0.22277047]] probs:[[0.16319141 0.16917783 0.17820461 0.15960886 0.15274891 0.17706841]] entropy:[1.7902461]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:937 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14138266 0.17736611 0.22970292 0.11896612 0.07551283 0.22304955]] probs:[[0.16318122 0.16915996 0.17824903 0.15956394 0.15277885 0.17706701]] entropy:[1.790244]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:938 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14145286 0.17742287 0.22987081 0.11896766 0.07566392 0.22313708]] probs:[[0.16317801 0.16915436 0.17826293 0.15954986 0.15278822 0.1770666 ]] entropy:[1.7902431]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:939 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13990177 0.17626895 0.22695972 0.11898499 0.07290871 0.22132656]] probs:[[0.16320321 0.16924769 0.17804816 0.15982498 0.15262792 0.17704801]] entropy:[1.7902554]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:940 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13946155 0.17599055 0.2261253  0.11894143 0.07212272 0.22088496]] probs:[[0.1632085  0.16928057 0.17798376 0.15989357 0.1525801  0.17705351]] entropy:[1.7902576]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:941 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1405587  0.1644755  0.25556847 0.13931946 0.08596407 0.20406197]] probs:[[0.16240929 0.16634043 0.18220446 0.16220815 0.15378031 0.17305733]] entropy:[1.7903087]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:942 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13863759 0.16365862 0.25485247 0.14277701 0.08746248 0.20401862]] probs:[[0.16206405 0.16617021 0.18203637 0.1627363  0.15397905 0.17301403]] entropy:[1.7903451]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7713473] v_loss:[[0.0012085]]
DEBUG:chainerrl.agents.a3c:grad norm:7.596374213771603
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:943 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05839609 0.1134556  0.19559506 0.21926364 0.22044504 0.18974969]] probs:[[0.14937684 0.1578321  0.17134365 0.1754475  0.17565489 0.17034501]] entropy:[1.7900207]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:944 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06059915 0.11013541 0.19895191 0.22673373 0.2227514  0.18457016]] probs:[[0.14952335 0.1571167  0.17170972 0.17654699 0.17584532 0.1692579 ]] entropy:[1.7899358]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:945 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06089862 0.10948154 0.19919029 0.22755903 0.22277108 0.18353261]] probs:[[0.14957462 0.15702079 0.17175809 0.17670043 0.17585641 0.1690897 ]] entropy:[1.7899275]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:946 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06149963 0.1090171  0.2000146  0.22868213 0.22313604 0.18259996]] probs:[[0.14962506 0.15690649 0.17185438 0.17685233 0.1758742  0.16888751]] entropy:[1.7899158]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:947 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06150318 0.10913311 0.20003067 0.2285596  0.22315086 0.18272904]] probs:[[0.14962198 0.1569209  0.17185299 0.17682637 0.17587255 0.16890523]] entropy:[1.789918]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:948 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06127975 0.10905749 0.19973667 0.2286309  0.22302014 0.1826866 ]] probs:[[0.1496055  0.15692683 0.17182195 0.17685904 0.17586951 0.16891721]] entropy:[1.7899154]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:949 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05429571 0.11892808 0.18923847 0.20746885 0.21399373 0.20011449]] probs:[[0.1491     0.15905493 0.17064066 0.17378004 0.17491764 0.17250669]] entropy:[1.790124]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:950 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05644945 0.11587136 0.1930855  0.21435794 0.21860534 0.19430286]] probs:[[0.1491971  0.15833136 0.17104116 0.1747186  0.17546228 0.17124951]] entropy:[1.7900592]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5813571] v_loss:[[0.00034574]]
DEBUG:chainerrl.agents.a3c:grad norm:0.9281530164202083
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:951 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06417468 0.13038667 0.1917512  0.16894823 0.19634794 0.22735134]] probs:[[0.15074919 0.16106845 0.1712619  0.1674008  0.17205097 0.17746867]] entropy:[1.7903855]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:952 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06045074 0.14006183 0.20497896 0.17968069 0.20459114 0.22824319]] probs:[[0.14919302 0.16155602 0.17239168 0.16808517 0.17232484 0.17644925]] entropy:[1.7902507]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:953 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06009676 0.14065681 0.20594639 0.1805307  0.20496105 0.2279798 ]] probs:[[0.149085   0.16159232 0.17249465 0.16816582 0.17232476 0.17633748]] entropy:[1.7902418]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:954 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05488681 0.14409721 0.21343951 0.1856503  0.2065026  0.22848429]] probs:[[0.14797032 0.16177753 0.17339364 0.16864151 0.17219499 0.17602204]] entropy:[1.7901028]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:955 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05806834 0.1440067  0.2116338  0.18457629 0.20797588 0.22905485]] probs:[[0.14839473 0.16171154 0.17302589 0.16840702 0.17239414 0.1760666 ]] entropy:[1.7901583]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:956 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0590069  0.14491883 0.21266714 0.18458614 0.20911005 0.23030882]] probs:[[0.1484029  0.16171618 0.17305182 0.16825995 0.17243737 0.17613184]] entropy:[1.7901549]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:957 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0573617  0.14808628 0.21917    0.19206083 0.21139832 0.2279444 ]] probs:[[0.14776888 0.16180211 0.17372225 0.16907606 0.17237738 0.17525327]] entropy:[1.7900946]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:958 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05624011 0.14810975 0.21939662 0.19222602 0.2122622  0.2293868 ]] probs:[[0.14755788 0.16175619 0.17370823 0.16905202 0.17247334 0.1754523 ]] entropy:[1.7900552]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4771527] v_loss:[[0.00023968]]
DEBUG:chainerrl.agents.a3c:grad norm:0.45372883857824103
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:959 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06280579 0.18107058 0.2126001  0.15813273 0.23161136 0.21380349]] probs:[[0.14849927 0.1671422  0.17249607 0.16335194 0.17580679 0.17270376]] entropy:[1.7902424]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:960 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05846127 0.17984979 0.212014   0.1582924  0.23090388 0.21488558]] probs:[[0.14798278 0.16708195 0.17254336 0.16351864 0.17583367 0.17303954]] entropy:[1.7901698]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:961 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06313036 0.18245222 0.21342853 0.15825394 0.23456621 0.21765926]] probs:[[0.14830582 0.16710098 0.17235816 0.16310596 0.17604019 0.17308891]] entropy:[1.7901928]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:962 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0591842  0.17787413 0.21156177 0.1466955  0.22966632 0.22126769]] probs:[[0.14828193 0.16696852 0.17268911 0.16184299 0.17584404 0.17437337]] entropy:[1.790104]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:963 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04581923 0.16854352 0.20481864 0.1399059  0.21679936 0.21566293]] probs:[[0.14764175 0.16691974 0.17308593 0.16220735 0.1751721  0.17497315]] entropy:[1.7900302]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:964 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06084546 0.17763144 0.2182343  0.14360882 0.2036329  0.18496858]] probs:[[0.15000929 0.16859226 0.17557846 0.1629528  0.1730334  0.16983381]] entropy:[1.7904611]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:965 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06027972 0.18239184 0.22320682 0.14575581 0.20542975 0.18456298]] probs:[[0.14959721 0.169027   0.17606857 0.1629466  0.17296624 0.16939439]] entropy:[1.7903954]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:966 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0555906  0.18491022 0.22455323 0.14696029 0.2012337  0.18407829]] probs:[[0.14899367 0.16956282 0.17641981 0.16324848 0.17235339 0.1694218 ]] entropy:[1.7903285]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5811538] v_loss:[[0.00025678]]
DEBUG:chainerrl.agents.a3c:grad norm:0.7347043004366873
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:967 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08972193 0.16707303 0.2177348  0.13728985 0.2506112  0.21925218]] probs:[[0.15200907 0.16423383 0.17276858 0.15941454 0.17854299 0.17303094]] entropy:[1.7902734]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:968 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0767061  0.16767508 0.21960157 0.13974972 0.25486165 0.23212194]] probs:[[0.14976624 0.16402924 0.17277172 0.15951203 0.17897233 0.17494848]] entropy:[1.7899389]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:969 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0779069  0.16634399 0.21946934 0.13867277 0.25519422 0.23182592]] probs:[[0.14997993 0.16384791 0.17278776 0.15937619 0.17907216 0.17493606]] entropy:[1.7899454]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:970 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07691443 0.16527185 0.21843632 0.13657899 0.2540818  0.23212017]] probs:[[0.1499787  0.16383351 0.17277932 0.15919946 0.17904921 0.17515987]] entropy:[1.7899281]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:971 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08203042 0.15545127 0.21388136 0.1350966  0.228502   0.19872631]] probs:[[0.15259859 0.16422407 0.17410557 0.16091512 0.1766698  0.17148688]] entropy:[1.7905029]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:972 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07206313 0.15019381 0.21058968 0.12860797 0.2198483  0.19772437]] probs:[[0.15194844 0.16429636 0.17452496 0.16078788 0.17614833 0.17229402]] entropy:[1.7904266]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:973 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06940582 0.14811061 0.20890215 0.12439194 0.21591736 0.19783844]] probs:[[0.15190786 0.16434681 0.17464764 0.1604946  0.17587714 0.17272604]] entropy:[1.7904071]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:974 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07211091 0.14741927 0.20777865 0.12237214 0.21280175 0.19476436]] probs:[[0.1525175  0.16444689 0.17467849 0.16037913 0.17555812 0.1724199 ]] entropy:[1.7904854]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6966922] v_loss:[[0.0014587]]
DEBUG:chainerrl.agents.a3c:grad norm:43.89237544594606
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:975 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1095015  0.15586567 0.23699616 0.04080712 0.21881554 0.24359885]] probs:[[0.15683658 0.1642794  0.17816304 0.14642452 0.17495319 0.17934328]] entropy:[1.7891036]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:976 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10768849 0.14763695 0.25875327 0.03715207 0.2086903  0.25114703]] probs:[[0.15634134 0.16271338 0.18183625 0.14569354 0.1729571  0.18045841]] entropy:[1.7886637]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:977 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09330633 0.15218784 0.24678564 0.03430377 0.2042394  0.24630333]] probs:[[0.15499361 0.1643939  0.18070452 0.14611316 0.17317748 0.18061738]] entropy:[1.7887335]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:978 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09619873 0.1491836  0.24777521 0.03495501 0.20197587 0.24027607]] probs:[[0.15563664 0.1641054  0.18110926 0.14639089 0.17300166 0.17975618]] entropy:[1.7888515]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:979 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09058649 0.14784352 0.24907891 0.03457599 0.20577031 0.24566245]] probs:[[0.15465449 0.16376796 0.18121532 0.14623034 0.17353466 0.18059726]] entropy:[1.7886596]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:980 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08018937 0.15321417 0.24921225 0.03322672 0.20630841 0.26236185]] probs:[[0.1527122  0.16428123 0.18083371 0.1457062  0.17323934 0.18322732]] entropy:[1.7882537]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:981 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0966557  0.14606173 0.24894549 0.03547024 0.20486084 0.24056837]] probs:[[0.15564558 0.16352855 0.18124892 0.14640783 0.17343219 0.17973691]] entropy:[1.7888174]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:982 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09648649 0.14492165 0.2489868  0.03559721 0.2054701  0.24014556]] probs:[[0.15564369 0.16336785 0.18128486 0.1464494  0.17356513 0.17968914]] entropy:[1.788815]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4379164] v_loss:[[4.5689383e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.6940985607843391
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:983 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14017683 0.17146245 0.1755601  0.08337138 0.27349764 0.2171751 ]] probs:[[0.16037896 0.16547583 0.1661553  0.15152249 0.18325165 0.17321572]] entropy:[1.789998]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:984 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14102513 0.1753039  0.17179462 0.08382069 0.27342063 0.21771805]] probs:[[0.16046748 0.16606347 0.16548173 0.15154564 0.18318322 0.17325842]] entropy:[1.7900084]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:985 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13924852 0.17636512 0.1723735  0.08356144 0.27171728 0.2178286 ]] probs:[[0.16023788 0.16629711 0.16563465 0.15155861 0.18293451 0.17333733]] entropy:[1.7900229]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:986 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14194141 0.1742628  0.17190333 0.08382327 0.27432227 0.21730565]] probs:[[0.1606007  0.16587634 0.16548543 0.15153293 0.1833326  0.17317201]] entropy:[1.7900006]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:987 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11960777 0.18042181 0.16332237 0.04932195 0.23595265 0.19869466]] probs:[[0.16012312 0.17016305 0.16727808 0.14925514 0.17987962 0.17330098]] entropy:[1.7900081]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:988 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10994045 0.16606809 0.19470647 0.05373666 0.21464936 0.20812665]] probs:[[0.15860184 0.16775835 0.17263213 0.1499337  0.17610948 0.1749645 ]] entropy:[1.7901167]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:989 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09488639 0.17527919 0.18838362 0.05342279 0.20110577 0.2069398 ]] probs:[[0.15694293 0.17008105 0.17232454 0.15056857 0.17453086 0.17555207]] entropy:[1.7901213]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:990 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10252226 0.16774242 0.19498345 0.05385337 0.2120462  0.20394342]] probs:[[0.15774736 0.16837858 0.17302844 0.1502538  0.17600611 0.17458573]] entropy:[1.7901098]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.8317153] v_loss:[[0.00261177]]
DEBUG:chainerrl.agents.a3c:grad norm:12.073306352201023
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:991 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14348085 0.08342275 0.20089693 0.13018587 0.2548451  0.23055997]] probs:[[0.16138445 0.15197732 0.17092168 0.15925305 0.18039586 0.1760677 ]] entropy:[1.7899756]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:992 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13713376 0.08386672 0.20176211 0.12106921 0.24339786 0.22954558]] probs:[[0.16108546 0.15272944 0.17183992 0.15851837 0.17914565 0.17668118]] entropy:[1.7900323]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:993 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12347694 0.08461585 0.19370836 0.11441245 0.23478928 0.22550352]] probs:[[0.1599802  0.15388244 0.17161977 0.15853661 0.1788169  0.17716412]] entropy:[1.7900892]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:994 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12963082 0.08501608 0.19566657 0.11815689 0.23946658 0.22557732]] probs:[[0.16051319 0.15350935 0.17147061 0.158682   0.17914794 0.17667691]] entropy:[1.790097]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:995 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13526364 0.08070889 0.20232348 0.12271945 0.24578214 0.23314472]] probs:[[0.16068158 0.15215045 0.17182636 0.15867855 0.17945835 0.17720471]] entropy:[1.7899206]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:996 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10540723 0.10229461 0.22641724 0.09336797 0.22317536 0.25305316]] probs:[[0.15630747 0.1558217  0.17641427 0.15443693 0.17584328 0.18117636]] entropy:[1.789468]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:997 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10602872 0.10415326 0.22725064 0.09170357 0.22847633 0.25349563]] probs:[[0.1562028  0.15591013 0.1763335  0.15398113 0.17654976 0.18102264]] entropy:[1.7894099]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:998 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11263497 0.09842874 0.20933834 0.09881922 0.23039876 0.26494667]] probs:[[0.15715343 0.15493664 0.17310977 0.15499717 0.1767942  0.1830088 ]] entropy:[1.7894406]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5211959] v_loss:[[9.450093e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.36643612919290114
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:999 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13861443 0.08769906 0.22077428 0.09906107 0.22514538 0.23612978]] probs:[[0.16155036 0.15353085 0.1753838  0.15528522 0.17615211 0.1780977 ]] entropy:[1.7898784]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1000 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14830245 0.07805233 0.23198761 0.10435978 0.22415736 0.22337246]] probs:[[0.16304645 0.1519855  0.17727822 0.1560369  0.1758955  0.1757575 ]] entropy:[1.7898817]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1001 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14907168 0.07783206 0.23266424 0.10465053 0.2239033  0.22389618]] probs:[[0.16312222 0.15190573 0.17734416 0.15603472 0.17579725 0.17579599]] entropy:[1.7898749]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1002 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14940594 0.07773402 0.23294756 0.10476922 0.22381961 0.22409177]] probs:[[0.16315584 0.15187137 0.17737168 0.15603325 0.17576002 0.17580785]] entropy:[1.7898719]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1003 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14964364 0.07580142 0.22294368 0.10711505 0.2247503  0.22823666]] probs:[[0.16331871 0.15169339 0.17573963 0.15651861 0.17605741 0.1766723 ]] entropy:[1.7899199]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1004 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14035247 0.07554445 0.20140587 0.10851258 0.22348619 0.22824034]] probs:[[0.16266994 0.15246198 0.17291093 0.15757214 0.17677133 0.17761372]] entropy:[1.7900674]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1005 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14858246 0.07678682 0.22627644 0.10571207 0.22440559 0.22598225]] probs:[[0.16316423 0.15186039 0.17634657 0.15631713 0.17601696 0.1762947 ]] entropy:[1.78991]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1006 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14951722 0.07882836 0.22905591 0.10229301 0.22292288 0.22713183]] probs:[[0.16325769 0.15211563 0.17677337 0.15572718 0.17569254 0.17643358]] entropy:[1.7898815]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.517785] v_loss:[[0.00023039]]
DEBUG:chainerrl.agents.a3c:grad norm:0.4134562828161388
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1007 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17275666 0.11324786 0.17113078 0.07478833 0.24336463 0.23852395]] probs:[[0.16698898 0.15734157 0.1667177  0.15140519 0.17920597 0.17834058]] entropy:[1.7899121]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1008 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17232478 0.1130419  0.17108172 0.07742268 0.24469543 0.23924531]] probs:[[0.16680779 0.15720636 0.16660057 0.15170534 0.17932734 0.17835264]] entropy:[1.7899235]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1009 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19787535 0.10944392 0.1320083  0.12350069 0.2561858  0.21914318]] probs:[[0.17060524 0.15616623 0.15973006 0.15837692 0.18084906 0.1742725 ]] entropy:[1.7902613]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1010 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21195841 0.12315328 0.13141195 0.13572745 0.26384708 0.21730793]] probs:[[0.17173691 0.15714337 0.15844654 0.15913178 0.18088335 0.17265809]] entropy:[1.7903304]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1011 r:0.05 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18938306 0.10140789 0.13053043 0.1082148  0.2448868  0.23969884]] probs:[[0.16979821 0.15549842 0.16009352 0.1565605  0.17948909 0.17856032]] entropy:[1.7900076]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1012 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19449508 0.09730451 0.15019421 0.11148831 0.25116482 0.20023079]] probs:[[0.17098542 0.15514927 0.16357595 0.15736556 0.18095493 0.17196895]] entropy:[1.7903242]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1013 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18961233 0.09601591 0.140873   0.106635   0.2436919  0.2198757 ]] probs:[[0.17036512 0.15514304 0.16226074 0.15679929 0.17983207 0.17559975]] entropy:[1.7902122]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1014 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18713827 0.10014261 0.1316417  0.10536776 0.24292971 0.23828521]] probs:[[0.16966307 0.15552694 0.16050386 0.1563417  0.17939787 0.17856658]] entropy:[1.7900205]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-0.8735084] v_loss:[[0.00614415]]
DEBUG:chainerrl.agents.a3c:grad norm:13.703173912888783
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1015 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19319047 0.08403827 0.1336322  0.15965095 0.23942502 0.2325351 ]] probs:[[0.16968593 0.15213938 0.1598748  0.16408913 0.17771547 0.17649524]] entropy:[1.790277]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1016 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19138198 0.08196598 0.1362279  0.15700538 0.2391616  0.23144731]] probs:[[0.16952823 0.1519579  0.16043124 0.16379946 0.17782485 0.17645834]] entropy:[1.7902755]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1017 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19070037 0.08137917 0.137444   0.15592961 0.2399408  0.23155922]] probs:[[0.16941732 0.15187287 0.16063082 0.16362779 0.17796831 0.17648289]] entropy:[1.7902632]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1018 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19119687 0.08150893 0.13756326 0.1561069  0.24021564 0.23137851]] probs:[[0.1694728  0.1518669  0.1606228  0.16362914 0.17798713 0.17642117]] entropy:[1.7902635]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1019 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2090915  0.09067666 0.13413008 0.15522541 0.23263435 0.22731595]] probs:[[0.1722392  0.15300481 0.15979996 0.1632068  0.17634231 0.17540695]] entropy:[1.790411]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1020 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20917316 0.09039643 0.1335148  0.1550557  0.23457499 0.22716032]] probs:[[0.17222562 0.1529374  0.15967605 0.16315292 0.17665651 0.17535152]] entropy:[1.7903842]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1021 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20481116 0.09012956 0.12732705 0.15518309 0.22946434 0.22246706]] probs:[[0.17207418 0.15342994 0.15924463 0.16374291 0.17636909 0.17513928]] entropy:[1.7904501]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1022 r:0.1 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22542539 0.09958145 0.1271726  0.14940202 0.24550831 0.22315553]] probs:[[0.17442918 0.15380336 0.15810606 0.16166002 0.17796764 0.1740337 ]] entropy:[1.7902367]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-0.08599412] v_loss:[[0.01938045]]
DEBUG:chainerrl.agents.a3c:grad norm:232.08047528044733
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1023 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.31814286 0.19147249 0.01169137 0.18030056 0.2816374  0.12859482]] probs:[[0.18940112 0.16686693 0.1394095  0.16501309 0.18261161 0.1566978 ]] entropy:[1.7868614]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1024 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.31697348 0.19143948 0.01175278 0.1803616  0.28090364 0.12861992]] probs:[[0.18924378 0.16691789 0.13946523 0.165079   0.18253942 0.15675475]] entropy:[1.7869022]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1025 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.29248807 0.1902904  0.00859641 0.18517156 0.25924993 0.12542851]] probs:[[0.18631001 0.16821018 0.14026316 0.16735135 0.18021919 0.15764606]] entropy:[1.7876394]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1026 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.29374892 0.19285026 0.01040476 0.1906043  0.2628064  0.12602787]] probs:[[0.18606718 0.16820931 0.14015706 0.16783194 0.18039796 0.15733649]] entropy:[1.7876139]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1027 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.28715667 0.19259797 0.00776995 0.19071344 0.25563756 0.12402188]] probs:[[0.18544163 0.16871004 0.14023975 0.16839239 0.17968783 0.15752834]] entropy:[1.7877516]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1028 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.30947497 0.19384497 0.01323938 0.19079815 0.27837035 0.12932447]] probs:[[0.18772183 0.16722351 0.13959226 0.16671479 0.1819727  0.15677486]] entropy:[1.7871705]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1029 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.3152299  0.1946767  0.01351211 0.19008392 0.28296313 0.12972628]] probs:[[0.18842049 0.16702156 0.13934575 0.16625622 0.1824378  0.15651824]] entropy:[1.7869852]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1030 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.30928308 0.1926254  0.01231035 0.1872001  0.2769662  0.12879685]] probs:[[0.18793131 0.16723815 0.13964504 0.16633329 0.18195504 0.15689713]] entropy:[1.7871635]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-2.0673034] v_loss:[[0.00392213]]
DEBUG:chainerrl.agents.a3c:grad norm:16.622293085885424
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1031 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16980882 0.18294214 0.01248845 0.20658623 0.32430997 0.23660238]] probs:[[0.16283296 0.1649856  0.13912942 0.16893302 0.19003838 0.17408061]] entropy:[1.7875499]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1032 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16783136 0.18239535 0.01121994 0.2068052  0.32464507 0.23688604]] probs:[[0.16258252 0.1649677  0.13901395 0.16904408 0.1901854  0.17420633]] entropy:[1.7874963]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1033 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[1.4981364e-01 1.7848508e-01 2.6769028e-04 2.0997359e-01 3.0590692e-01
  2.2628008e-01]] probs:[[0.1612749  0.1659658  0.13887362 0.17127497 0.18851991 0.17409076]] entropy:[1.7876116]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1034 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11557933  0.17687736 -0.02541498  0.20533422  0.3107638   0.22805986]] probs:[[0.1572277  0.16716698 0.13655137 0.17199235 0.19111581 0.17594573]] entropy:[1.7865356]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1035 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11136887  0.18455565 -0.05108499  0.20279904  0.34136197  0.23764642]] probs:[[0.15588671 0.1677234  0.13251232 0.17081134 0.19619766 0.17686859]] entropy:[1.784802]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1036 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10295367  0.18120909 -0.05809302  0.1957178   0.34881166  0.24283844]] probs:[[0.1548296  0.16743256 0.13179906 0.16987948 0.1979834  0.17807592]] entropy:[1.7842088]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1037 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10366832  0.18064581 -0.05653163  0.19239533  0.3557272   0.2469584 ]] probs:[[0.15466684 0.16704294 0.13177204 0.16901718 0.19900547 0.17849553]] entropy:[1.7839991]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1038 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10494461  0.18149303 -0.05367661  0.19155864  0.3598309   0.24922669]] probs:[[0.15458639 0.16688444 0.13191159 0.16857271 0.19946513 0.17857975]] entropy:[1.7839439]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6390944] v_loss:[[0.00076558]]
DEBUG:chainerrl.agents.a3c:grad norm:5.775288071765441
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1039 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00382132  0.45904836 -0.05687155  0.11286643  0.17395687  0.16474783]] probs:[[0.14206769 0.22569247 0.13472739 0.15965119 0.16970842 0.16815275]] entropy:[1.7769876]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1040 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00196377  0.4622603  -0.05874075  0.10989682  0.16649774  0.15833586]] probs:[[0.1426272  0.22688839 0.13475485 0.1595081  0.16879681 0.16742471]] entropy:[1.7767286]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1041 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00313832  0.45923817 -0.05757293  0.11274909  0.17154205  0.16350721]] probs:[[0.14224885 0.22586884 0.13471256 0.15972689 0.16939925 0.16804361]] entropy:[1.7769691]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1042 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00177576  0.45951745 -0.06081827  0.1169628   0.16348664  0.16002865]] probs:[[0.14264931 0.22625947 0.13447075 0.16063389 0.16828378 0.16770285]] entropy:[1.7769141]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1043 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00135078  0.46047115 -0.05572735  0.11436267  0.17329556  0.1653838 ]] probs:[[0.14226818 0.2257743  0.13473868 0.15972081 0.16941652 0.16808142]] entropy:[1.7770054]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1044 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-3.1860848e-04  4.6155477e-01 -5.1244546e-02  1.1267379e-01
   1.7985244e-01  1.6946547e-01]] probs:[[0.14205578 0.22544882 0.13500257 0.15904899 0.17010073 0.16834305]] entropy:[1.7770811]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1045 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 3.4175883e-04  4.6190330e-01 -4.9100149e-02  1.1199460e-01
   1.8237087e-01  1.7122148e-01]] probs:[[0.14199641 0.22528432 0.13514657 0.15876968 0.17034586 0.16845717]] entropy:[1.7771319]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1046 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 2.7477066e-04  4.6277088e-01 -4.9669359e-02  1.1139562e-01
   1.8163529e-01  1.7212820e-01]] probs:[[0.14198099 0.2254705  0.13506404 0.15866801 0.17021354 0.16860296]] entropy:[1.7770522]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2040818] v_loss:[[0.00058294]]
DEBUG:chainerrl.agents.a3c:grad norm:7.546894717081814
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1047 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00589378  0.39261445 -0.01395109  0.13184337  0.20027097  0.16841565]] probs:[[0.14187156 0.211332   0.14073303 0.16282228 0.17435384 0.16888729]] entropy:[1.7819388]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1048 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00597177  0.38972133 -0.01314752  0.12709706  0.19501963  0.16782454]] probs:[[0.14218645 0.21120563 0.1411698  0.16242364 0.17383917 0.1691753 ]] entropy:[1.7821009]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1049 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00577427  0.38573813 -0.01233913  0.12049656  0.1869845   0.16721879]] probs:[[0.1426796  0.21105398 0.14174598 0.16188276 0.17301188 0.16962576]] entropy:[1.7823193]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1050 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00641713  0.38912827 -0.01381354  0.1265366   0.19357146  0.16717835]] probs:[[0.14222759 0.21123557 0.1411795  0.16245195 0.17371519 0.16919029]] entropy:[1.7821076]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1051 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00622179  0.3931272  -0.01487765  0.13308842  0.20087358  0.16804491]] probs:[[0.14179996 0.211403   0.14057785 0.16299629 0.1744281  0.16879484]] entropy:[1.7818857]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1052 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01093438  0.3912164  -0.02262646  0.13314751  0.19280615  0.16055624]] probs:[[0.1418149  0.2120185  0.14016646 0.1637932  0.17386225 0.16834466]] entropy:[1.7817159]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1053 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01214993  0.39119098 -0.02429737  0.13254987  0.19060582  0.15856186]] probs:[[0.14181666 0.21227358 0.14010437 0.16389646 0.17369324 0.16821566]] entropy:[1.781654]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1054 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0111405   0.3900851  -0.02321623  0.13400902  0.19208714  0.16094272]] probs:[[0.14182402 0.21183605 0.14012171 0.16397871 0.17378427 0.16845527]] entropy:[1.7817588]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7798395] v_loss:[[0.00170187]]
DEBUG:chainerrl.agents.a3c:grad norm:16.380004262864624
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1055 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03061931 0.32413107 0.02046504 0.18004815 0.19278717 0.09621429]] probs:[[0.14845498 0.19909726 0.14695515 0.17238158 0.17459162 0.15851934]] entropy:[1.7860583]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1056 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02640188 0.32498223 0.01500193 0.18242282 0.18963665 0.09399953]] probs:[[0.14808875 0.19961533 0.14641014 0.17309365 0.17434682 0.1584453 ]] entropy:[1.785835]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1057 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03088555 0.32376397 0.02077536 0.18028185 0.19312456 0.09672732]] probs:[[0.1484659  0.19898586 0.14697246 0.17238867 0.17461689 0.15857016]] entropy:[1.7860826]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1058 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03058517 0.3234133  0.02092292 0.18070148 0.19359934 0.09682446]] probs:[[0.14840975 0.1989006  0.14698268 0.17244759 0.17468621 0.1585732 ]] entropy:[1.7860873]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1059 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03089883 0.32370585 0.0208808  0.18047372 0.19332701 0.09692411]] probs:[[0.1484522  0.1989533  0.14697245 0.17240356 0.17463382 0.15858464]] entropy:[1.7860863]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1060 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0277553  0.32704896 0.01645006 0.17557493 0.1871624  0.08942212]] probs:[[0.14851357 0.20033081 0.14684404 0.17217235 0.174179   0.1579602 ]] entropy:[1.7858245]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1061 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03054431 0.3239509  0.02030473 0.18087    0.19304593 0.09683003]] probs:[[0.1484121  0.19901882 0.14690016 0.17248641 0.17459944 0.15858306]] entropy:[1.7860596]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1062 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03048306 0.32402086 0.02028226 0.18043381 0.19282432 0.09642761]] probs:[[0.14842914 0.1990678  0.14692275 0.17244156 0.1745915  0.15854718]] entropy:[1.7860558]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5362097] v_loss:[[0.00015298]]
DEBUG:chainerrl.agents.a3c:grad norm:0.6173765631604342
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1063 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05876248 0.23572487 0.05654053 0.18349116 0.20432617 0.10831776]] probs:[[0.15309943 0.18273737 0.15275964 0.17343733 0.1770888  0.16087745]] entropy:[1.7892867]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1064 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07794714 0.25041735 0.05740678 0.19574834 0.22798328 0.1197335 ]] probs:[[0.15390596 0.18287666 0.15077692 0.17314735 0.17881967 0.16047339]] entropy:[1.7890378]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1065 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08376582 0.2535592  0.06460351 0.19538249 0.23346128 0.12155329]] probs:[[0.1542221  0.18276244 0.15129498 0.1724333  0.17912596 0.16016127]] entropy:[1.789116]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1066 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09423341 0.26220647 0.06502952 0.20156589 0.23739778 0.12432342]] probs:[[0.15499325 0.18334225 0.15053229 0.17255466 0.17884973 0.15972787]] entropy:[1.7890415]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1067 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09227623 0.26255712 0.06537406 0.19973244 0.23688255 0.12448461]] probs:[[0.15477832 0.18351102 0.15066995 0.17233668 0.17885943 0.15984462]] entropy:[1.7890353]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1068 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08842916 0.26520237 0.0652419  0.19814384 0.24393003 0.1285853 ]] probs:[[0.15394998 0.1837178  0.15042137 0.17180195 0.17985097 0.16025782]] entropy:[1.7888873]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1069 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09405599 0.26471075 0.06825848 0.19874814 0.24051295 0.12644222]] probs:[[0.15475985 0.18355776 0.1508185  0.1718405  0.17916937 0.159854  ]] entropy:[1.7890383]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1070 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09276102 0.26377836 0.06546491 0.19896469 0.23754051 0.12449143]] probs:[[0.15480699 0.18368025 0.15063852 0.17215286 0.17892356 0.15979785]] entropy:[1.7890174]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6201499] v_loss:[[0.00055409]]
DEBUG:chainerrl.agents.a3c:grad norm:3.2697742726768535
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1071 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02601599 0.18685538 0.11001624 0.17169574 0.33476377 0.20482448]] probs:[[0.14333914 0.1683513  0.15589982 0.16581838 0.1951876  0.17140375]] entropy:[1.7873001]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1072 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02399351 0.1864874  0.10912871 0.16931905 0.3345867  0.20431381]] probs:[[0.14319353 0.16845874 0.15591832 0.16559128 0.19534948 0.17148869]] entropy:[1.7872491]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1073 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02419979 0.18777761 0.10943791 0.17055915 0.33536434 0.20520608]] probs:[[0.14310774 0.16854042 0.15584095 0.16566326 0.19534405 0.17150357]] entropy:[1.7872307]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1074 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02428125 0.18132406 0.1071267  0.16874085 0.33237207 0.20263866]] probs:[[0.14351502 0.16791914 0.15591097 0.16581942 0.19529878 0.17153668]] entropy:[1.7873098]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1075 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02370252 0.18106464 0.10565396 0.16295743 0.33388487 0.20005636]] probs:[[0.14364156 0.16812086 0.15590899 0.16510406 0.19588022 0.17134428]] entropy:[1.787234]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1076 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02877171 0.19609141 0.09350237 0.16209584 0.3435125  0.20058838]] probs:[[0.14390585 0.17011572 0.15352906 0.16442974 0.19713722 0.17088245]] entropy:[1.7868643]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1077 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03197557 0.16986853 0.08588526 0.1544393  0.33794242 0.1857244 ]] probs:[[0.14582245 0.16738269 0.15389945 0.16481993 0.19801773 0.17005783]] entropy:[1.7870706]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1078 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03013311 0.15897483 0.09772275 0.15498336 0.32422107 0.18280366]] probs:[[0.14604387 0.16612636 0.15625614 0.1654646  0.19597654 0.1701325 ]] entropy:[1.787616]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4001787] v_loss:[[1.4087482e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.70994516895279
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1079 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03661629 0.16969831 0.20256886 0.09032927 0.20219763 0.15417564]] probs:[[0.14963631 0.1709361  0.17664823 0.1578935  0.17658266 0.1683032 ]] entropy:[1.7899784]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1080 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05293154 0.15614174 0.19893901 0.06686589 0.15184157 0.12870443]] probs:[[0.15473539 0.1715589  0.17906053 0.15690662 0.17082275 0.16691577]] entropy:[1.7904577]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1081 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05209634 0.15793969 0.19943224 0.06803948 0.15212639 0.13019763]] probs:[[0.1544903  0.17173879 0.17901458 0.15697311 0.17074332 0.16703989]] entropy:[1.790443]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1082 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03889473 0.16607508 0.2022332  0.08617136 0.19253452 0.14917359]] probs:[[0.1505088  0.17092109 0.17721437 0.15779524 0.17550394 0.16805655]] entropy:[1.7900923]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1083 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0525306  0.16251938 0.20568123 0.0709864  0.1644936  0.13819528]] probs:[[0.15364982 0.17151399 0.17907895 0.15651187 0.17185293 0.1673924 ]] entropy:[1.790319]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1084 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0493986  0.1641554  0.20635813 0.07497598 0.17261826 0.14233747]] probs:[[0.15276544 0.17134179 0.17872766 0.15672317 0.172798   0.16764396]] entropy:[1.7902548]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1085 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04913886 0.1602733  0.20565872 0.07541512 0.17770691 0.1413586 ]] probs:[[0.1527322  0.17068511 0.17861022 0.1567986  0.17368686 0.167487  ]] entropy:[1.790248]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1086 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05605625 0.15331192 0.20398292 0.06598138 0.16551681 0.1315699 ]] probs:[[0.1546633  0.17046092 0.17932093 0.156206   0.17255414 0.16679476]] entropy:[1.7903668]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.1954538] v_loss:[[0.00071363]]
DEBUG:chainerrl.agents.a3c:grad norm:7.6911635227270425
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1087 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05714925 0.1878147  0.18612671 0.07637011 0.11688744 0.15434694]] probs:[[0.15479171 0.17639853 0.17610101 0.15779571 0.16432045 0.17059256]] entropy:[1.7904822]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1088 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05964886 0.18194185 0.1895696  0.07753608 0.11710458 0.14990257]] probs:[[0.15526836 0.17546648 0.17681001 0.15807067 0.16445069 0.16993377]] entropy:[1.7905573]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1089 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06469311 0.17204265 0.19773924 0.07744545 0.12016898 0.14637752]] probs:[[0.15599123 0.17366868 0.1781892  0.15799323 0.16488951 0.16926816]] entropy:[1.790616]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1090 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06961969 0.16420121 0.20531878 0.07672714 0.12507778 0.1465416 ]] probs:[[0.15652756 0.17205492 0.17927684 0.15764403 0.16545351 0.16904315]] entropy:[1.7906231]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1091 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07132831 0.1642332  0.20631112 0.07751182 0.12287699 0.14770937]] probs:[[0.15673119 0.17199011 0.17938152 0.15770333 0.16502233 0.16917153]] entropy:[1.7906277]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1092 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06709979 0.16182137 0.20172052 0.07679297 0.11920386 0.14378978]] probs:[[0.15658303 0.17213999 0.17914708 0.1581082  0.16495794 0.16906385]] entropy:[1.7906537]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1093 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06964879 0.16457097 0.2066545  0.0759216  0.12823983 0.14743188]] probs:[[0.15639833 0.17197143 0.17936303 0.15738246 0.16583563 0.1690491 ]] entropy:[1.7905988]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1094 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06704223 0.1626066  0.20257826 0.07607337 0.12314888 0.14405121]] probs:[[0.15643908 0.17212674 0.17914629 0.15785831 0.16546726 0.16896231]] entropy:[1.7906377]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4573889] v_loss:[[3.214851e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.26424841374653474
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1095 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06843048 0.16521096 0.1661499  0.08005399 0.11357803 0.16182022]] probs:[[0.15723184 0.1732095  0.17337221 0.15907007 0.16449314 0.17262319]] entropy:[1.790932]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1096 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06737784 0.16458595 0.16472761 0.08081916 0.11229204 0.16090313]] probs:[[0.15718712 0.1732343  0.17325886 0.15931419 0.16440801 0.17259748]] entropy:[1.7909439]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1097 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06980377 0.17683208 0.16411011 0.08149908 0.11118103 0.17390293]] probs:[[0.15684843 0.17456694 0.17236017 0.15869358 0.16347453 0.17405635]] entropy:[1.790798]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1098 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06980085 0.17805982 0.16415197 0.08187579 0.11072523 0.17481095]] probs:[[0.15679081 0.17471768 0.17230454 0.15869552 0.16334046 0.17415096]] entropy:[1.7907825]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1099 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06865177 0.18172842 0.1619145  0.08458424 0.10661063 0.17339632]] probs:[[0.15667489 0.17543165 0.17198987 0.1591911  0.16273642 0.17397602]] entropy:[1.7907683]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1100 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05168254 0.1750044  0.17304038 0.07423694 0.09617645 0.16970795]] probs:[[0.15494603 0.17528246 0.17493854 0.15848047 0.16199587 0.17435655]] entropy:[1.7904712]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1101 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05138461 0.17046866 0.17688674 0.07145543 0.09817512 0.17071195]] probs:[[0.15491635 0.17450777 0.17563139 0.15805705 0.16233721 0.17455024]] entropy:[1.7904501]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1102 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06404222 0.16157737 0.18099009 0.07510894 0.11115433 0.18373346]] probs:[[0.1559326  0.17190792 0.17527772 0.15766785 0.16345471 0.17575924]] entropy:[1.7905837]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5626004] v_loss:[[0.00023787]]
DEBUG:chainerrl.agents.a3c:grad norm:0.5448045111928638
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1103 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07866181 0.15661035 0.17036265 0.09125999 0.15411685 0.18285449]] probs:[[0.15678968 0.16950014 0.17184725 0.15877743 0.16907802 0.17400742]] entropy:[1.7909906]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1104 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08457926 0.15305378 0.17273673 0.09398983 0.16460127 0.192535  ]] probs:[[0.15699086 0.16811731 0.17145915 0.1584752  0.1700699  0.17488757]] entropy:[1.79096]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1105 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08610711 0.14749275 0.17393766 0.08409507 0.17068627 0.19139972]] probs:[[0.15742089 0.16738704 0.17187262 0.15710446 0.1713147  0.17490022]] entropy:[1.790872]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1106 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08686196 0.14941305 0.17215115 0.08306296 0.1822355  0.1807544 ]] probs:[[0.15752248 0.16769038 0.17154703 0.1569252  0.17328571 0.17302924]] entropy:[1.79089]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1107 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08947167 0.14760642 0.17033339 0.08705644 0.18836673 0.16760342]] probs:[[0.15805574 0.1675166  0.17136733 0.15767446 0.1744857  0.17090015]] entropy:[1.7909844]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1108 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09356483 0.14802945 0.17644313 0.0737403  0.18848665 0.17183904]] probs:[[0.1586358  0.16751546 0.17234345 0.15552188 0.17443162 0.1715518 ]] entropy:[1.7908347]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1109 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09203615 0.1447764  0.17513502 0.07169268 0.18881907 0.1705358 ]] probs:[[0.15863067 0.16722143 0.17237589 0.15543617 0.1747509  0.17158492]] entropy:[1.790813]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1110 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08979964 0.15068801 0.17549452 0.0712091  0.18658023 0.17632551]] probs:[[0.15808193 0.16800638 0.17222615 0.15517026 0.17414601 0.17236933]] entropy:[1.7907696]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5590886] v_loss:[[0.00136501]]
DEBUG:chainerrl.agents.a3c:grad norm:50.020698250223106
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1111 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07696608 0.17597385 0.18245688 0.05316414 0.17906854 0.17391084]] probs:[[0.15622298 0.17248188 0.17360371 0.15254849 0.17301649 0.17212641]] entropy:[1.790359]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1112 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07694671 0.17604001 0.18241382 0.05318641 0.17897743 0.17384899]] probs:[[0.15622342 0.17249711 0.17360008 0.15255526 0.17300455 0.17211957]] entropy:[1.79036]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1113 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07631553 0.17568132 0.18264435 0.05345731 0.17835592 0.17452413]] probs:[[0.15613581 0.17244737 0.1736523  0.1526073  0.1729092  0.17224792]] entropy:[1.7903576]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1114 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07852711 0.17576982 0.18359381 0.05288767 0.17727143 0.17322813]] probs:[[0.15647705 0.17245771 0.17381231 0.15251607 0.17271689 0.17201994]] entropy:[1.7903788]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1115 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0879394  0.17403601 0.18984807 0.05182159 0.16933466 0.17065372]] probs:[[0.15790926 0.17210713 0.17485012 0.15230767 0.17129989 0.17152599]] entropy:[1.7904687]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1116 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12271737 0.17043935 0.21421966 0.04941099 0.14240988 0.15842788]] probs:[[0.16312528 0.1710987  0.17875583 0.15159495 0.16636947 0.16905583]] entropy:[1.7905148]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1117 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14854339 0.16917944 0.23559129 0.05224704 0.11715017 0.15114173]] probs:[[0.16689779 0.17037769 0.18207698 0.15157571 0.16173972 0.16733202]] entropy:[1.7902471]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1118 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15443373 0.16897917 0.23954086 0.05332083 0.11103605 0.15016903]] probs:[[0.16776858 0.17022668 0.1826721  0.15163445 0.16064355 0.16705464]] entropy:[1.790163]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5791035] v_loss:[[0.00040518]]
DEBUG:chainerrl.agents.a3c:grad norm:0.8621140941876398
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1119 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14597343 0.14463018 0.2322686  0.06005992 0.1286704  0.16459996]] probs:[[0.1664418  0.16621837 0.18144289 0.15273926 0.16358663 0.16957109]] entropy:[1.7904696]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1120 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14637406 0.14454342 0.23061019 0.05999959 0.12896861 0.16386588]] probs:[[0.16656399 0.16625935 0.18120259 0.15278094 0.16368996 0.16950312]] entropy:[1.7904967]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1121 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14422616 0.1447728  0.23124333 0.06110596 0.12802222 0.16534074]] probs:[[0.16619666 0.16628753 0.18130648 0.1529409  0.16352531 0.16974314]] entropy:[1.7904942]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1122 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14644805 0.14457989 0.23243429 0.05990638 0.12865636 0.16437685]] probs:[[0.16651462 0.16620384 0.18146619 0.15271011 0.16357826 0.16952696]] entropy:[1.7904657]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1123 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14697261 0.14446412 0.2327113  0.05965499 0.12878576 0.16413659]] probs:[[0.16659193 0.16617456 0.1815055  0.15266252 0.16358952 0.16947599]] entropy:[1.7904592]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1124 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14636026 0.14459008 0.23245817 0.05994773 0.12861636 0.16441979]] probs:[[0.16650027 0.16620581 0.18147083 0.15271668 0.16357197 0.1695345 ]] entropy:[1.7904658]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1125 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1474734  0.14455839 0.23233333 0.05944328 0.12910172 0.16382204]] probs:[[0.16667593 0.16619077 0.1814375  0.1526307  0.16364177 0.16942325]] entropy:[1.7904642]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1126 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14309023 0.14483975 0.23080409 0.06167719 0.127635   0.16579248]] probs:[[0.16603395 0.16632469 0.18125524 0.15305223 0.16348758 0.16984642]] entropy:[1.7905049]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4038562] v_loss:[[2.8222319e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.09302890777033122
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1127 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13730136 0.14118783 0.22617896 0.05256627 0.13272883 0.18394493]] probs:[[0.16504966 0.16569237 0.1803905  0.15164031 0.16429667 0.17293052]] entropy:[1.7903668]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1128 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13618948 0.14124078 0.22564442 0.053161   0.1323771  0.18440641]] probs:[[0.16489244 0.16572745 0.18032274 0.1517546  0.16426499 0.17303783]] entropy:[1.7903769]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1129 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13952029 0.14093378 0.22543207 0.05141253 0.13368873 0.18237785]] probs:[[0.1654325  0.16566649 0.18027347 0.15148029 0.16447057 0.17267665]] entropy:[1.7903754]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1130 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13650025 0.14123985 0.22594646 0.05296913 0.13244914 0.18427496]] probs:[[0.16493286 0.16571644 0.18036538 0.15171555 0.16426605 0.17300375]] entropy:[1.7903717]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1131 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17936115 0.13633096 0.21352284 0.03230847 0.14839886 0.15244429]] probs:[[0.17244837 0.16518527 0.17844127 0.1488658  0.16719079 0.16786851]] entropy:[1.7902557]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1132 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18092863 0.13687927 0.21359116 0.03155609 0.14767154 0.1525688 ]] probs:[[0.17269114 0.1652493  0.17842479 0.14872992 0.16704237 0.16786245]] entropy:[1.7902341]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1133 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17938946 0.13513109 0.21099731 0.03139122 0.15018865 0.14915298]] probs:[[0.17263138 0.16515762 0.17817505 0.14888294 0.1676633  0.16748975]] entropy:[1.7902693]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1134 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17930879 0.13491258 0.20974252 0.03069844 0.15068898 0.14741328]] probs:[[0.1727183  0.16521798 0.17805557 0.14886674 0.1678452  0.16729629]] entropy:[1.7902726]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2976623] v_loss:[[0.00022373]]
DEBUG:chainerrl.agents.a3c:grad norm:0.7573360469785256
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1135 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17683494 0.1326821  0.19677593 0.03061862 0.14928466 0.1660663 ]] probs:[[0.17232361 0.16488057 0.17579442 0.14888257 0.16764086 0.1704779 ]] entropy:[1.7903788]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1136 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18022282 0.13726489 0.19872206 0.02896365 0.14510836 0.16767447]] probs:[[0.17273326 0.16547012 0.17595844 0.14848581 0.16677308 0.17057928]] entropy:[1.7903163]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1137 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17946845 0.1367679  0.19872893 0.02836532 0.14430597 0.1680805 ]] probs:[[0.17266595 0.16544822 0.17602383 0.14845112 0.1667001  0.17071079]] entropy:[1.7903079]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1138 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17700799 0.1335839  0.19706587 0.02957491 0.14807104 0.16649716]] probs:[[0.17236304 0.16503851 0.17585517 0.14873554 0.16744685 0.17056084]] entropy:[1.7903583]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1139 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17642172 0.1325398  0.19634575 0.0296535  0.14916334 0.16594146]] probs:[[0.1723137  0.16491576 0.17578132 0.14879188 0.16768016 0.17051724]] entropy:[1.7903688]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1140 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17653549 0.13296174 0.19718157 0.02986311 0.14862834 0.16694972]] probs:[[0.17227307 0.16492768 0.1758668  0.14877103 0.16753188 0.17062959]] entropy:[1.7903616]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1141 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17703137 0.13360204 0.19824983 0.02986036 0.14768624 0.16851014]] probs:[[0.17227454 0.16495292 0.175969   0.14869815 0.1672926  0.1708128 ]] entropy:[1.7903447]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1142 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18120566 0.1333341  0.19865568 0.02863437 0.14438435 0.17436738]] probs:[[0.17281932 0.16474108 0.17586148 0.14836498 0.16657162 0.17164156]] entropy:[1.7902697]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7062682] v_loss:[[0.00075538]]
DEBUG:chainerrl.agents.a3c:grad norm:2.0584051506630017
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1143 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.20196746  0.13987866  0.1824313  -0.00673101  0.16034026  0.17259647]] probs:[[0.1766043  0.16597262 0.1731876  0.1433391  0.16940367 0.1714927 ]] entropy:[1.7895355]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1144 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2046424   0.13716356  0.1787603  -0.01396667  0.15984069  0.1750958 ]] probs:[[0.17730792 0.16573814 0.1727777  0.142491   0.16953954 0.17214571]] entropy:[1.789355]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1145 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.20505907  0.1369467   0.17692912 -0.01903455  0.16009821  0.17516579]] probs:[[0.1775492  0.16585855 0.17262435 0.14190447 0.16974322 0.17232022]] entropy:[1.7892435]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1146 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.20767173  0.13443452  0.1733969  -0.02400842  0.15909344  0.17783308]] probs:[[0.17818734 0.16560382 0.17218348 0.14133815 0.16973819 0.17294902]] entropy:[1.7891011]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1147 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.20516907  0.1342401   0.17165281 -0.02705204  0.16086794  0.17576449]] probs:[[0.17796673 0.165781   0.1721008  0.14108683 0.17025469 0.17280988]] entropy:[1.7890726]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1148 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.20565684  0.13453172  0.17067142 -0.02675993  0.16164294  0.17721762]] probs:[[0.177984   0.16576457 0.17186482 0.14107291 0.17032012 0.17299357]] entropy:[1.7890683]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1149 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.21114933  0.1352552   0.16720165 -0.03086458  0.16313827  0.18234086]] probs:[[0.1787729  0.16570717 0.17108639 0.14034481 0.1703926  0.1736962 ]] entropy:[1.7888842]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1150 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2146506   0.1379508   0.1669436  -0.03197156  0.16533397  0.18392174]] probs:[[0.17912689 0.16590156 0.17078191 0.13997617 0.17050724 0.17370623]] entropy:[1.788801]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2604796] v_loss:[[0.00031552]]
DEBUG:chainerrl.agents.a3c:grad norm:5.634485224758116
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1151 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16116913  0.11335871  0.1703568  -0.02438676  0.16944636  0.21707162]] probs:[[0.17067778 0.16270961 0.17225315 0.14177221 0.17209639 0.18049082]] entropy:[1.7889667]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1152 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.17341174  0.12645863  0.17686902 -0.02197921  0.16988695  0.21727318]] probs:[[0.17177917 0.16390003 0.1723741  0.14129062 0.17117475 0.17948134]] entropy:[1.7889832]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1153 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16511475  0.11861575  0.17568932 -0.02393012  0.1632677   0.21481305]] probs:[[0.17117266 0.16339552 0.17299235 0.14168806 0.17085679 0.1798946 ]] entropy:[1.7890115]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1154 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16356675  0.12141631  0.17669857 -0.02343084  0.16103281  0.2123847 ]] probs:[[0.17097268 0.16391587 0.17323266 0.14181255 0.17053998 0.1795263 ]] entropy:[1.7890724]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1155 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19391017  0.1453171   0.18283768 -0.01366862  0.18935771  0.22773764]] probs:[[0.17289162 0.16469114 0.17098783 0.14048295 0.17210633 0.17884015]] entropy:[1.7888817]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1156 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.17359577  0.12833321  0.17999047 -0.02222265  0.16766676  0.21771085]] probs:[[0.17171761 0.16411851 0.17281921 0.1411796  0.1707025  0.17946252]] entropy:[1.7889681]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1157 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.17866303  0.13233915  0.1816801  -0.0212859   0.17119662  0.22028114]] probs:[[0.17207012 0.16428095 0.17259005 0.14088629 0.17079015 0.17938246]] entropy:[1.7889223]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1158 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1614856   0.11784045  0.17307834 -0.02365344  0.16634928  0.21201126]] probs:[[0.17074631 0.16345435 0.17273726 0.14188826 0.1715788  0.17959504]] entropy:[1.7890682]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5927944] v_loss:[[0.00036981]]
DEBUG:chainerrl.agents.a3c:grad norm:2.3133903206221325
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1159 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11957933  0.12414595  0.16665897 -0.00844546  0.16364859  0.2228748 ]] probs:[[0.16429837 0.16505037 0.17221846 0.1445549  0.17170079 0.18217716]] entropy:[1.7893312]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1160 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11952472  0.12419658  0.16666159 -0.00850745  0.1636684   0.22281368]] probs:[[0.16429216 0.16506152 0.17222181 0.14454837 0.17170708 0.18216908]] entropy:[1.789331]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1161 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11966837  0.12405077  0.16664076 -0.00832743  0.16361992  0.22297387]] probs:[[0.16430873 0.16503038 0.17221084 0.1445682  0.1716914  0.18219046]] entropy:[1.7893325]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1162 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11973911  0.1239877   0.16663074 -0.00823621  0.16358878  0.2230596 ]] probs:[[0.16431656 0.16501617 0.17220517 0.14457807 0.1716821  0.18220189]] entropy:[1.7893332]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1163 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13060358 0.12949327 0.14906365 0.0063498  0.18523867 0.21799147]] probs:[[0.16533974 0.16515628 0.16842028 0.14602074 0.17462443 0.18043856]] entropy:[1.789662]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1164 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13532612 0.13307187 0.13570726 0.00940648 0.19065551 0.2132407 ]] probs:[[0.1661755  0.16580132 0.16623884 0.14651456 0.175629   0.17964076]] entropy:[1.7897562]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1165 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13233554 0.1296193  0.15747023 0.01326872 0.18962865 0.22096463]] probs:[[0.1649585  0.16451105 0.16915722 0.14644165 0.17468548 0.18024608]] entropy:[1.7897094]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1166 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13225605 0.12952094 0.1594703  0.01374786 0.18934448 0.22185874]] probs:[[0.16486442 0.16441412 0.16941269 0.14643992 0.17455012 0.18031876]] entropy:[1.7897036]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5541917] v_loss:[[0.00015554]]
DEBUG:chainerrl.agents.a3c:grad norm:0.5685279074849996
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1167 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.3026754   0.07757582  0.1811746  -0.02255071  0.21730298  0.17267501]] probs:[[0.19220951 0.15346712 0.17021888 0.13884524 0.17648105 0.16877823]] entropy:[1.7865603]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1168 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.3047144   0.08131361  0.17015614 -0.02215433  0.2171005   0.17110588]] probs:[[0.19282234 0.1542182  0.16854638 0.1390593  0.17664735 0.16870654]] entropy:[1.7865896]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1169 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.30475196  0.08147424  0.16963203 -0.02233819  0.21710664  0.17139247]] probs:[[0.19283582 0.15424795 0.16846353 0.13903823 0.17665415 0.16876034]] entropy:[1.7865858]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1170 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.30486146  0.08153479  0.16959989 -0.02225622  0.21699159  0.17099349]] probs:[[0.19286679 0.15426517 0.1684667  0.13905673 0.17664285 0.16870165]] entropy:[1.7865872]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1171 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.30465087  0.08142394  0.16969806 -0.02229724  0.21720465  0.17166927]] probs:[[0.19280598 0.15423192 0.1684656  0.13903648 0.17666198 0.16879801]] entropy:[1.7865877]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1172 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.30294213  0.0802209   0.17054592 -0.02343391  0.21870199  0.17843756]] probs:[[0.19230731 0.15391083 0.16846003 0.13875622 0.1767709  0.16979471]] entropy:[1.7865615]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1173 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.30411896  0.08111601  0.16992553 -0.02268685  0.21781036  0.17423843]] probs:[[0.19263113 0.15412657 0.16844068 0.13893016 0.17670268 0.16916873]] entropy:[1.7865782]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1174 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.3160622   0.08407926  0.17191847 -0.0261228   0.2194857   0.16182055]] probs:[[0.19478199 0.15445425 0.1686351  0.13833743 0.17685044 0.1669408 ]] entropy:[1.7861786]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6805763] v_loss:[[0.00074596]]
DEBUG:chainerrl.agents.a3c:grad norm:2.694489622220951
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1175 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.27058315  0.0654922   0.18992487 -0.01125072  0.23132706  0.2329269 ]] probs:[[0.18463708 0.15040043 0.17032936 0.13929003 0.17752938 0.17781363]] entropy:[1.7868687]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1176 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2930583   0.07335984  0.19173878 -0.01158724  0.22488776  0.23666067]] probs:[[0.18786207 0.15080832 0.16976048 0.13852662 0.17548218 0.17756033]] entropy:[1.7865579]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1177 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.32079524  0.0806592   0.19024627 -0.01066951  0.21904434  0.23881193]] probs:[[0.19206585 0.15106378 0.1685596  0.1378786  0.17348434 0.17694783]] entropy:[1.786059]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1178 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.3285325   0.08104774  0.18673888 -0.00940467  0.21708186  0.23975168]] probs:[[0.19337182 0.15097739 0.16780813 0.13792053 0.17297797 0.17694412]] entropy:[1.7858953]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1179 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.33241966  0.0821778   0.18714105 -0.01012176  0.21785195  0.23719834]] probs:[[0.1940135  0.15106133 0.16777927 0.13774255 0.17301184 0.1763916 ]] entropy:[1.7858046]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1180 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.331182    0.08153437  0.18661243 -0.00956263  0.21709529  0.23884998]] probs:[[0.19380997 0.15099256 0.16772212 0.1378455  0.17291349 0.17671639]] entropy:[1.7858334]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1181 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.33253095  0.0820999   0.18689139 -0.00997004  0.21770698  0.23727536]] probs:[[0.1940395  0.15105298 0.16774116 0.13776657 0.17299068 0.17640916]] entropy:[1.7858044]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1182 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.33451226  0.08285858  0.18723406 -0.0105392   0.21890031  0.23449071]] probs:[[0.19438648 0.15113822 0.167766   0.13766138 0.17316353 0.17588438]] entropy:[1.7857618]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.576456] v_loss:[[0.00027484]]
DEBUG:chainerrl.agents.a3c:grad norm:1.3329210927222896
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1183 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.3210341   0.11009874  0.20875192 -0.0189207   0.20828392  0.21280444]] probs:[[0.19207807 0.1555499  0.17167786 0.13672166 0.17159751 0.172375  ]] entropy:[1.7864236]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1184 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.3122436   0.11607976  0.21291313 -0.01236872  0.2057256   0.21989836]] probs:[[0.19008236 0.15622441 0.17210881 0.13739294 0.17087622 0.17331524]] entropy:[1.7868447]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1185 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.33036652  0.11599592  0.20571807 -0.02041031  0.21182422  0.20477346]] probs:[[0.19364138 0.15627812 0.170948   0.1363508  0.17199503 0.1707866 ]] entropy:[1.7862241]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1186 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.33158454  0.11564019  0.20561793 -0.02168901  0.21294306  0.20351577]] probs:[[0.1938838  0.15622772 0.17093655 0.13618109 0.17219327 0.1705776 ]] entropy:[1.7861494]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1187 r:0.15 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.29866457  0.11519746  0.21767563 -0.00655942  0.20083839  0.2311488 ]] probs:[[0.18750957 0.15607908 0.17292206 0.13818672 0.1700349  0.17526762]] entropy:[1.7872106]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1188 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.30078194  0.11491765  0.21716891 -0.00771974  0.20184928  0.2293105 ]] probs:[[0.1879153  0.15604228 0.17284207 0.13803257 0.17021437 0.17495345]] entropy:[1.7871459]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1189 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.30165324  0.11507631  0.21681847 -0.00853711  0.20260146  0.22766846]] probs:[[0.18810615 0.15608948 0.17280635 0.13793962 0.17036694 0.17469151]] entropy:[1.7871188]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1190 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.29799023  0.11205244  0.21380016 -0.00964972  0.20294696  0.22417255]] probs:[[0.18786623 0.15599006 0.17269725 0.13811548 0.17083307 0.17449786]] entropy:[1.7871764]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-0.48576912] v_loss:[[0.0189881]]
DEBUG:chainerrl.agents.a3c:grad norm:165.7481504728382
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1191 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1687352  0.00330516 0.1449703  0.05884164 0.23625061 0.39725453]] probs:[[0.16538334 0.1401672  0.16149935 0.1481718  0.17693482 0.20784348]] entropy:[1.7833562]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1192 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13422075 -0.00848624  0.14504611  0.05512861  0.24648865  0.39851812]] probs:[[0.16068967 0.13931924 0.16243865 0.14846998 0.17978162 0.20930079]] entropy:[1.782645]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1193 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14384307 -0.00550892  0.12741208  0.06596815  0.23594216  0.38814083]] probs:[[0.16278034 0.14019716 0.16012755 0.15058488 0.17848434 0.20782565]] entropy:[1.7834404]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1194 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12749523 -0.01349111  0.13262495  0.06788909  0.24414161  0.3924629 ]] probs:[[0.16018221 0.13911843 0.161006   0.15091334 0.18000026 0.2087797 ]] entropy:[1.7829101]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1195 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11204606 -0.02130521  0.13905779  0.07034259  0.25334102  0.39552936]] probs:[[0.15769878 0.13801129 0.16201656 0.15125744 0.18163183 0.20938404]] entropy:[1.7823834]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1196 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09775783 -0.02743603  0.14512002  0.07245535  0.26151377  0.3984504 ]] probs:[[0.15541072 0.1371229  0.16294838 0.15152776 0.18306245 0.20992786]] entropy:[1.7818661]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1197 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11571793 -0.0189284   0.13675916  0.06931547  0.25099546  0.3968127 ]] probs:[[0.15824336 0.13830864 0.16160826 0.15106824 0.1811656  0.20960584]] entropy:[1.7824266]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1198 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12304609 -0.0130964   0.1323355   0.06631564  0.24668893  0.40268925]] probs:[[0.15920603 0.13894202 0.16069186 0.15042563 0.1801594  0.2105751 ]] entropy:[1.7823519]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4636605] v_loss:[[0.00022857]]
DEBUG:chainerrl.agents.a3c:grad norm:5.0395809454460245
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1199 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06876874 0.01302715 0.16320316 0.0950948  0.22733888 0.3808665 ]] probs:[[0.15129468 0.143092   0.16627845 0.15533055 0.17729226 0.20671213]] entropy:[1.784083]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1200 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06955752 0.01301463 0.1632957  0.0953003  0.22697093 0.37954205]] probs:[[0.15144037 0.14311509 0.16632274 0.15538949 0.17725785 0.20647442]] entropy:[1.784158]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1201 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07095855 0.0115708  0.16428514 0.09582418 0.22558604 0.37974873]] probs:[[0.15164523 0.14290157 0.1664792  0.15546326 0.17700382 0.20650692]] entropy:[1.7841585]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1202 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07260594 0.00975945 0.16543922 0.09664543 0.22335456 0.37898523]] probs:[[0.15193184 0.14267732 0.16671158 0.15562846 0.17665182 0.20639902]] entropy:[1.784206]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1203 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07313809 0.00900388 0.16587508 0.09695712 0.22260813 0.37893224]] probs:[[0.15202007 0.14257644 0.16679233 0.1556845  0.17652854 0.20639807]] entropy:[1.7842096]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1204 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0733499  0.00885882 0.16596659 0.09705859 0.22241218 0.3787515 ]] probs:[[0.15205672 0.14255995 0.1668125  0.15570487 0.17649913 0.20636682]] entropy:[1.7842202]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1205 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06693219 0.00321602 0.17810825 0.09180771 0.22663757 0.3804722 ]] probs:[[0.15100092 0.14167982 0.16875738 0.15480424 0.17714901 0.20660855]] entropy:[1.7838122]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1206 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05613233 -0.00337737  0.19138266  0.084472    0.23466317  0.38573745]] probs:[[0.14921646 0.1405957  0.1708265  0.1535057  0.17838228 0.20747341]] entropy:[1.7830386]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.8356986] v_loss:[[0.00163354]]
DEBUG:chainerrl.agents.a3c:grad norm:14.621934479634202
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1207 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05380892 0.01741828 0.18129912 0.09436343 0.26628542 0.33846492]] probs:[[0.14908314 0.14375544 0.16935451 0.1552534  0.18437664 0.1981769 ]] entropy:[1.7850072]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1208 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05360187 0.01793032 0.18108308 0.09442879 0.26645494 0.3375528 ]] probs:[[0.14907211 0.14384821 0.16934048 0.15528421 0.18443243 0.19802257]] entropy:[1.7850431]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1209 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02873734 0.0281852  0.18571573 0.07520745 0.26531637 0.31785357]] probs:[[0.14664605 0.14656511 0.17157154 0.15362154 0.185787   0.1958087 ]] entropy:[1.7851775]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1210 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03911887 0.02070247 0.20987955 0.070157   0.27977175 0.3280634 ]] probs:[[0.14691664 0.14423575 0.17427354 0.15154818 0.18688965 0.19613633]] entropy:[1.7844354]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1211 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04397932 0.02021941 0.20302063 0.07678223 0.27718812 0.33167088]] probs:[[0.14753082 0.14406681 0.17296313 0.1524505  0.18627906 0.19670962]] entropy:[1.7845984]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1212 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04339923 0.02251256 0.19732685 0.07922573 0.27436477 0.3318922 ]] probs:[[0.14757004 0.14451976 0.17212665 0.1529528  0.18591109 0.19691968]] entropy:[1.784747]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1213 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04289853 0.02468127 0.19339564 0.08169432 0.27237245 0.33181784]] probs:[[0.1475614  0.14489758 0.17152713 0.15339866 0.18562311 0.19699211]] entropy:[1.7848746]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1214 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05407169 0.01835759 0.18390103 0.09338658 0.26856193 0.3390002 ]] probs:[[0.1489748  0.14374818 0.16962779 0.15494838 0.18461405 0.19808687]] entropy:[1.7849592]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.628712] v_loss:[[0.00049145]]
DEBUG:chainerrl.agents.a3c:grad norm:3.3878132482621566
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1215 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15560333 0.061089   0.1446807  0.0830888  0.23930074 0.27113277]] probs:[[0.1655966  0.15066223 0.16379769 0.1540135  0.18005316 0.18587682]] entropy:[1.7888533]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1216 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15729307 0.06287568 0.14447938 0.08454797 0.2394592  0.2678974 ]] probs:[[0.16584854 0.15090607 0.16373697 0.15421224 0.18005118 0.185245  ]] entropy:[1.7889618]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1217 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17530958 0.05292566 0.14125253 0.08375975 0.26102173 0.26993152]] probs:[[0.16799447 0.14864296 0.1623694  0.15329762 0.18302874 0.18466678]] entropy:[1.7884187]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1218 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1769611  0.05078572 0.14174925 0.08285493 0.26622102 0.27076867]] probs:[[0.16810223 0.14817545 0.16228606 0.15300433 0.18379708 0.18463483]] entropy:[1.7882661]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1219 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17013516 0.0583194  0.1485018  0.08462168 0.2577024  0.2724501 ]] probs:[[0.16694221 0.14928122 0.16336948 0.15325975 0.18222004 0.18492727]] entropy:[1.7885622]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1220 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1600569  0.05780619 0.14453074 0.07041317 0.26363963 0.25906658]] probs:[[0.16625468 0.15009527 0.16369332 0.15199949 0.1843993  0.18355797]] entropy:[1.7884746]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1221 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17160268 0.05378903 0.14372762 0.0802781  0.26460016 0.26839027]] probs:[[0.16741507 0.14880884 0.1628128  0.15280332 0.18373117 0.18442884]] entropy:[1.7883673]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1222 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17879488 0.05037797 0.14279845 0.08549325 0.26611453 0.274173  ]] probs:[[0.16816984 0.14790313 0.16222398 0.15318905 0.18351458 0.18499939]] entropy:[1.7882371]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6650561] v_loss:[[0.00067259]]
DEBUG:chainerrl.agents.a3c:grad norm:4.722670127611972
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1223 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24882971 0.06350783 0.12824042 0.05807073 0.26440898 0.25120753]] probs:[[0.1798028  0.14938678 0.15937683 0.14857677 0.18262593 0.18023084]] entropy:[1.7878668]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1224 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20503838 0.06967004 0.16046321 0.07353314 0.22879733 0.2696196 ]] probs:[[0.17249325 0.1506546  0.16497318 0.15123771 0.17664059 0.18400067]] entropy:[1.7889535]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1225 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20369814 0.07630588 0.15564688 0.06319228 0.23151837 0.25793865]] probs:[[0.17282079 0.1521494  0.16471289 0.15016718 0.17769621 0.18245356]] entropy:[1.7890569]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1226 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20069766 0.07523284 0.16641167 0.07017593 0.23016752 0.26915643]] probs:[[0.17162125 0.15138483 0.16583678 0.15062124 0.17675416 0.18378173]] entropy:[1.7890135]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1227 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22960119 0.06238395 0.14905229 0.07655165 0.25257587 0.2755802 ]] probs:[[0.1755308  0.14850183 0.16194645 0.15062074 0.17961025 0.18378995]] entropy:[1.7882929]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1228 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21733011 0.06166159 0.14712147 0.07310331 0.2516082  0.27016672]] probs:[[0.17412917 0.1490272  0.16232309 0.15074213 0.18020146 0.18357696]] entropy:[1.7884178]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1229 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21834542 0.05897781 0.14747243 0.07815138 0.2466063  0.27382308]] probs:[[0.17424147 0.14857271 0.1623199  0.15144886 0.17923592 0.18418114]] entropy:[1.788444]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1230 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21976079 0.05705713 0.14407985 0.07743889 0.24756916 0.27261564]] probs:[[0.17461842 0.14839825 0.16189083 0.1514539  0.17954242 0.18409613]] entropy:[1.7883811]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6876153] v_loss:[[0.00078885]]
DEBUG:chainerrl.agents.a3c:grad norm:18.698391829382278
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1231 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19145194 0.06613198 0.12540524 0.11113104 0.23764876 0.29274514]] probs:[[0.16963519 0.14965464 0.15879332 0.15654278 0.17765562 0.18771848]] entropy:[1.7886994]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1232 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19077916 0.06687369 0.12162263 0.10921806 0.2392089  0.28963703]] probs:[[0.16972592 0.14994664 0.15838495 0.15643239 0.17814802 0.18736205]] entropy:[1.7887118]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1233 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18836126 0.06823911 0.11961687 0.10592636 0.24067885 0.28562394]] probs:[[0.16957462 0.15038082 0.15830897 0.1561564  0.17868252 0.18689664]] entropy:[1.7887557]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1234 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1883703  0.06825704 0.11943231 0.10580304 0.2408259  0.28546456]] probs:[[0.16958426 0.15039071 0.15828732 0.15614462 0.17871734 0.18687579]] entropy:[1.7887547]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1235 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19094561 0.06596325 0.12547812 0.11102155 0.2369731  0.29252943]] probs:[[0.16959633 0.14967088 0.15884893 0.15656903 0.17758486 0.18773003]] entropy:[1.7887094]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1236 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18902211 0.06725836 0.12335967 0.10873498 0.23801345 0.2892902 ]] probs:[[0.16948193 0.1500521  0.15871084 0.15640663 0.17799184 0.18735673]] entropy:[1.7887529]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1237 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18835884 0.06824593 0.11511794 0.10297474 0.24404588 0.28149608]] probs:[[0.16980131 0.15058325 0.15780942 0.1559047  0.17952529 0.18637602]] entropy:[1.7887273]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1238 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19103014 0.06611136 0.12446661 0.11071184 0.23751333 0.29192936]] probs:[[0.16964276 0.14972138 0.15871836 0.15655017 0.17771445 0.18765292]] entropy:[1.7887074]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4738241] v_loss:[[2.6600595e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.25704157030866864
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1239 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17136653 0.08647572 0.13616665 0.09503461 0.2386639  0.2750826 ]] probs:[[0.16696072 0.15337221 0.16118595 0.15469055 0.17858344 0.1852071 ]] entropy:[1.7892709]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1240 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17421128 0.08095559 0.12048774 0.09351885 0.23940958 0.25364876]] probs:[[0.1686005  0.1535884  0.1597817  0.15553014 0.17995922 0.18254003]] entropy:[1.7894467]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1241 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16963936 0.08285463 0.12118905 0.0905595  0.2383017  0.25067922]] probs:[[0.16809452 0.15412158 0.16014443 0.15531364 0.18004175 0.18228407]] entropy:[1.789511]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1242 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17313883 0.07999344 0.13239959 0.10355016 0.229269   0.2660188 ]] probs:[[0.16781385 0.15288866 0.16111463 0.15653297 0.17750263 0.18414718]] entropy:[1.7895305]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1243 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16671827 0.08234792 0.13518155 0.10006849 0.22767666 0.2622282 ]] probs:[[0.1670385  0.15352356 0.16185287 0.15626833 0.17753766 0.18377906]] entropy:[1.7896283]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1244 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1731144  0.08001875 0.13227275 0.10344803 0.22936317 0.26590857]] probs:[[0.16781652 0.1528987  0.16110069 0.1565233  0.17752652 0.1841343 ]] entropy:[1.7895302]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1245 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16671684 0.08234811 0.13518226 0.10006931 0.22767624 0.2622267 ]] probs:[[0.1670383  0.15352362 0.16185302 0.15626849 0.17753763 0.18377884]] entropy:[1.789628]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1246 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17311297 0.08001896 0.1322722  0.10344786 0.22936344 0.26590624]] probs:[[0.1678164  0.15289885 0.16110072 0.15652339 0.1775267  0.184134  ]] entropy:[1.7895302]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.630886] v_loss:[[0.000596]]
DEBUG:chainerrl.agents.a3c:grad norm:7.008571813489278
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1247 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1767686  0.07067479 0.15141612 0.06653889 0.26394674 0.2548777 ]] probs:[[0.16828595 0.15134634 0.16407312 0.15072168 0.1836153  0.1819576 ]] entropy:[1.7887013]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1248 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17676629 0.07067009 0.15141079 0.06653512 0.26394656 0.25486356]] probs:[[0.16828643 0.1513464  0.16407308 0.15072188 0.1836162  0.18195598]] entropy:[1.7887013]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1249 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17676567 0.07067016 0.1514107  0.06653514 0.26394674 0.25486276]] probs:[[0.16828635 0.15134645 0.1640731  0.15072192 0.18361628 0.18195586]] entropy:[1.7887015]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1250 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17676647 0.0706757  0.15141648 0.06653903 0.26394704 0.25487632]] probs:[[0.16828567 0.15134653 0.16407324 0.15072176 0.18361543 0.18195744]] entropy:[1.7887014]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1251 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17676412 0.07067034 0.15141046 0.06653512 0.26394704 0.2548607 ]] probs:[[0.16828622 0.15134656 0.16407317 0.15072201 0.18361646 0.1819556 ]] entropy:[1.7887017]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1252 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17676492 0.07067589 0.15141624 0.06653901 0.26394725 0.2548743 ]] probs:[[0.16828549 0.15134664 0.16407329 0.15072182 0.18361555 0.18195714]] entropy:[1.7887015]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1253 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17676267 0.07067056 0.1514102  0.066535   0.26394716 0.25485864]] probs:[[0.16828607 0.1513467  0.16407323 0.15072207 0.18361658 0.18195534]] entropy:[1.7887017]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1254 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17676343 0.07067613 0.15141597 0.06653891 0.26394746 0.25487232]] probs:[[0.16828535 0.15134679 0.16407335 0.15072191 0.18361571 0.18195692]] entropy:[1.7887017]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4542753] v_loss:[[6.3856896e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.13714161422035548
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1255 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10012488 0.16653061 0.13446508 0.00351071 0.1626485  0.35973805]] probs:[[0.15692547 0.16770001 0.16240792 0.14247362 0.16705024 0.20344266]] entropy:[1.7857747]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1256 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10010147 0.1661759  0.1348913  0.00304239 0.16222845 0.3595201 ]] probs:[[0.1569493  0.1676699  0.16250563 0.14243186 0.16700934 0.20343395]] entropy:[1.785774]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1257 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10012612 0.16656114 0.1344295  0.0035497  0.16268456 0.35975537]] probs:[[0.15692338 0.16770269 0.16239977 0.1424771  0.16705383 0.20344321]] entropy:[1.785775]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1258 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10010078 0.16617623 0.13489103 0.00304223 0.16222851 0.35951903]] probs:[[0.15694925 0.16767003 0.16250564 0.14243189 0.16700943 0.20343381]] entropy:[1.7857744]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1259 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10012545 0.16656148 0.1344292  0.00354953 0.16268457 0.3597543 ]] probs:[[0.15692332 0.16770278 0.16239978 0.14247712 0.16705388 0.20344305]] entropy:[1.785775]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1260 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10012501 0.16656104 0.13442971 0.00354879 0.16268398 0.3597534 ]] probs:[[0.15692332 0.16770281 0.16239993 0.14247708 0.16705386 0.20344298]] entropy:[1.785775]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1261 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10009971 0.16617656 0.13489057 0.00304221 0.16222854 0.35951754]] probs:[[0.15694916 0.16767016 0.16250564 0.14243196 0.16700952 0.20343362]] entropy:[1.7857744]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1262 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10012438 0.16656192 0.1344288  0.00354942 0.16268481 0.35975274]] probs:[[0.15692325 0.16770296 0.1623998  0.14247717 0.16705403 0.20344286]] entropy:[1.7857751]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5198094] v_loss:[[0.00010313]]
DEBUG:chainerrl.agents.a3c:grad norm:0.499946546900638
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1263 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1473836  0.12310471 0.1394484  0.00209775 0.12480991 0.28866372]] probs:[[0.1677202  0.16369718 0.16639458 0.14504026 0.16397655 0.19317128]] entropy:[1.7882355]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1264 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14744605 0.12260152 0.14015448 0.00151651 0.12419265 0.28830808]] probs:[[0.16776565 0.16364893 0.16654682 0.14498618 0.16390954 0.19314285]] entropy:[1.7882302]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1265 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14738122 0.12311813 0.13943684 0.00210783 0.12482413 0.28867108]] probs:[[0.16771895 0.16369852 0.1663918  0.14504096 0.16397804 0.19317171]] entropy:[1.7882355]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1266 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14738104 0.12311707 0.13943812 0.00210666 0.12482297 0.28866974]] probs:[[0.167719   0.16369845 0.16639212 0.14504088 0.16397794 0.19317156]] entropy:[1.7882354]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1267 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1474452  0.12260156 0.14015415 0.00151656 0.12419268 0.28830627]] probs:[[0.1677656  0.16364905 0.16654687 0.14498627 0.16390964 0.19314262]] entropy:[1.7882302]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-3.4353442] v_loss:[[0.10370687]]
DEBUG:chainerrl.agents.a3c:grad norm:417.062135193463
DEBUG:chainerrl.agents.a3c:update


INFO: outdir:result global_step:2603 local_step:1267 R:0.75
INFO: statistics:[('average_value', 0.21199873739921113), ('average_entropy', 1.2847700697476943)]
DEBUG: Closing video encoder: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000001.mp4


DEBUG:chainerrl.agents.a3c:t:1268 r:0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10898884  0.14242293 -0.03842914  0.05482633  0.52264607  0.25099912]] probs:[[0.1273733  0.16378161 0.13668539 0.1500453  0.23954877 0.1825657 ]] entropy:[1.7682036]
DEBUG:chainerrl.agents.a3c:t:1269 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15254852  0.15469225 -0.07283732  0.06484813  0.6306963   0.19726104]] probs:[[0.12058596 0.16395691 0.1305915  0.14986871 0.2639099  0.17108704]] entropy:[1.7554735]
DEBUG:chainerrl.agents.a3c:t:1270 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15132433  0.15983208 -0.07551658  0.08083294  0.6564825   0.1798445 ]] probs:[[0.11989467 0.16365655 0.129337   0.15122531 0.26892176 0.16696471]] entropy:[1.7527758]
DEBUG:chainerrl.agents.a3c:t:1271 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1449523   0.16245851 -0.07302548  0.09660236  0.6673861   0.16904382]] probs:[[0.

DEBUG:chainerrl.agents.a3c:t:1297 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08207134  0.0780493   0.02851561  0.02136177  0.2667872   0.22656624]] probs:[[0.13930342 0.16349381 0.15559265 0.15448354 0.19745545 0.1896712 ]] entropy:[1.7843134]
DEBUG:chainerrl.agents.a3c:t:1298 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0769197   0.09238847  0.03575308  0.02144523  0.27610752  0.21586356]] probs:[[0.1394576  0.16518545 0.1560901  0.15387268 0.19849974 0.18689442]] entropy:[1.7845091]
DEBUG:chainerrl.agents.a3c:t:1299 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07053181  0.11862032  0.05776084  0.03204094  0.28780606  0.1943106 ]] probs:[[0.13913424 0.16810544 0.15817973 0.15416324 0.19909401 0.1813233 ]] entropy:[1.7850472]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5477432] v_loss:[[0.00020504]]
DEBUG:chainerrl.agents.a3c:grad norm:4.249661494218694
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3

DEBUG:chainerrl.agents.a3c:t:1325 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03088882 -0.00144224  0.1634723  -0.08650711  0.18221109  0.2563404 ]] probs:[[0.15587364 0.15091467 0.17797251 0.13860798 0.18133895 0.19529226]] entropy:[1.7847952]
DEBUG:chainerrl.agents.a3c:t:1326 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03288854 -0.00248708  0.169697   -0.08604459  0.17313413  0.26315442]] probs:[[0.15602548 0.15060247 0.17890014 0.1385299  0.1795161  0.19642588]] entropy:[1.7846577]
DEBUG:chainerrl.agents.a3c:t:1327 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03288087  0.00241429  0.16365542 -0.0836216   0.18359776  0.2590929 ]] probs:[[0.15585452 0.15117776 0.17762908 0.13871486 0.18120697 0.19541687]] entropy:[1.7848529]
DEBUG:chainerrl.agents.a3c:t:1328 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02256692 -0.02159709  0.13837604 -0.09917289  0.17669386  0.23316036]] probs:[[

DEBUG:chainerrl.agents.a3c:t:1354 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06694333  0.01499557  0.11261508 -0.04321695  0.11623055  0.19261394]] probs:[[0.14698388 0.15953475 0.17589396 0.15051298 0.17653105 0.19054344]] entropy:[1.7874131]
DEBUG:chainerrl.agents.a3c:t:1355 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02956967  0.04098633  0.09449202 -0.0323715   0.09878863  0.19267349]] probs:[[0.15178588 0.16288213 0.17183463 0.1513612  0.17257452 0.1895616 ]] entropy:[1.7886178]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3475835] v_loss:[[0.0001766]]
DEBUG:chainerrl.agents.a3c:grad norm:1.0841723137686796
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1356 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03697791  0.09496661  0.09941211 -0.05811881  0.08165288  0.15471698]] probs:[[0.16118503 0.17080826 0.17156926 0.14656311 0.16854922 0.18132517]] entropy:[1.7896444]
DEBUG:chainerrl.agents.a3

DEBUG:chainerrl.agents.a3c:t:1382 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13869222  0.10631357  0.10377219 -0.00764776  0.11419098  0.16174376]] probs:[[0.17250891 0.16701277 0.16658886 0.14902423 0.16833358 0.17653169]] entropy:[1.7903947]
DEBUG:chainerrl.agents.a3c:t:1383 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17124    0.14889538 0.14532967 0.01732207 0.13255037 0.1813456 ]] probs:[[0.17295432 0.16913258 0.16853057 0.14828107 0.16639057 0.17471097]] entropy:[1.7903692]
DEBUG:chainerrl.agents.a3c:t:1384 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17391925 0.14816968 0.1512059  0.01920555 0.13360994 0.17881666]] probs:[[0.17318465 0.16878214 0.16929539 0.14836043 0.16634251 0.17403488]] entropy:[1.7903945]
DEBUG:chainerrl.agents.a3c:t:1385 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18359405 0.1454734  0.1548748  0.02342272 0.125286   0.17346278]] probs:[[0.17483851 0.16829

DEBUG:chainerrl.agents.a3c:t:1411 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00590242  0.0412731   0.16905276  0.02669206  0.1193071   0.03178161]] probs:[[0.15517302 0.1626688  0.18484098 0.16031413 0.17587091 0.16113213]] entropy:[1.7898834]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6214842] v_loss:[[0.00039762]]
DEBUG:chainerrl.agents.a3c:grad norm:2.413716342755206
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1412 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02796827  0.05643218  0.1708464   0.02098328  0.12033035  0.0347571 ]] probs:[[0.15191142 0.16528943 0.18532524 0.15953274 0.17619586 0.16174532]] entropy:[1.7895772]
DEBUG:chainerrl.agents.a3c:t:1413 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0226579   0.05591913  0.17658165  0.02395171  0.12398189  0.03775648]] probs:[[0.15220357 0.16464572 0.18576056 0.15946566 0.17624213 0.16168231]] entropy:[1.7895433]
DEBUG:chainerrl.agents.a3

DEBUG:chainerrl.agents.a3c:t:1439 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01300224  0.04754624  0.12094065 -0.00616207  0.07595306  0.07477801]] probs:[[0.15631375 0.16607071 0.1787178  0.15738662 0.1708559  0.17065525]] entropy:[1.7906415]
DEBUG:chainerrl.agents.a3c:t:1440 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01285374  0.04784269  0.12092002 -0.00613237  0.07626835  0.07485924]] probs:[[0.15631488 0.1660965  0.1786889  0.15736908 0.17088565 0.17064501]] entropy:[1.7906423]
DEBUG:chainerrl.agents.a3c:t:1441 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01269392  0.04815322  0.12089124 -0.00610882  0.0765966   0.07496005]] probs:[[0.15631667 0.1661234  0.17865722 0.15734941 0.17091636 0.17063688]] entropy:[1.790643]
DEBUG:chainerrl.agents.a3c:t:1442 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01247585  0.04857884  0.12085422 -0.00608594  0.07704326  0.07510776]] probs:[[0

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4900501] v_loss:[[4.0266546e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.1104335992804585
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1468 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02385497  0.06824297  0.08427679 -0.01465185  0.11291336  0.13702059]] probs:[[0.15916319 0.16638727 0.16907659 0.15315083 0.17398837 0.17823371]] entropy:[1.790456]
DEBUG:chainerrl.agents.a3c:t:1469 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02538317  0.0741424   0.09633329 -0.02115732  0.10122033  0.15968078]] probs:[[0.15871653 0.1666472  0.17038658 0.15149903 0.17122132 0.18152936]] entropy:[1.790108]
DEBUG:chainerrl.agents.a3c:t:1470 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03066226  0.07529356  0.11184635 -0.02647447  0.08315954  0.19031161]] probs:[[0.15868793 0.1659308  0.17210825 0.14987518 0.16724116 0.18615672]] entropy:[1.7895015]
DEBUG:chainerrl.agents.

DEBUG:chainerrl.agents.a3c:t:1496 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02617416  0.21179126  0.06271683 -0.10197365  0.11449658  0.20358197]] probs:[[0.14929903 0.1894105  0.16317809 0.1384005  0.17184998 0.18786193]] entropy:[1.78538]
DEBUG:chainerrl.agents.a3c:t:1497 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00481221  0.21566726  0.06405849 -0.10408778  0.11342487  0.21895121]] probs:[[0.15152086 0.18889716 0.16232395 0.1372011  0.17053838 0.18951851]] entropy:[1.785254]
DEBUG:chainerrl.agents.a3c:t:1498 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00594601  0.21656527  0.06578357 -0.10486113  0.1133881   0.22104008]] probs:[[0.15126412 0.18896063 0.16251284 0.137018   0.17043628 0.18980809]] entropy:[1.7851554]
DEBUG:chainerrl.agents.a3c:t:1499 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02767682  0.22573419  0.06214757 -0.08920239  0.11529265  0.20556904]] probs:[[0.1

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1524 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09004552  0.05029767  0.02716917 -0.14549114  0.13364948  0.13876167]] probs:[[0.17286895 0.16613254 0.16233423 0.13659182 0.18057346 0.18149896]] entropy:[1.7874887]
DEBUG:chainerrl.agents.a3c:t:1525 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08867342  0.05001271  0.02953315 -0.13907193  0.13584822  0.13987716]] probs:[[0.17235968 0.16582328 0.16246183 0.13725467 0.18068555 0.18141499]] entropy:[1.787637]
DEBUG:chainerrl.agents.a3c:t:1526 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08868744  0.05012859  0.0293323  -0.1380822   0.13632359  0.14037369]] probs:[[0.17231023 0.16579261 0.16238034 0.13734923 0.18071707 0.18145047]] entropy:[1.7876492]
DEBUG:chainerrl.agents.a3c:t:1527 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08840422  0.04996799  0.02967345 -0.1383459   

DEBUG:chainerrl.agents.a3c:t:1553 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07649348  0.0610096   0.09325433 -0.04120271  0.02028194  0.13483104]] probs:[[0.16960895 0.16700298 0.1724757  0.15077661 0.16033798 0.17979783]] entropy:[1.7902247]
DEBUG:chainerrl.agents.a3c:t:1554 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07744726  0.06453843  0.09324826 -0.04208557  0.01974432  0.13309751]] probs:[[0.16973336 0.16755639 0.17243662 0.15061033 0.16021647 0.17944685]] entropy:[1.7902263]
DEBUG:chainerrl.agents.a3c:t:1555 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07530474  0.06895587  0.09085588 -0.0486834   0.02078917  0.13234437]] probs:[[0.1695382  0.16846523 0.17219532 0.1497684  0.16054314 0.1794897 ]] entropy:[1.7901523]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4671888] v_loss:[[4.815605e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.6796018758038422
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:1582 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23460504 0.18933977 0.1501353  0.06285034 0.17474948 0.25678667]] probs:[[0.17601731 0.16822748 0.16175981 0.14823925 0.16579081 0.17996529]] entropy:[1.789844]
DEBUG:chainerrl.agents.a3c:t:1583 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2094285  0.19117802 0.15127844 0.06159988 0.1989235  0.2645089 ]] probs:[[0.17140375 0.16830392 0.16172087 0.14784926 0.16961259 0.18110959]] entropy:[1.7898717]
DEBUG:chainerrl.agents.a3c:t:1584 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21622182 0.19275194 0.15357532 0.06256088 0.19333945 0.26278615]] probs:[[0.17245291 0.16845259 0.16198078 0.1478892  0.16855158 0.18067296]] entropy:[1.7899008]
DEBUG:chainerrl.agents.a3c:t:1585 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20418102 0.18862277 0.14063327 0.07425681 0.19030085 0.26079285]] probs:[[0.17106842 0.1684275  0.1

DEBUG:chainerrl.agents.a3c:t:1611 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1735695  0.23403518 0.2562506  0.03406975 0.07281039 0.2878381 ]] probs:[[0.16546638 0.17578009 0.1797288  0.14392154 0.14960656 0.18549661]] entropy:[1.7874532]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3794911] v_loss:[[5.6964862e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:1.780986185935903
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1612 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20974727 0.2055436  0.22590595 0.0498908  0.08854956 0.25750557]] probs:[[0.17243946 0.1717161  0.17524849 0.1469643  0.15275702 0.18087469]] entropy:[1.7889678]
DEBUG:chainerrl.agents.a3c:t:1613 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20606408 0.20650858 0.2306533  0.05220867 0.08862469 0.25727743]] probs:[[0.17168924 0.17176558 0.17596328 0.14720567 0.15266511 0.18071108]] entropy:[1.7889884]
DEBUG:chainerrl.agents.a3c:t:1614 r:0.0 

DEBUG:chainerrl.agents.a3c:t:1640 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23362695 0.21904114 0.18519673 0.11817697 0.10801252 0.27219254]] probs:[[0.17389843 0.1713804  0.16567717 0.15493745 0.15337056 0.18073593]] entropy:[1.7899903]
DEBUG:chainerrl.agents.a3c:t:1641 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22831583 0.22461015 0.18494526 0.12353496 0.10742357 0.2683615 ]] probs:[[0.17296948 0.17232968 0.16562803 0.15576278 0.15327333 0.18003671]] entropy:[1.7901028]
DEBUG:chainerrl.agents.a3c:t:1642 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22611172 0.22749442 0.1834495  0.12362103 0.10841504 0.26527804]] probs:[[0.17267834 0.17291726 0.16546643 0.15585715 0.1535051  0.1795757 ]] entropy:[1.7901523]
DEBUG:chainerrl.agents.a3c:t:1643 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22284777 0.23314136 0.18088073 0.12235378 0.11062159 0.25884408]] probs:[[0.17229103 0.1740737  0.

DEBUG:chainerrl.agents.a3c:t:1669 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20188408 0.2715454  0.2314841  0.11213048 0.0753253  0.19425112]] probs:[[0.16978326 0.18203227 0.17488398 0.15520845 0.14959984 0.16849224]] entropy:[1.7895254]
DEBUG:chainerrl.agents.a3c:t:1670 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18482988 0.2855814  0.24363133 0.11691967 0.06372175 0.19398189]] probs:[[0.16677666 0.18445529 0.17687744 0.15582684 0.1477538  0.16831002]] entropy:[1.789051]
DEBUG:chainerrl.agents.a3c:t:1671 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17954096 0.2860149  0.24710758 0.12019251 0.06532247 0.19388826]] probs:[[0.16580606 0.18443419 0.17739616 0.15625204 0.14790945 0.16820207]] entropy:[1.7890671]
DEBUG:chainerrl.agents.a3c:t:1672 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18959913 0.27057168 0.23589377 0.1164518  0.07163395 0.19254681]] probs:[[0.16798627 0.18215443 0.1

DEBUG:chainerrl.agents.a3c:t:1698 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16885804 0.21110235 0.1795966  0.18007857 0.09504464 0.25425646]] probs:[[0.16438648 0.17147964 0.16616127 0.16624138 0.15268956 0.17904168]] entropy:[1.790622]
DEBUG:chainerrl.agents.a3c:t:1699 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18461795 0.21728139 0.13942957 0.16131575 0.09213986 0.2879704 ]] probs:[[0.16704144 0.17258766 0.15966111 0.163194   0.15228653 0.18522926]] entropy:[1.789833]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5300124] v_loss:[[0.00010515]]
DEBUG:chainerrl.agents.a3c:grad norm:0.8091243805232055
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1700 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.44850126 0.20476411 0.13750194 0.11215281 0.04059008 0.13459305]] probs:[[0.21615052 0.16939577 0.15837659 0.15441234 0.1437483  0.15791656]] entropy:[1.7824676]
DEBUG:chainerrl.agents.a3c:t:1701 r:0.0 a:4 

DEBUG:chainerrl.agents.a3c:t:1727 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.33374733 0.15408114 0.16363624 0.13612482 0.1909727  0.17559285]] probs:[[0.19155374 0.16005255 0.1615892  0.15720426 0.1660674  0.16353287]] entropy:[1.789472]
DEBUG:chainerrl.agents.a3c:t:1728 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.34392393 0.1616004  0.16356689 0.1282443  0.18421389 0.17444915]] probs:[[0.19339381 0.16116118 0.16147842 0.15587415 0.16484714 0.16324528]] entropy:[1.7891464]
DEBUG:chainerrl.agents.a3c:t:1729 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.34318838 0.15805094 0.1602117  0.12210977 0.19292758 0.17692026]] probs:[[0.19332136 0.16064812 0.16099562 0.15497677 0.16634984 0.16370822]] entropy:[1.789079]
DEBUG:chainerrl.agents.a3c:t:1730 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.3435777  0.16408111 0.16501276 0.13015488 0.18086222 0.17459698]] probs:[[0.19326162 0.16150695 0.16

DEBUG:chainerrl.agents.a3c:grad norm:0.42288235218704695
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1756 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1859696  0.21301739 0.09919126 0.14678289 0.30021304 0.15207154]] probs:[[0.16684566 0.17142005 0.1529775  0.16043396 0.18703815 0.1612847 ]] entropy:[1.789711]
DEBUG:chainerrl.agents.a3c:t:1757 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18690816 0.22315015 0.12350121 0.12164244 0.2879784  0.17781532]] probs:[[0.16640072 0.17254202 0.1561773  0.15588728 0.18409815 0.16489452]] entropy:[1.7900724]
DEBUG:chainerrl.agents.a3c:t:1758 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19474733 0.2207736  0.12173443 0.11132885 0.2931844  0.18037486]] probs:[[0.16764183 0.17206219 0.15583797 0.1542248  0.18498354 0.16524963]] entropy:[1.7898567]
DEBUG:chainerrl.agents.a3c:t:1759 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19066453

DEBUG:chainerrl.agents.a3c:t:1785 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21239592 0.17619331 0.11582729 0.11123    0.24186246 0.21112429]] probs:[[0.17227094 0.16614582 0.15641299 0.15569554 0.17742269 0.17205201]] entropy:[1.7905481]
DEBUG:chainerrl.agents.a3c:t:1786 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20468129 0.16339068 0.11557202 0.11766935 0.22320016 0.19285257]] probs:[[0.17247641 0.16549978 0.15777203 0.15810329 0.17570023 0.17044824]] entropy:[1.7909067]
DEBUG:chainerrl.agents.a3c:t:1787 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20383228 0.16240153 0.11558181 0.11879019 0.22125451 0.18989031]] probs:[[0.17249854 0.16549782 0.15792786 0.15843536 0.17553018 0.17011026]] entropy:[1.7909474]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3889902] v_loss:[[4.0747644e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.30930592884310426
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1788 r:0.

DEBUG:chainerrl.agents.a3c:t:1814 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16730066 0.18793602 0.10269152 0.13204436 0.2017419  0.1500132 ]] probs:[[0.16830644 0.17181557 0.15777613 0.16247596 0.1742041  0.16542184]] entropy:[1.7912056]
DEBUG:chainerrl.agents.a3c:t:1815 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.166613   0.19658917 0.0914865  0.13358384 0.21618865 0.14396508]] probs:[[0.16795436 0.17306522 0.15579885 0.16249758 0.17649066 0.1641933 ]] entropy:[1.7909158]
DEBUG:chainerrl.agents.a3c:t:1816 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16706152 0.18636411 0.10621955 0.13010542 0.20095363 0.15204503]] probs:[[0.16824399 0.17152306 0.15831286 0.16213983 0.17404385 0.16573642]] entropy:[1.7912434]
DEBUG:chainerrl.agents.a3c:t:1817 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16696988 0.18442203 0.10826048 0.13012607 0.1981684  0.15287994]] probs:[[0.16829035 0.17125314 0.

DEBUG:chainerrl.agents.a3c:t:1843 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12930217 0.18818673 0.16771914 0.10673803 0.14688434 0.19520208]] probs:[[0.16224909 0.17208995 0.16860348 0.15862907 0.165127   0.17330146]] entropy:[1.7912674]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4725797] v_loss:[[2.5964619e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.07867231726023342
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1844 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14418274 0.19057626 0.15942936 0.11534335 0.14523613 0.17935091]] probs:[[0.16471018 0.17253171 0.16724071 0.16002789 0.16488378 0.17060581]] entropy:[1.7914554]
DEBUG:chainerrl.agents.a3c:t:1845 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1776726  0.17848986 0.16055647 0.11902921 0.11602209 0.18966417]] probs:[[0.17009217 0.17023124 0.16720563 0.16040424 0.1599226  0.17214413]] entropy:[1.7913392]
DEBUG:chainerrl.agents.a3c:t:1846 r:0.

DEBUG:chainerrl.agents.a3c:t:1872 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15027863 0.2769439  0.01021825 0.11958916 0.13459691 0.09370323]] probs:[[0.16938946 0.1922633  0.14725122 0.16426995 0.16675386 0.16007222]] entropy:[1.7885381]
DEBUG:chainerrl.agents.a3c:t:1873 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15079695 0.27832437 0.00914846 0.11916848 0.13428476 0.0926126 ]] probs:[[0.16949417 0.1925481  0.14710845 0.16421722 0.16671842 0.15991369]] entropy:[1.7884706]
DEBUG:chainerrl.agents.a3c:t:1874 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15093084 0.27859792 0.00883238 0.11913522 0.13428202 0.09250671]] probs:[[0.16951583 0.19259961 0.14706106 0.16421077 0.16671696 0.15989578]] entropy:[1.788456]
DEBUG:chainerrl.agents.a3c:t:1875 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15094282 0.27862895 0.00880627 0.1191258  0.13427588 0.09248327]] probs:[[0.16951825 0.192606   0.1

DEBUG:chainerrl.agents.a3c:t:1901 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13012178 0.16011545 0.20570299 0.14352863 0.13060018 0.13035573]] probs:[[0.16331413 0.16828673 0.17613606 0.1655184  0.16339229 0.16335236]] entropy:[1.7913842]
DEBUG:chainerrl.agents.a3c:t:1902 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13297665 0.16166216 0.20694922 0.14379506 0.1325857  0.13064085]] probs:[[0.16355821 0.1683179  0.17611577 0.16533725 0.16349429 0.16317661]] entropy:[1.7913866]
DEBUG:chainerrl.agents.a3c:t:1903 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13266706 0.16127872 0.20695561 0.1436408  0.13232617 0.1305096 ]] probs:[[0.16354084 0.16828759 0.1761527  0.16534537 0.1634851  0.16318838]] entropy:[1.7913848]
DEBUG:chainerrl.agents.a3c:t:1904 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12925313 0.1594396  0.20641604 0.14320724 0.12939206 0.1292461 ]] probs:[[0.16326402 0.1682675  0.

DEBUG:chainerrl.agents.a3c:t:1930 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13453874 0.12476879 0.16335401 0.07234929 0.15590073 0.19071156]] probs:[[0.1656016  0.16399157 0.17044288 0.15561663 0.16917725 0.17517014]] entropy:[1.7910851]
DEBUG:chainerrl.agents.a3c:t:1931 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13560905 0.1246602  0.16033593 0.06981315 0.15427642 0.1902665 ]] probs:[[0.16596162 0.16415446 0.1701165  0.15539351 0.1690888  0.17528515]] entropy:[1.7910768]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5917354] v_loss:[[0.00031626]]
DEBUG:chainerrl.agents.a3c:grad norm:1.0399135654431104
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1932 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12557468 0.14204101 0.15206358 0.09846224 0.14858739 0.17090856]] probs:[[0.16430198 0.16702983 0.1687123  0.15990719 0.16812685 0.17192183]] entropy:[1.7915024]
DEBUG:chainerrl.agents.a3c:t:1933 r:0.0 a:

DEBUG:chainerrl.agents.a3c:t:1959 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17176051 0.1858801  0.0818121  0.14761063 0.20944145 0.23479967]] probs:[[0.16645011 0.16881698 0.1521318  0.16247852 0.17284177 0.17728078]] entropy:[1.7905953]
DEBUG:chainerrl.agents.a3c:t:1960 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17196493 0.18541977 0.08155146 0.14728302 0.20922247 0.23462497]] probs:[[0.16651835 0.16877396 0.15212339 0.16245866 0.17283942 0.17728622]] entropy:[1.7905946]
DEBUG:chainerrl.agents.a3c:t:1961 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17233647 0.18560043 0.08155252 0.14794536 0.20912966 0.23474382]] probs:[[0.16654605 0.16876982 0.15209235 0.16253296 0.17278793 0.17727092]] entropy:[1.7905965]
DEBUG:chainerrl.agents.a3c:t:1962 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17017974 0.17266291 0.0823217  0.13989136 0.1703839  0.2078146 ]] probs:[[0.1687166  0.16913606 0.

DEBUG:chainerrl.agents.a3c:grad norm:105.16415708764909
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:1988 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.25733042 0.15653014 0.06425869 0.04744777 0.2661332  0.19659562]] probs:[[0.18217821 0.1647098  0.15019189 0.14768812 0.18378897 0.17144297]] entropy:[1.7881624]
DEBUG:chainerrl.agents.a3c:t:1989 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.25733048 0.15726468 0.0647089  0.04829944 0.26680622 0.19720446]] probs:[[0.18207943 0.16474144 0.15017803 0.14773381 0.18381298 0.17145434]] entropy:[1.788173]
DEBUG:chainerrl.agents.a3c:t:1990 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.25729543 0.1575801  0.06452207 0.04776971 0.2675272  0.19650485]] probs:[[0.1820818  0.16480133 0.15015718 0.14766265 0.18395437 0.17134266]] entropy:[1.7881517]
DEBUG:chainerrl.agents.a3c:t:1991 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.25702173 

DEBUG:chainerrl.agents.a3c:t:2017 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14852047 0.2071451  0.0087664  0.0659382  0.342574   0.19256957]] probs:[[0.16366933 0.17355122 0.14232227 0.15069619 0.19872096 0.17103997]] entropy:[1.7859709]
DEBUG:chainerrl.agents.a3c:t:2018 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15290283 0.19843923 0.00234449 0.06291322 0.34695882 0.19456793]] probs:[[0.16454223 0.1722081  0.14154376 0.15038185 0.1997813  0.17154273]] entropy:[1.7856736]
DEBUG:chainerrl.agents.a3c:t:2019 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15365416 0.20173612 0.00355658 0.06391515 0.3472049  0.19428812]] probs:[[0.16449879 0.17260146 0.14157161 0.15037982 0.19962767 0.17132069]] entropy:[1.785698]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4706523] v_loss:[[0.0001838]]
DEBUG:chainerrl.agents.a3c:grad norm:1.1179806755681756
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:2020 r:0.0 a:4 

DEBUG:chainerrl.agents.a3c:t:2046 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09512734 0.10446081 0.06597551 0.19080906 0.25771934 0.214256  ]] probs:[[0.15663852 0.15810733 0.15213813 0.1723664  0.18429406 0.1764556 ]] entropy:[1.789296]
DEBUG:chainerrl.agents.a3c:t:2047 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09512671 0.1044608  0.06596986 0.19080818 0.25771397 0.21425584]] probs:[[0.15663874 0.15810767 0.15213759 0.1723666  0.18429346 0.17645594]] entropy:[1.7892962]
DEBUG:chainerrl.agents.a3c:t:2048 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09512869 0.10446245 0.06596883 0.19080824 0.25771463 0.21425492]] probs:[[0.15663901 0.15810788 0.15213738 0.17236656 0.18429352 0.17645574]] entropy:[1.7892962]
DEBUG:chainerrl.agents.a3c:pi_loss:[-5.093573] v_loss:[[0.27605927]]
DEBUG:chainerrl.agents.a3c:grad norm:419.5471430532931
DEBUG:chainerrl.agents.a3c:update


INFO: outdir:result global_step:4197 local_step:2048 R:0.75
INFO: statistics:[('average_value', 0.2061407254461939), ('average_entropy', 1.5580894541992718)]


DEBUG:chainerrl.agents.a3c:t:2049 r:0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.3809764  -0.21301393  0.05368926 -0.13602981  0.14881665  0.34397975]] probs:[[0.21617891 0.11935664 0.15583834 0.12890816 0.17139085 0.20832716]] entropy:[1.7676935]
DEBUG:chainerrl.agents.a3c:t:2050 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.41111764 -0.262617   -0.02469785 -0.14903174  0.22557788  0.4159354 ]] probs:[[0.219147   0.11172138 0.14173049 0.12516005 0.18203573 0.22020534]] entropy:[1.7578738]
DEBUG:chainerrl.agents.a3c:t:2051 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.41375935 -0.26709333 -0.03527528 -0.15274864  0.23544337  0.42453125]] probs:[[0.21932514 0.11101913 0.13998295 0.12446783 0.18350452 0.22170046]] entropy:[1.7564906]
DEBUG:chainerrl.agents.a3c:t:2052 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.4140373  -0.2674312  -0.03686409 -0.15348051  0.23664232  0.42566326]] probs:[[0.

DEBUG:chainerrl.agents.a3c:t:2078 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15911238 -0.20650207  0.15137368 -0.08575377  0.1770919   0.47178373]] probs:[[0.17082089 0.11851045 0.16950408 0.13372019 0.17391995 0.23352449]] entropy:[1.7683806]
DEBUG:chainerrl.agents.a3c:t:2079 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15908925 -0.20674165  0.15146896 -0.0857019   0.17711309  0.47183123]] probs:[[0.17081599 0.11848141 0.16951929 0.13372639 0.17392267 0.23353429]] entropy:[1.7683686]
DEBUG:chainerrl.agents.a3c:t:2080 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15958793 -0.20288783  0.1502052  -0.08655351  0.17671792  0.47113913]] probs:[[0.17090386 0.11894073 0.16930781 0.13361461 0.17385665 0.23337634]] entropy:[1.7685572]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4439712] v_loss:[[1.0980632e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:2.279804474424461
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:2106 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09082042 -0.03027106  0.15784238 -0.00762338  0.16504109  0.27789214]] probs:[[0.16275309 0.14419158 0.17403495 0.14749444 0.17529231 0.19623363]] entropy:[1.7861171]
DEBUG:chainerrl.agents.a3c:t:2107 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08752231 -0.03107177  0.1607745  -0.00484786  0.16343462  0.2793485 ]] probs:[[0.1621727  0.14403667 0.17449814 0.14786382 0.17496295 0.19646575]] entropy:[1.786082]
DEBUG:chainerrl.agents.a3c:t:2108 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0967385  -0.03492828  0.15981533 -0.00653768  0.15516414  0.27785096]] probs:[[0.16387168 0.14365533 0.17454115 0.14779224 0.17373121 0.19640839]] entropy:[1.7861178]
DEBUG:chainerrl.agents.a3c:t:2109 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09087776 -0.03320436  0.16084383 -0.00594711  0.15951407  0.27881175]] probs:[[0

DEBUG:chainerrl.agents.a3c:t:2135 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03775385 0.05772324 0.2085045  0.00352505 0.13078158 0.24513873]] probs:[[0.15383045 0.15693323 0.18247291 0.14865412 0.16882773 0.18928163]] entropy:[1.7877415]
DEBUG:chainerrl.agents.a3c:t:2136 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03749241 0.05802927 0.20860906 0.00334275 0.13056515 0.24526706]] probs:[[0.15379214 0.1569832  0.18249424 0.14862886 0.16879328 0.18930824]] entropy:[1.7877336]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3560555] v_loss:[[0.00015149]]
DEBUG:chainerrl.agents.a3c:grad norm:0.37197977935591153
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:2137 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05077395 0.06309702 0.18673797 0.01895484 0.1415262  0.22510046]] probs:[[0.1559536  0.15788732 0.17866679 0.15106942 0.17076884 0.18565406]] entropy:[1.7889036]
DEBUG:chainerrl.agents.a3c:t:2138 r:0.0 a

DEBUG:chainerrl.agents.a3c:t:2164 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11372534 0.0749379  0.2138952  0.03519212 0.06561757 0.13755149]] probs:[[0.16753426 0.16116045 0.18518546 0.15488064 0.15966536 0.1715739 ]] entropy:[1.790025]
DEBUG:chainerrl.agents.a3c:t:2165 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11062276 0.07957295 0.21181136 0.03646124 0.06899335 0.13673335]] probs:[[0.16694175 0.16183788 0.18471858 0.15500902 0.16013472 0.1713581 ]] entropy:[1.7901319]
DEBUG:chainerrl.agents.a3c:t:2166 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10256594 0.07877978 0.20823264 0.04024439 0.06032766 0.12943012]] probs:[[0.16629538 0.1623865  0.18482922 0.15624793 0.15941761 0.1708233 ]] entropy:[1.790204]
DEBUG:chainerrl.agents.a3c:t:2167 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10130716 0.07910785 0.20744336 0.04218971 0.05707988 0.12631576]] probs:[[0.16626002 0.16260982 0.18

DEBUG:chainerrl.agents.a3c:pi_loss:[-0.759095] v_loss:[[0.00768812]]
DEBUG:chainerrl.agents.a3c:grad norm:20.50757459894297
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:2193 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0630423  0.08623021 0.11715543 0.09065676 0.17335467 0.1431862 ]] probs:[[0.15855066 0.16227008 0.16736671 0.16298996 0.17704192 0.1717806 ]] entropy:[1.7910635]
DEBUG:chainerrl.agents.a3c:t:2194 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05679418 0.08844014 0.11919904 0.09503272 0.17008364 0.13744405]] probs:[[0.15774184 0.16281357 0.16789934 0.16389047 0.17666394 0.1709908 ]] entropy:[1.7910937]
DEBUG:chainerrl.agents.a3c:t:2195 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05646697 0.09337611 0.11556433 0.09794196 0.17831556 0.14349097]] probs:[[0.15719983 0.16311035 0.16676992 0.16385677 0.17757025 0.17149287]] entropy:[1.791005]
DEBUG:chainerrl.agents.a3c:t:2196 r:0.0 a:2 p

DEBUG:chainerrl.agents.a3c:t:2221 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01635454  0.00331749  0.313686    0.07149214  0.15577787  0.05563482]] probs:[[0.1478155  0.15075211 0.20561497 0.16138801 0.17558043 0.15884902]] entropy:[1.7851279]
DEBUG:chainerrl.agents.a3c:t:2222 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01659603  0.00347006  0.3137216   0.07144054  0.15586092  0.05565204]] probs:[[0.14777926 0.15077457 0.20562154 0.1613791  0.17559437 0.15885118]] entropy:[1.7851232]
DEBUG:chainerrl.agents.a3c:t:2223 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01719086  0.00384326  0.31355637  0.07130276  0.15543285  0.05520803]] probs:[[0.14772588 0.15086608 0.20563559 0.16139455 0.17556022 0.15881775]] entropy:[1.7851237]
DEBUG:chainerrl.agents.a3c:t:2224 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01658562  0.00349658  0.3136098   0.07119928  0.15596561  0.05561209]] probs:[[

DEBUG:chainerrl.agents.a3c:t:2250 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06924639  0.17419158  0.16383801  0.08968325  0.14359906  0.06461868]] probs:[[0.14102775 0.17989884 0.17804585 0.16532056 0.1744786  0.16122836]] entropy:[1.7885104]
DEBUG:chainerrl.agents.a3c:t:2251 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06910055  0.17408942  0.16398257  0.08970644  0.14366674  0.06471525]] probs:[[0.14103998 0.17986983 0.17806107 0.16531461 0.17448011 0.16123441]] entropy:[1.7885138]
DEBUG:chainerrl.agents.a3c:t:2252 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07401988  0.17671986  0.16170567  0.08876781  0.14155453  0.06248816]] probs:[[0.14055943 0.18061545 0.1779239  0.16540849 0.17437443 0.16111824]] entropy:[1.7883849]
DEBUG:chainerrl.agents.a3c:t:2253 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07489029  0.17723359  0.1613615   0.08855139  0.14109835  0.06204575]] probs:[[

DEBUG:chainerrl.agents.a3c:t:2279 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03274585 0.15508284 0.15795374 0.12709823 0.21329705 0.13916603]] probs:[[0.14986588 0.16936865 0.1698556  0.16469465 0.17952096 0.1666942 ]] entropy:[1.7903353]
DEBUG:chainerrl.agents.a3c:t:2280 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0327475  0.15508176 0.15795338 0.12709743 0.2132969  0.13916592]] probs:[[0.14986616 0.16936852 0.16985558 0.16469455 0.17952098 0.16669421]] entropy:[1.7903354]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.1775962] v_loss:[[0.00470691]]
DEBUG:chainerrl.agents.a3c:grad norm:62.96098683543786
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:2281 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07730321 0.04893568 0.17976567 0.12635496 0.0806333  0.30976778]] probs:[[0.15636623 0.15199283 0.17323749 0.1642275  0.15688781 0.19728817]] entropy:[1.7876751]
DEBUG:chainerrl.agents.a3c:t:2282 r:0.0 a:2

DEBUG:chainerrl.agents.a3c:t:2308 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08793882  0.21341331  0.12901004  0.04006567 -0.00828886  0.36368597]] probs:[[0.15736535 0.17840293 0.1639631  0.15000926 0.14292821 0.20733117]] entropy:[1.7838299]
DEBUG:chainerrl.agents.a3c:t:2309 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08816785  0.21599826  0.12708625  0.03861228 -0.01201394  0.3618269 ]] probs:[[0.15755133 0.17903507 0.16380386 0.14993408 0.14253242 0.2071432 ]] entropy:[1.7837653]
DEBUG:chainerrl.agents.a3c:t:2310 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08937152  0.21165514  0.1297868   0.04063379 -0.00989376  0.3584429 ]] probs:[[0.15777849 0.17830145 0.16428576 0.15027311 0.14286882 0.20649236]] entropy:[1.7840648]
DEBUG:chainerrl.agents.a3c:t:2311 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0918973   0.20970821  0.1383163   0.0447127  -0.00806403  0.35701546]] probs:[[

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6686803] v_loss:[[0.0007463]]
DEBUG:chainerrl.agents.a3c:grad norm:1.5645338117123506
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:2337 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06605712 0.09559937 0.02048371 0.14807752 0.26110896 0.23608473]] probs:[[0.15452076 0.15915376 0.14763677 0.16772887 0.18780051 0.18315926]] entropy:[1.7879231]
DEBUG:chainerrl.agents.a3c:t:2338 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06586357 0.0959091  0.02035957 0.14794916 0.26102668 0.23635858]] probs:[[0.15448867 0.15920079 0.14761636 0.16770497 0.1877824  0.18320684]] entropy:[1.7879183]
DEBUG:chainerrl.agents.a3c:t:2339 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06693791 0.09109389 0.02026724 0.14943075 0.2632305  0.2351261 ]] probs:[[0.15468179 0.15846379 0.14762856 0.16798303 0.18822964 0.18301319]] entropy:[1.7878611]
DEBUG:chainerrl.agents.a3c:t:2340 r:0.0 a:0

DEBUG:chainerrl.agents.a3c:t:2366 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08527614 0.10315188 0.09977791 0.14334217 0.20388354 0.18742384]] probs:[[0.15808018 0.1609314  0.16038932 0.167531   0.17798688 0.17508124]] entropy:[1.7907275]
DEBUG:chainerrl.agents.a3c:t:2367 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08787366 0.10745658 0.10877133 0.15046893 0.21413581 0.18031509]] probs:[[0.15780543 0.16092618 0.1611379  0.167999   0.1790428  0.1730887 ]] entropy:[1.7907509]
DEBUG:chainerrl.agents.a3c:t:2368 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08665359 0.10560152 0.10638953 0.14906533 0.21033503 0.17734341]] probs:[[0.15797675 0.16099863 0.16112554 0.16815053 0.17877524 0.17297338]] entropy:[1.7907841]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5435152] v_loss:[[0.00016554]]
DEBUG:chainerrl.agents.a3c:grad norm:0.6962642006709909
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:2369 r:0.0 a:

DEBUG:chainerrl.agents.a3c:t:2395 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1965268   0.04945186  0.02030288  0.04579307 -0.01207196  0.377209  ]] probs:[[0.17948516 0.15493684 0.15048578 0.154371   0.14569184 0.21502939]] entropy:[1.7817774]
DEBUG:chainerrl.agents.a3c:t:2396 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19567445  0.04941168  0.02001436  0.04560172 -0.01179861  0.3773153 ]] probs:[[0.17936261 0.15495686 0.15046786 0.15436761 0.14575636 0.21508868]] entropy:[1.7817793]
DEBUG:chainerrl.agents.a3c:t:2397 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1955723   0.04934417  0.02001248  0.04559122 -0.01176268  0.37733126]] probs:[[0.17934825 0.15494983 0.1504709  0.15436938 0.14576481 0.21509686]] entropy:[1.7817794]
DEBUG:chainerrl.agents.a3c:t:2398 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19563374  0.04935508  0.02004184  0.04562604 -0.01177012  0.37735057]] probs:[[

DEBUG:chainerrl.agents.a3c:t:2424 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04332931 0.06196239 0.21449502 0.08737712 0.06936955 0.30282515]] probs:[[0.15213323 0.15499452 0.18053459 0.15898414 0.15614684 0.19720672]] entropy:[1.7869661]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6145067] v_loss:[[0.00043388]]
DEBUG:chainerrl.agents.a3c:grad norm:2.5007253506480627
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:2425 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0058929   0.09041889  0.20423661  0.09834029  0.10606367  0.25402293]] probs:[[0.1457708  0.16050857 0.17985754 0.16178507 0.16303945 0.18903863]] entropy:[1.7882168]
DEBUG:chainerrl.agents.a3c:t:2426 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00532921  0.08967146  0.2053158   0.09778583  0.1063954   0.25481078]] probs:[[0.14581364 0.16034536 0.18000315 0.16165176 0.16304952 0.18913658]] entropy:[1.7881889]
DEBUG:chainerrl.agents.a3c:t:2

DEBUG:chainerrl.agents.a3c:t:2453 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08469172 0.13499166 0.17419751 0.13447289 0.08311358 0.14487168]] probs:[[0.15982836 0.16807334 0.17479368 0.16798618 0.15957633 0.16974214]] entropy:[1.7912298]
DEBUG:chainerrl.agents.a3c:t:2454 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08361986 0.13570906 0.17495005 0.13636437 0.08350256 0.14321324]] probs:[[0.15962844 0.16816372 0.17489383 0.16827396 0.15960972 0.1694304 ]] entropy:[1.7912202]
DEBUG:chainerrl.agents.a3c:t:2455 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08322192 0.13550706 0.17669411 0.13787745 0.08419083 0.14201128]] probs:[[0.15950605 0.16806774 0.17513448 0.1684666  0.15966068 0.16916445]] entropy:[1.7912083]
DEBUG:chainerrl.agents.a3c:t:2456 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08057059 0.13718368 0.1781932  0.14209396 0.084912   0.13850231]] probs:[[0.15902697 0.16828969 0.

DEBUG:chainerrl.agents.a3c:t:2482 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12456925 0.07674136 0.18166134 0.12540951 0.06758026 0.12284802]] probs:[[0.16790423 0.16006275 0.17776915 0.16804537 0.15860309 0.16761547]] entropy:[1.791054]
DEBUG:chainerrl.agents.a3c:t:2483 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12178082 0.06361242 0.15968597 0.11040831 0.03835333 0.10369134]] probs:[[0.17027518 0.1606531  0.17685337 0.1683497  0.15664597 0.16722268]] entropy:[1.7909888]
DEBUG:chainerrl.agents.a3c:t:2484 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11503683 0.04498908 0.12789404 0.09533121 0.00342161 0.07555631]] probs:[[0.17296688 0.16126554 0.1752051  0.16959181 0.15469955 0.16627109]] entropy:[1.7908719]
DEBUG:chainerrl.agents.a3c:t:2485 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11452708  0.04061593  0.11809091  0.09070826 -0.00651249  0.06518197]] probs:[[0.17401478 0.161616

DEBUG:chainerrl.agents.a3c:t:2511 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08532391  0.01675421  0.11935698  0.05118002 -0.02654857 -0.02722493]] probs:[[0.17474906 0.16316815 0.18079866 0.16888314 0.15625331 0.15614766]] entropy:[1.7902606]
DEBUG:chainerrl.agents.a3c:t:2512 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08642208  0.0139368   0.12332335  0.05202748 -0.0240018  -0.0258937 ]] probs:[[0.1747312  0.1625139  0.18129945 0.16882357 0.15646385 0.1561681 ]] entropy:[1.7902205]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4344823] v_loss:[[5.226877e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.23473163186014367
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:2513 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05612759  0.01617004  0.1124832   0.0560182   0.01000748 -0.00232851]] probs:[[0.16900909 0.16238904 0.1788072  0.1689906  0.16139138 0.15941268]] entropy:[1.7909951]
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:2539 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04654149  0.05573801  0.01215656  0.08152883  0.05930527  0.1341636 ]] probs:[[0.15118314 0.16746451 0.16032292 0.17183974 0.16806297 0.18112676]] entropy:[1.7901976]
DEBUG:chainerrl.agents.a3c:t:2540 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0453356   0.05683444  0.00677551  0.07443404  0.04511505  0.13901195]] probs:[[0.15185098 0.16818587 0.15997392 0.17117208 0.16622634 0.18259074]] entropy:[1.7901388]
DEBUG:chainerrl.agents.a3c:t:2541 r:0.1 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0560012   0.06009698  0.00121622  0.07923266  0.06456507  0.13910162]] probs:[[0.14991675 0.16837244 0.15874477 0.17162539 0.16912642 0.18221423]] entropy:[1.7898933]
DEBUG:chainerrl.agents.a3c:t:2542 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05181674  0.05690023  0.00768234  0.08261099  0.06919824  0.1387727 ]] probs:[[

DEBUG:chainerrl.agents.a3c:t:2568 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04889082  0.01303894  0.08751775  0.03672504  0.0044944   0.20531712]] probs:[[0.1505185  0.16013478 0.17251681 0.16397303 0.15877233 0.19408458]] entropy:[1.7883667]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4420251] v_loss:[[2.4749057e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.3275092130867622
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:2569 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03102123  0.01274054  0.0802535   0.02969464  0.0284677   0.15877558]] probs:[[0.15395676 0.16084377 0.17207776 0.163594   0.1633934  0.18613432]] entropy:[1.7899163]
DEBUG:chainerrl.agents.a3c:t:2570 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03174565  0.01171883  0.07860751  0.03083424  0.02816716  0.15724143]] probs:[[0.15395406 0.16079314 0.17191625 0.16389632 0.1634598  0.1859804 ]] entropy:[1.789943]
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:2596 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02820044  0.48339617 -0.10015015  0.15566108 -0.27259323  0.01342121]] probs:[[0.15828976 0.24954097 0.13922296 0.17980771 0.11717092 0.15596758]] entropy:[1.762238]
DEBUG:chainerrl.agents.a3c:t:2597 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02805424  0.48343664 -0.10010343  0.15570277 -0.27275398  0.01351762]] probs:[[0.1582671  0.2495518  0.1392299  0.17981574 0.11715243 0.15598308]] entropy:[1.7622278]
DEBUG:chainerrl.agents.a3c:t:2598 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02811312  0.48338944 -0.10013998  0.15561596 -0.2727158   0.0135177 ]] probs:[[0.15827937 0.24954468 0.1392274  0.17980348 0.11715908 0.155986  ]] entropy:[1.7622342]
DEBUG:chainerrl.agents.a3c:t:2599 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03319531  0.47835657 -0.10210872  0.14891663 -0.26774973  0.01103545]] probs:[[0

DEBUG:chainerrl.agents.a3c:pi_loss:[0.99675596] v_loss:[[0.09641217]]
DEBUG:chainerrl.agents.a3c:grad norm:242.14876154312486
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:2625 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13620463  0.40125495 -0.10727552  0.09768533 -0.14623378 -0.13789563]] probs:[[0.17973316 0.23428157 0.14089194 0.17294161 0.13550857 0.13664319]] entropy:[1.7708783]
DEBUG:chainerrl.agents.a3c:t:2626 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13655147  0.40025806 -0.10684782  0.09741466 -0.14688925 -0.13872664]] probs:[[0.17986023 0.23413238 0.14100294 0.17295703 0.13546853 0.13657883]] entropy:[1.7709163]
DEBUG:chainerrl.agents.a3c:t:2627 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13653547  0.40062773 -0.10693339  0.09752422 -0.14676286 -0.13861805]] probs:[[0.17983532 0.23419026 0.14097361 0.17295481 0.13546906 0.13657694]] entropy:[1.7708935]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:2653 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.20456022  0.1704326  -0.05298449  0.10917193 -0.08017922 -0.10187272]] probs:[[0.19467779 0.18814598 0.15047562 0.17696598 0.14643861 0.14329606]] entropy:[1.7840726]
DEBUG:chainerrl.agents.a3c:t:2654 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.20563859  0.16689038 -0.05352562  0.10731681 -0.08319075 -0.10384666]] probs:[[0.19519769 0.18777879 0.15063332 0.17691883 0.1462304  0.14324087]] entropy:[1.7840186]
DEBUG:chainerrl.agents.a3c:t:2655 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.20409366  0.17186588 -0.05485588  0.10893106 -0.07852644 -0.10383298]] probs:[[0.19462264 0.18845038 0.15022181 0.17695577 0.14670774 0.14304164]] entropy:[1.7840143]
DEBUG:chainerrl.agents.a3c:t:2656 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.20290254  0.17600232 -0.05616524  0.10995568 -0.07561348 -0.10031282]] probs:[[

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7852902] v_loss:[[0.00141277]]
DEBUG:chainerrl.agents.a3c:grad norm:13.31833126474384
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:2681 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15849023  0.08826531 -0.46169204  0.00922591  0.23753285  0.17418842]] probs:[[0.18418092 0.17169052 0.09906104 0.15864263 0.19932987 0.18709503]] entropy:[1.7703167]
DEBUG:chainerrl.agents.a3c:t:2682 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15651503  0.08658743 -0.45588255  0.01324276  0.24205665  0.17359124]] probs:[[0.18356846 0.17117049 0.09950323 0.1590654  0.19996239 0.18673003]] entropy:[1.7705686]
DEBUG:chainerrl.agents.a3c:t:2683 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15631287  0.08655695 -0.4559672   0.01340675  0.24204858  0.17348643]] probs:[[0.18353978 0.17117311 0.09949937 0.15909877 0.19996995 0.18671902]] entropy:[1.7705706]
DEBUG:chainerrl.agents.a3

DEBUG:chainerrl.agents.a3c:t:2709 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18864101  0.0831288  -0.27029863  0.05405093  0.14252456  0.17531434]] probs:[[0.18700036 0.16827479 0.11817551 0.16345218 0.17857239 0.1845248 ]] entropy:[1.7813287]
DEBUG:chainerrl.agents.a3c:t:2710 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.20961288  0.07550532 -0.2795478   0.02587705  0.15491372  0.18089327]] probs:[[0.19090341 0.16694427 0.11705067 0.15886134 0.18074161 0.18549871]] entropy:[1.7800353]
DEBUG:chainerrl.agents.a3c:t:2711 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2135145   0.07411853 -0.28756416  0.00805442  0.165993    0.18859392]] probs:[[0.1916072  0.16667594 0.11609035 0.15602049 0.18271472 0.18689126]] entropy:[1.7791085]
DEBUG:chainerrl.agents.a3c:t:2712 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2211401   0.0711137  -0.2843048   0.01320194  0.15715837  0.18731941]] probs:[[

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:2737 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.27495328  0.2537471  -0.13434179  0.13936472  0.10388628  0.10862114]] probs:[[0.19209096 0.18806033 0.1275711  0.16773418 0.16188754 0.16265589]] entropy:[1.7834799]
DEBUG:chainerrl.agents.a3c:t:2738 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.27426377  0.25159094 -0.13713622  0.1415893   0.11785472  0.11187275]] probs:[[0.19152054 0.18722709 0.12692484 0.16772413 0.16379014 0.16281329]] entropy:[1.7835326]
DEBUG:chainerrl.agents.a3c:t:2739 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.27516487  0.25486842 -0.13489936  0.13827637  0.09518138  0.11106641]] probs:[[0.19232523 0.18846105 0.12762849 0.16772057 0.16064619 0.16321842]] entropy:[1.7833848]
DEBUG:chainerrl.agents.a3c:t:2740 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.27705494  0.25322786 -0.13362028  0.13776675 

DEBUG:chainerrl.agents.a3c:t:2766 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2104836   0.34056193 -0.06597342  0.23185688  0.13012476  0.19400965]] probs:[[0.17166106 0.19550781 0.13019912 0.17536952 0.15840627 0.1688563 ]] entropy:[1.7845594]
DEBUG:chainerrl.agents.a3c:t:2767 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.21072187  0.32920206 -0.05528226  0.23356155  0.12853299  0.18983148]] probs:[[0.17194696 0.19357523 0.13178633 0.17591935 0.15837999 0.16839217]] entropy:[1.7852073]
DEBUG:chainerrl.agents.a3c:t:2768 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.20100199  0.3262305  -0.04799706  0.22658186  0.12671691  0.19481167]] probs:[[0.17061545 0.19337681 0.13300851 0.17503607 0.15840058 0.16956253]] entropy:[1.7855848]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3847841] v_loss:[[7.326267e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:1.5818735128408588
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:2795 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17439742 0.28059548 0.07006206 0.23525721 0.10711364 0.09427952]] probs:[[0.16853026 0.18741274 0.1518328  0.17910552 0.15756397 0.1555547 ]] entropy:[1.7887425]
DEBUG:chainerrl.agents.a3c:t:2796 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17530118 0.28036934 0.07006849 0.23505484 0.10718    0.09414356]] probs:[[0.16867185 0.18735838 0.15182407 0.17905782 0.15756434 0.1555236 ]] entropy:[1.7887477]
DEBUG:chainerrl.agents.a3c:t:2797 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17462866 0.2805805  0.07010121 0.23533045 0.10714892 0.0941833 ]] probs:[[0.16856152 0.18740135 0.1518318  0.17911044 0.15756232 0.15553261]] entropy:[1.7887415]
DEBUG:chainerrl.agents.a3c:t:2798 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17459151 0.28058803 0.07009953 0.2353418  0.10714759 0.09418762]] probs:[[0.16855569 0.18740325 0.

DEBUG:chainerrl.agents.a3c:t:2824 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13390955 0.18405135 0.03256037 0.20374197 0.13426973 0.2677618 ]] probs:[[0.1620496  0.17038222 0.14643085 0.1737704  0.16210797 0.18525898]] entropy:[1.7891598]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4876236] v_loss:[[3.8263628e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.20869203259799557
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:2825 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13630791 0.16402438 0.05627618 0.1770782  0.13923106 0.2434959 ]] probs:[[0.16369626 0.1682968  0.1511059  0.17050812 0.16417547 0.18221742]] entropy:[1.7902082]
DEBUG:chainerrl.agents.a3c:t:2826 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1366423  0.16287838 0.0567399  0.17624858 0.13944876 0.24322096]] probs:[[0.16378763 0.16814165 0.1512098  0.17040484 0.16424794 0.18220809]] entropy:[1.7902256]
DEBUG:chainerrl.agents.a3c:t:2827 r:0.

DEBUG:chainerrl.agents.a3c:t:2853 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11065159 0.03690103 0.10115922 0.17778943 0.02250731 0.17907973]] probs:[[0.16735418 0.15545586 0.16577311 0.17897573 0.1532343  0.17920682]] entropy:[1.7899092]
DEBUG:chainerrl.agents.a3c:t:2854 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09652633 0.03190655 0.08978192 0.17963576 0.02373995 0.17787139]] probs:[[0.16578503 0.15541084 0.16467068 0.18015209 0.15414684 0.17983451]] entropy:[1.7898352]
DEBUG:chainerrl.agents.a3c:t:2855 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10048086 0.03550563 0.09580239 0.18151662 0.0280671  0.17783633]] probs:[[0.16590889 0.15547167 0.16513449 0.1799132  0.15431947 0.17925227]] entropy:[1.7899201]
DEBUG:chainerrl.agents.a3c:t:2856 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11458687 0.034957   0.1093101  0.17565288 0.03063857 0.16783398]] probs:[[0.16791673 0.15506406 0.

DEBUG:chainerrl.agents.a3c:t:2882 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.23761332  0.08828144  0.09630083  0.19877876 -0.01996473  0.13138098]] probs:[[0.18644375 0.16058087 0.1618738  0.17934206 0.14410634 0.16765314]] entropy:[1.7883738]
DEBUG:chainerrl.agents.a3c:t:2883 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25108662  0.08951818  0.09188955  0.20020695 -0.02311454  0.11842123]] probs:[[0.1890371  0.16083433 0.16121617 0.17965953 0.14370206 0.16555075]] entropy:[1.78797]
DEBUG:chainerrl.agents.a3c:t:2884 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22565904  0.08486595  0.09398328  0.19335602 -0.02222506  0.13342562]] probs:[[0.18498479 0.16069056 0.16216232 0.1791047  0.14437145 0.1686862 ]] entropy:[1.7885901]
DEBUG:chainerrl.agents.a3c:t:2885 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.21127522  0.05925452  0.0901701   0.20430891 -0.02000061  0.14235345]] probs:[[0.

DEBUG:chainerrl.agents.a3c:t:2911 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15610266 0.06465203 0.13335784 0.12195625 0.04990707 0.12771979]] probs:[[0.1745867  0.15932894 0.17066057 0.16872582 0.15699686 0.16970108]] entropy:[1.7910365]
DEBUG:chainerrl.agents.a3c:t:2912 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16301101 0.05790746 0.13556442 0.12823981 0.04655322 0.12814379]] probs:[[0.17559923 0.15807992 0.17084517 0.16959837 0.1562952  0.16958208]] entropy:[1.7908665]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3175812] v_loss:[[0.00019177]]
DEBUG:chainerrl.agents.a3c:grad norm:2.085022074835286
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:2913 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16959046 0.06530356 0.15102854 0.10603268 0.04685455 0.11519178]] probs:[[0.17691138 0.15939128 0.17365786 0.16601716 0.15647763 0.16754472]] entropy:[1.7908237]
DEBUG:chainerrl.agents.a3c:t:2914 r:0.0 a:3

DEBUG:chainerrl.agents.a3c:t:2940 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1117559  0.11991753 0.16110149 0.13519168 0.05248418 0.12294731]] probs:[[0.16566725 0.1670249  0.17404726 0.16959566 0.15613322 0.16753171]] entropy:[1.7912271]
DEBUG:chainerrl.agents.a3c:t:2941 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0878691  0.12082279 0.17278206 0.16110425 0.06694314 0.13079947]] probs:[[0.160739   0.1661242  0.17498407 0.17295252 0.15741032 0.16778985]] entropy:[1.791065]
DEBUG:chainerrl.agents.a3c:t:2942 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0874061  0.11862698 0.17205772 0.15816495 0.06581387 0.13075712]] probs:[[0.16086695 0.16596858 0.17507759 0.1726621  0.1574307  0.16799407]] entropy:[1.7910745]
DEBUG:chainerrl.agents.a3c:t:2943 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08798907 0.11649469 0.1658917  0.16017987 0.06857595 0.12812462]] probs:[[0.16112089 0.16577981 0.1

DEBUG:chainerrl.agents.a3c:pi_loss:[-0.8140913] v_loss:[[0.00375259]]
DEBUG:chainerrl.agents.a3c:grad norm:11.657736641006508
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:2969 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01797974  0.18434502  0.22763832  0.10354642  0.11032155  0.13931227]] probs:[[0.14411072 0.1764269  0.18423277 0.16273256 0.16383883 0.16865815]] entropy:[1.7889012]
DEBUG:chainerrl.agents.a3c:t:2970 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00587736  0.18582195  0.21544515  0.10369667  0.10980581  0.13199702]] probs:[[0.14608549 0.17695425 0.18227462 0.16300258 0.16400144 0.1676815 ]] entropy:[1.7893476]
DEBUG:chainerrl.agents.a3c:t:2971 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05629006  0.17084518  0.2383387   0.12434088  0.12928021  0.14758445]] probs:[[0.13839588 0.17368677 0.18581414 0.1657945  0.16661546 0.1696933 ]] entropy:[1.7879779]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:2997 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03914435  0.19012626  0.18938765  0.0802695   0.0750536  -0.00715357]] probs:[[0.1572724  0.18290396 0.18276893 0.16387509 0.16302256 0.150157  ]] entropy:[1.7890594]
DEBUG:chainerrl.agents.a3c:t:2998 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03924725  0.19187792  0.19019894  0.07735762  0.07058032 -0.00650744]] probs:[[0.15738648 0.18333867 0.18303111 0.1635003  0.16239595 0.15034756]] entropy:[1.7889986]
DEBUG:chainerrl.agents.a3c:t:2999 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03263783 0.18061589 0.20182003 0.08455241 0.04797772 0.00340082]] probs:[[0.15664765 0.181631   0.18552345 0.16499475 0.15906914 0.15213406]] entropy:[1.7889339]
DEBUG:chainerrl.agents.a3c:t:3000 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03395163 0.1701142  0.19483791 0.11267164 0.1210359  0.01107286]] probs:[[0.15454301 0

DEBUG:chainerrl.agents.a3c:t:3026 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01292434  0.07856021  0.0437213   0.11786329 -0.06017153  0.07543904]] probs:[[0.16119209 0.17212701 0.16623354 0.17902683 0.14982994 0.17159061]] entropy:[1.7901795]
DEBUG:chainerrl.agents.a3c:t:3027 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01332115  0.08940422  0.04885527  0.11935449 -0.06789476  0.07728484]] probs:[[0.16089761 0.17361693 0.16671775 0.17889544 0.14834674 0.1715255 ]] entropy:[1.7899613]
DEBUG:chainerrl.agents.a3c:t:3028 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02023581  0.09685571  0.04378043  0.12116747 -0.07354144  0.07956788]] probs:[[0.16177906 0.17466179 0.16563328 0.17896016 0.1472975  0.17166822]] entropy:[1.7898062]
DEBUG:chainerrl.agents.a3c:t:3029 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02254769  0.09590369  0.03967785  0.12151808 -0.07177872  0.08011606]] probs:[[

DEBUG:chainerrl.agents.a3c:t:3055 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09865834 0.03179298 0.08976564 0.04280063 0.15975854 0.16351683]] probs:[[0.16660681 0.15583088 0.1651318  0.15755568 0.17710395 0.17777081]] entropy:[1.7904563]
DEBUG:chainerrl.agents.a3c:t:3056 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1235361   0.06892248  0.11235908 -0.00172845  0.18750225  0.19236647]] probs:[[0.16791736 0.15899271 0.16605099 0.14814734 0.17900936 0.17988221]] entropy:[1.789548]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.1722229] v_loss:[[0.00082807]]
DEBUG:chainerrl.agents.a3c:grad norm:9.760099457473501
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:3057 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11203652 0.07671605 0.07095337 0.02678034 0.19491325 0.1063844 ]] probs:[[0.16880217 0.16294408 0.16200778 0.15500715 0.18338801 0.16785078]] entropy:[1.7904043]
DEBUG:chainerrl.agents.a3c:t:3058 r:0.

DEBUG:chainerrl.agents.a3c:t:3084 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18955696  0.2409792   0.10808743 -0.0465729   0.11231441  0.22011313]] probs:[[0.1747977  0.1840213  0.16112167 0.13803393 0.16180415 0.18022129]] entropy:[1.7873747]
DEBUG:chainerrl.agents.a3c:t:3085 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.21656805  0.25009203  0.09378219 -0.06638553  0.12419028  0.21656768]] probs:[[0.17908756 0.18519306 0.15839455 0.13495229 0.163285   0.1790875 ]] entropy:[1.7863897]
DEBUG:chainerrl.agents.a3c:t:3086 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22008765  0.2567592   0.08810703 -0.07275672  0.1334754   0.22104521]] probs:[[0.17928089 0.18597743 0.15711424 0.13376833 0.16440642 0.17945264]] entropy:[1.7859584]
DEBUG:chainerrl.agents.a3c:t:3087 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22103858  0.26570028  0.07332934 -0.08799953  0.1581272   0.21236143]] probs:[[

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3567722] v_loss:[[0.00086259]]
DEBUG:chainerrl.agents.a3c:grad norm:28.60760785329434
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:3113 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18043855 0.18186496 0.14651823 0.00982699 0.17525034 0.0093647 ]] probs:[[0.17702846 0.17728116 0.1711243  0.14926137 0.17611237 0.1491924 ]] entropy:[1.788899]
DEBUG:chainerrl.agents.a3c:t:3114 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1685052  0.18365411 0.14489445 0.01094609 0.17709781 0.03678489]] probs:[[0.17447828 0.17714156 0.17040697 0.14904393 0.17598397 0.15294525]] entropy:[1.7894119]
DEBUG:chainerrl.agents.a3c:t:3115 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13368602 0.16002855 0.14722116 0.00175365 0.18144283 0.08155489]] probs:[[0.1690663  0.1735791  0.1713702  0.14816974 0.17733626 0.16047846]] entropy:[1.7900177]
DEBUG:chainerrl.agents.a3c:t:3116 r:0.0 a:3 

DEBUG:chainerrl.agents.a3c:t:3142 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16524594  0.12029812  0.1416972  -0.04095752  0.01276394  0.02383263]] probs:[[0.18271029 0.1746797  0.17845796 0.14866543 0.15687037 0.15861636]] entropy:[1.7889131]
DEBUG:chainerrl.agents.a3c:t:3143 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15652113  0.11546101  0.12319455 -0.03838834  0.02045072  0.00388823]] probs:[[0.18244416 0.1751047  0.17646413 0.15013497 0.15923382 0.15661824]] entropy:[1.7892165]
DEBUG:chainerrl.agents.a3c:t:3144 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15902391  0.11106107  0.11999171 -0.0254934   0.03417226 -0.00210613]] probs:[[0.18247497 0.17392953 0.17548978 0.15172894 0.16105749 0.1553193 ]] entropy:[1.7894657]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6665728] v_loss:[[0.00060205]]
DEBUG:chainerrl.agents.a3c:grad norm:5.71591473642675
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c

DEBUG:chainerrl.agents.a3c:t:3170 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22429079  0.10549603  0.12719604 -0.07334963  0.02256943 -0.15450543]] probs:[[0.19841127 0.17618723 0.18005228 0.14733392 0.16216601 0.13584924]] entropy:[1.7838495]
DEBUG:chainerrl.agents.a3c:t:3171 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.21256246  0.11505417  0.16301382 -0.04521137  0.00751982 -0.152072  ]] probs:[[0.19453034 0.17645746 0.18512651 0.1503272  0.15846686 0.13509168]] entropy:[1.7840482]
DEBUG:chainerrl.agents.a3c:t:3172 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2238149   0.06963234  0.15057968 -0.07871924 -0.00351243 -0.1504339 ]] probs:[[0.19960453 0.17108415 0.1855089  0.14749643 0.15901694 0.13728915]] entropy:[1.7835332]
DEBUG:chainerrl.agents.a3c:t:3173 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22794572  0.06540742  0.15240633 -0.07307766  0.00027106 -0.14809333]] probs:[[

DEBUG:chainerrl.agents.a3c:t:3199 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22581269  0.01010872  0.06049764 -0.11906727  0.35083005  0.00531771]] probs:[[0.18877912 0.15215105 0.16001421 0.13371328 0.21391848 0.15142383]] entropy:[1.7792095]
DEBUG:chainerrl.agents.a3c:t:3200 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.23536816  0.00800887  0.09201878 -0.11100867  0.31965548  0.04118247]] probs:[[0.18933019 0.15082696 0.1640454  0.13390301 0.20598017 0.15591435]] entropy:[1.7813661]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3412709] v_loss:[[0.0001767]]
DEBUG:chainerrl.agents.a3c:grad norm:9.352159034369167
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:3201 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1537991  -0.0148319   0.11597504 -0.05648959  0.2508669   0.07553487]] probs:[[0.17715706 0.14966594 0.17058142 0.14355928 0.19521558 0.1638207 ]] entropy:[1.7864699]
DEBUG:chainerrl.agents.a3c

DEBUG:chainerrl.agents.a3c:t:3227 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15697762 -0.02337342  0.21173891 -0.01028444  0.12664966 -0.10220857]] probs:[[0.1825134  0.15239449 0.19278678 0.15440229 0.17706123 0.14084181]] entropy:[1.7855587]
DEBUG:chainerrl.agents.a3c:t:3228 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14006747 -0.01815575  0.21423814 -0.0141769   0.140208   -0.09265987]] probs:[[0.17920431 0.1529794  0.19300136 0.15358931 0.1792295  0.14199606]] entropy:[1.78583]
DEBUG:chainerrl.agents.a3c:t:3229 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14549087 -0.01699477  0.21473375  0.00843014  0.13956764 -0.10939626]] probs:[[0.17976753 0.15280747 0.19265622 0.15674241 0.17870587 0.13932055]] entropy:[1.7856327]
DEBUG:chainerrl.agents.a3c:t:3230 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14143255 -0.01696384  0.22138017  0.0079891   0.13722609 -0.094176  ]] probs:[[0.

DEBUG:chainerrl.agents.a3c:t:3256 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09840426  0.0339801   0.21505427  0.11090816  0.11754715 -0.07592544]] probs:[[0.16854131 0.15802556 0.18939428 0.17066197 0.17179877 0.14157806]] entropy:[1.7879206]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3627691] v_loss:[[0.00385967]]
DEBUG:chainerrl.agents.a3c:grad norm:55.452598512043636
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:3257 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1572598  0.05042786 0.1965399  0.06002912 0.15509322 0.01419062]] probs:[[0.1751125  0.15736951 0.1821278  0.15888774 0.1747335  0.15176897]] entropy:[1.7895255]
DEBUG:chainerrl.agents.a3c:t:3258 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16364186 0.04624629 0.19358276 0.08114742 0.16035855 0.00145401]] probs:[[0.17582582 0.15635018 0.1811698  0.16190332 0.17524946 0.14950143]] entropy:[1.7893708]
DEBUG:chainerrl.agents.a3c:t:3259 r:

DEBUG:chainerrl.agents.a3c:t:3285 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24249369  0.57910234 -0.42126346  0.6008048  -0.09582347 -0.21402253]] probs:[[0.17567065 0.24597245 0.09045511 0.251369   0.12524775 0.11128502]] entropy:[1.7194967]
DEBUG:chainerrl.agents.a3c:t:3286 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2424307   0.5790919  -0.42125022  0.6007904  -0.09583215 -0.214     ]] probs:[[0.17566215 0.24597348 0.09045763 0.25136906 0.12524849 0.11128916]] entropy:[1.7195002]
DEBUG:chainerrl.agents.a3c:t:3287 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24302562  0.57914394 -0.42261446  0.6010577  -0.09540516 -0.21332636]] probs:[[0.17573334 0.24593963 0.09031718 0.25138858 0.12527822 0.11134303]] entropy:[1.7194458]
DEBUG:chainerrl.agents.a3c:t:3288 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24205156  0.57937235 -0.42138138  0.6009872  -0.09578171 -0.21368127]] probs:[[

INFO: outdir:result global_step:6734 local_step:3294 R:2.35
INFO: statistics:[('average_value', 0.25480661007416533), ('average_entropy', 1.7202345148228255)]


DEBUG:chainerrl.agents.a3c:t:3295 r:0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13939211  0.26496443 -0.12901038  0.34078553  0.5367304  -0.17302021]] probs:[[0.12409557 0.18593712 0.12539062 0.20058331 0.24400154 0.11999186]] entropy:[1.7529612]
DEBUG:chainerrl.agents.a3c:t:3296 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18720397  0.3292735  -0.21438622  0.394838    0.65627104 -0.1631973 ]] probs:[[0.11379525 0.19073369 0.1107437  0.20365812 0.2645091  0.11656015]] entropy:[1.7334126]
DEBUG:chainerrl.agents.a3c:t:3297 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.19090042  0.3419036  -0.22704607  0.40663517  0.67335707 -0.15797603]] probs:[[0.11245365 0.19158761 0.10846153 0.20439957 0.2668799  0.11621775]] entropy:[1.7304406]
DEBUG:chainerrl.agents.a3c:t:3298 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18712051  0.34613386 -0.23467408  0.40788636  0.6703351  -0.15795924]] probs:[[0.

DEBUG:chainerrl.agents.a3c:t:3324 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04970751  0.23984942 -0.00981456  0.33157802  0.36115262  0.02362413]] probs:[[0.13468191 0.17991288 0.14016338 0.19719662 0.20311573 0.1449295 ]] entropy:[1.7778847]
DEBUG:chainerrl.agents.a3c:t:3325 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04970716  0.23984933 -0.00981492  0.33157784  0.3611523   0.02362397]] probs:[[0.13468197 0.17991288 0.14016335 0.1971966  0.20311569 0.1449295 ]] entropy:[1.7778847]
DEBUG:chainerrl.agents.a3c:t:3326 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04970684  0.23984925 -0.0098153   0.33157754  0.36115193  0.0236238 ]] probs:[[0.13468204 0.1799129  0.14016332 0.19719657 0.20311564 0.1449295 ]] entropy:[1.7778847]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5288281] v_loss:[[0.00014425]]
DEBUG:chainerrl.agents.a3c:grad norm:1.475642462775523
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3

DEBUG:chainerrl.agents.a3c:t:3353 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11854995 0.12384763 0.10393712 0.17930622 0.18968575 0.08111997]] probs:[[0.1641919  0.16506405 0.16181004 0.17447688 0.17629729 0.1581598 ]] entropy:[1.7909876]
DEBUG:chainerrl.agents.a3c:t:3354 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11852948 0.12388111 0.10390399 0.17928563 0.1896515  0.0811449 ]] probs:[[0.16419002 0.16507106 0.16180614 0.17447484 0.17629284 0.15816517]] entropy:[1.7909882]
DEBUG:chainerrl.agents.a3c:t:3355 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11845255 0.12384003 0.10381657 0.17929013 0.18959926 0.08109818]] probs:[[0.1641855  0.16507243 0.16179997 0.17448424 0.17629233 0.15816559]] entropy:[1.7909876]
DEBUG:chainerrl.agents.a3c:t:3356 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11845466 0.12383231 0.10382329 0.1792904  0.18961233 0.0811016 ]] probs:[[0.16418535 0.16507065 0.

DEBUG:chainerrl.agents.a3c:t:3382 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10979314 0.11484531 0.11525651 0.18149044 0.17716879 0.11355603]] probs:[[0.1623814  0.16320387 0.16327098 0.17445123 0.17369893 0.16299358]] entropy:[1.7912669]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4295722] v_loss:[[5.2516807e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.09108124828254718
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:3383 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08786618 0.13363835 0.07596026 0.20747401 0.17941163 0.15549526]] probs:[[0.15803054 0.16543205 0.1561602  0.17810908 0.1731804  0.1690877 ]] entropy:[1.7906616]
DEBUG:chainerrl.agents.a3c:t:3384 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09034289 0.13405412 0.07861821 0.19980307 0.17819692 0.14949869]] probs:[[0.15869313 0.16578364 0.15684336 0.17705007 0.17326573 0.16836397]] entropy:[1.7908134]
DEBUG:chainerrl.agents.a3c:t:3385 r:0.

DEBUG:chainerrl.agents.a3c:t:3411 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19201103 0.08577299 0.03158645 0.13794945 0.09257674 0.09927915]] probs:[[0.1813186  0.1630436  0.15444393 0.17177649 0.16415669 0.16526064]] entropy:[1.7905318]
DEBUG:chainerrl.agents.a3c:t:3412 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19084494 0.0756288  0.03762545 0.1494213  0.08025739 0.07899053]] probs:[[0.18188365 0.16208988 0.15604551 0.17450328 0.16284189 0.16263571]] entropy:[1.7904007]
DEBUG:chainerrl.agents.a3c:t:3413 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1897716  0.06450111 0.02104751 0.1586904  0.06464018 0.07463285]] probs:[[0.18281466 0.16128974 0.15443121 0.17721996 0.16131218 0.1629322 ]] entropy:[1.7899966]
DEBUG:chainerrl.agents.a3c:t:3414 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18965186 0.05953933 0.01385412 0.1665421  0.05859996 0.06898873]] probs:[[0.18323596 0.16088055 0.

DEBUG:chainerrl.agents.a3c:t:3440 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14355142  0.1134871   0.00320412  0.30963847  0.17353368 -0.01884992]] probs:[[0.16947281 0.16445355 0.14728142 0.20009245 0.17463093 0.14406882]] entropy:[1.7856059]
DEBUG:chainerrl.agents.a3c:t:3441 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14293714  0.11598787  0.00512214  0.3101595   0.17604582 -0.01908992]] probs:[[0.16918273 0.16468428 0.14740212 0.19997689 0.17487791 0.14387608]] entropy:[1.7856095]
DEBUG:chainerrl.agents.a3c:t:3442 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1430992   0.11501591  0.00431966  0.31003755  0.17504847 -0.01910519]] probs:[[0.16928664 0.16459864 0.14735045 0.20004287 0.17478254 0.1439389 ]] entropy:[1.7856025]
DEBUG:chainerrl.agents.a3c:t:3443 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14355804  0.11214541  0.00245989  0.3098611   0.17245743 -0.01909765]] probs:[[

DEBUG:chainerrl.agents.a3c:t:3469 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.23047797  0.03107139  0.12081501  0.28178576  0.21012034 -0.05234158]] probs:[[0.18176849 0.14890778 0.16288932 0.19133802 0.17810552 0.13699086]] entropy:[1.7851298]
DEBUG:chainerrl.agents.a3c:t:3470 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22997902  0.04013622  0.12359546  0.2842586   0.2131802  -0.0353059 ]] probs:[[0.18075739 0.14950247 0.1625153  0.19083998 0.17774624 0.13863863]] entropy:[1.7856764]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7191285] v_loss:[[0.00095857]]
DEBUG:chainerrl.agents.a3c:grad norm:1.8213909805789739
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:3471 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.28125516  0.04649739  0.1418489   0.27465108  0.19297917 -0.05106429]] probs:[[0.18914959 0.14957237 0.16453642 0.18790455 0.17316799 0.13566908]] entropy:[1.7848808]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:3497 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24162026  0.21547832  0.02796835  0.17188549  0.09936731 -0.04856574]] probs:[[0.18761748 0.18277635 0.15152541 0.17497979 0.16273975 0.14036122]] entropy:[1.7865852]
DEBUG:chainerrl.agents.a3c:t:3498 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24182282  0.21570274  0.02810121  0.17104053  0.09883875 -0.04866143]] probs:[[0.18768327 0.18284443 0.15156798 0.17485787 0.16267782 0.14036857]] entropy:[1.7865808]
DEBUG:chainerrl.agents.a3c:t:3499 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24190608  0.21852833  0.0282904   0.16762786  0.09431105 -0.04857232]] probs:[[0.18784098 0.18350062 0.15171142 0.17439407 0.16206554 0.14048734]] entropy:[1.7865396]
DEBUG:chainerrl.agents.a3c:t:3500 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24451135  0.2330693   0.03063052  0.14786078  0.07113849 -0.04750083]] probs:[[

DEBUG:chainerrl.agents.a3c:t:3526 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13139266 0.27509436 0.05487267 0.10004934 0.0771058  0.02345178]] probs:[[0.16963936 0.19585538 0.15714279 0.16440478 0.16067569 0.15228197]] entropy:[1.7882733]
DEBUG:chainerrl.agents.a3c:pi_loss:[-2.189782] v_loss:[[0.00588683]]
DEBUG:chainerrl.agents.a3c:grad norm:25.41103985143414
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:3527 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16343081  0.23092283  0.09229767 -0.01249972  0.1214973   0.07111304]] probs:[[0.17511341 0.18734014 0.16308975 0.14686346 0.16792212 0.1596711 ]] entropy:[1.7888982]
DEBUG:chainerrl.agents.a3c:t:3528 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16409068  0.2304923   0.09241658 -0.01324138  0.12188651  0.07067551]] probs:[[0.17523935 0.18727057 0.16311878 0.14676325 0.16799742 0.15961069]] entropy:[1.7888849]
DEBUG:chainerrl.agents.a3c:t:352

DEBUG:chainerrl.agents.a3c:t:3554 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20976989 0.21259895 0.13067763 0.0028309  0.15817171 0.11782391]] probs:[[0.17851654 0.1790223  0.16494119 0.14514631 0.16953902 0.16283466]] entropy:[1.7893693]
DEBUG:chainerrl.agents.a3c:t:3555 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20988075 0.2124318  0.13053729 0.00309027 0.15778825 0.11772358]] probs:[[0.17855006 0.17900614 0.16493073 0.14519514 0.16948706 0.16283084]] entropy:[1.7893758]
DEBUG:chainerrl.agents.a3c:t:3556 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20989546 0.21241449 0.1305182  0.00312337 0.15773399 0.11771134]] probs:[[0.17855446 0.17900482 0.16492923 0.14520139 0.16947953 0.16283046]] entropy:[1.7893765]
DEBUG:chainerrl.agents.a3c:t:3557 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20990197 0.21244279 0.13052551 0.00310378 0.1577094  0.11772437]] probs:[[0.1785552  0.17900945 0.

DEBUG:chainerrl.agents.a3c:pi_loss:[-0.46374014] v_loss:[[0.01267131]]
DEBUG:chainerrl.agents.a3c:grad norm:36.00229167873464
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:3583 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04317112 0.25637665 0.07638494 0.00352452 0.18975948 0.18740836]] probs:[[0.15277497 0.18908027 0.15793443 0.14683646 0.17689466 0.17647925]] entropy:[1.7876667]
DEBUG:chainerrl.agents.a3c:t:3584 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04246965 0.25635782 0.0766431  0.00357296 0.1902044  0.18788773]] probs:[[0.1526525  0.1890577  0.15795931 0.14682882 0.1769556  0.17654613]] entropy:[1.7876518]
DEBUG:chainerrl.agents.a3c:t:3585 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04238321 0.25632742 0.07666039 0.00357031 0.19022524 0.1879076 ]] probs:[[0.15264073 0.18905371 0.15796353 0.1468298  0.17696093 0.17655128]] entropy:[1.787651]
DEBUG:chainerrl.agents.a3c:t:3586 r:0.0 a:1

DEBUG:chainerrl.agents.a3c:t:3612 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05351703 0.23598562 0.11092315 0.07733088 0.15836692 0.19883308]] probs:[[0.15266451 0.18322435 0.16168481 0.15634367 0.16954063 0.176542  ]] entropy:[1.7896444]
DEBUG:chainerrl.agents.a3c:t:3613 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05351727 0.23598346 0.11092287 0.07732926 0.15836607 0.19883215]] probs:[[0.15266469 0.18322413 0.16168493 0.15634356 0.16954066 0.17654201]] entropy:[1.7896444]
DEBUG:chainerrl.agents.a3c:t:3614 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05351731 0.23598345 0.11092283 0.07732926 0.15836604 0.19883202]] probs:[[0.1526647  0.18322414 0.16168495 0.15634358 0.16954066 0.176542  ]] entropy:[1.7896444]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5042117] v_loss:[[6.38644e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.21096078563689713
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:3615 r:0.0 

DEBUG:chainerrl.agents.a3c:t:3641 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08404414 0.1859843  0.05876121 0.13338387 0.14938015 0.16187787]] probs:[[0.15920044 0.17628539 0.15522584 0.16725235 0.16994928 0.1720866 ]] entropy:[1.7907948]
DEBUG:chainerrl.agents.a3c:t:3642 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08404414 0.1859843  0.05876121 0.13338387 0.14938015 0.16187787]] probs:[[0.15920044 0.17628539 0.15522584 0.16725235 0.16994928 0.1720866 ]] entropy:[1.7907948]
DEBUG:chainerrl.agents.a3c:t:3643 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08343044 0.19134699 0.06184437 0.13664007 0.15721762 0.1695901 ]] probs:[[0.15838301 0.17643152 0.15500079 0.16703875 0.17051162 0.17263438]] entropy:[1.7907014]
DEBUG:chainerrl.agents.a3c:t:3644 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08341888 0.19134434 0.06183064 0.13664176 0.15720631 0.16963352]] probs:[[0.15838094 0.17643078 0.

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.536403] v_loss:[[0.00022138]]
DEBUG:chainerrl.agents.a3c:grad norm:1.1288696003535315
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:3671 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1796668  0.08355626 0.09291848 0.17110476 0.10577116 0.14773552]] probs:[[0.17500608 0.15896913 0.16046444 0.17351405 0.16254015 0.16950618]] entropy:[1.7910424]
DEBUG:chainerrl.agents.a3c:t:3672 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17915757 0.08752687 0.09387564 0.17406891 0.10892678 0.15122183]] probs:[[0.17451249 0.15923253 0.16024669 0.1736267  0.16267683 0.16970482]] entropy:[1.7910651]
DEBUG:chainerrl.agents.a3c:t:3673 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17871203 0.09084724 0.09451628 0.17616291 0.11166115 0.15398829]] probs:[[0.1741154  0.15946965 0.16005582 0.17367212 0.16282362 0.1698634 ]] entropy:[1.7910846]
DEBUG:chainerrl.agents.a3c:t:3674 r:0.0 a:2

DEBUG:chainerrl.agents.a3c:t:3700 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14167048 0.11771887 0.03487743 0.16936989 0.26346493 0.13961014]] probs:[[0.1658211  0.16189662 0.14902537 0.17047846 0.18729858 0.16547982]] entropy:[1.789445]
DEBUG:chainerrl.agents.a3c:t:3701 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14167015 0.11771861 0.03487836 0.16937009 0.26346475 0.13961011]] probs:[[0.16582106 0.16189657 0.1490255  0.1704785  0.18729854 0.16547981]] entropy:[1.7894449]
DEBUG:chainerrl.agents.a3c:t:3702 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14167005 0.1177186  0.03487838 0.16937003 0.26346466 0.13961002]] probs:[[0.16582106 0.16189659 0.14902551 0.1704785  0.18729854 0.16547981]] entropy:[1.7894452]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2865721] v_loss:[[0.00045151]]
DEBUG:chainerrl.agents.a3c:grad norm:0.6202589747504859
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:3703 r:0.0 a:5

DEBUG:chainerrl.agents.a3c:t:3729 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15086724 0.13272306 0.07777506 0.11673621 0.23522961 0.15224749]] probs:[[0.16757764 0.16456449 0.15576595 0.16195455 0.18232833 0.1678091 ]] entropy:[1.7905941]
DEBUG:chainerrl.agents.a3c:t:3730 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15086637 0.13272133 0.07777391 0.11673675 0.23523116 0.15224737]] probs:[[0.1675775  0.16456422 0.15576579 0.16195466 0.18232864 0.16780908]] entropy:[1.7905941]
DEBUG:chainerrl.agents.a3c:t:3731 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15086757 0.13272299 0.07777506 0.11673617 0.23522925 0.15224689]] probs:[[0.16757771 0.1645645  0.15576597 0.16195455 0.18232828 0.16780901]] entropy:[1.7905943]
DEBUG:chainerrl.agents.a3c:t:3732 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15086651 0.13272117 0.07777403 0.11673669 0.23523086 0.1522468 ]] probs:[[0.16757756 0.16456424 0.

INFO: outdir:result global_step:7638 local_step:3741 R:0.15000000000000002
INFO: statistics:[('average_value', 0.2392278595356674), ('average_entropy', 1.7442961527417582)]


DEBUG:chainerrl.agents.a3c:t:3742 r:0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07533206 0.07403915 0.13859554 0.10624089 0.02220542 0.07654393]] probs:[[0.16542928 0.16521552 0.17623304 0.17062233 0.15686995 0.16562986]] entropy:[1.7911357]
DEBUG:chainerrl.agents.a3c:t:3743 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05551033 0.10564509 0.11128451 0.09991651 0.02640225 0.10657102]] probs:[[0.16186792 0.17018999 0.17115247 0.16921784 0.15722416 0.17034765]] entropy:[1.7912605]
DEBUG:chainerrl.agents.a3c:t:3744 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05242442 0.10791433 0.10883712 0.10055769 0.02549675 0.11150903]] probs:[[0.16132416 0.17052904 0.17068648 0.16927913 0.15703803 0.17114314]] entropy:[1.7912169]
DEBUG:chainerrl.agents.a3c:t:3745 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05209072 0.10381599 0.10587861 0.10353133 0.02595082 0.10752437]] probs:[[0.16149007 0.17006299 0.17

DEBUG:chainerrl.agents.a3c:t:3771 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02668966 0.06021153 0.06505641 0.11974536 0.03482424 0.02755771]] probs:[[0.16181855 0.16733494 0.16814764 0.17759955 0.16314024 0.16195907]] entropy:[1.791225]
DEBUG:chainerrl.agents.a3c:t:3772 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02681089 0.06071743 0.06528909 0.1194171  0.03544012 0.0272887 ]] probs:[[0.16181515 0.16739583 0.16816285 0.17751603 0.16321754 0.16189249]] entropy:[1.7912291]
DEBUG:chainerrl.agents.a3c:t:3773 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02671829 0.06083075 0.06536081 0.11915686 0.03554897 0.02719253]] probs:[[0.16180472 0.16741951 0.16817963 0.17747483 0.1632399  0.16188148]] entropy:[1.7912314]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4716464] v_loss:[[1.8013607e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.045638744026896015
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:3774 r:0.

DEBUG:chainerrl.agents.a3c:t:3800 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06106094 0.13441549 0.11908384 0.06717089 0.0733761  0.02742331]] probs:[[0.16336417 0.17579815 0.17312342 0.16436538 0.16538846 0.15796039]] entropy:[1.7911049]
DEBUG:chainerrl.agents.a3c:t:3801 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06105595 0.13451207 0.11935167 0.06739625 0.07326029 0.02722452]] probs:[[0.16335535 0.17580651 0.17316131 0.16439436 0.16536121 0.15792124]] entropy:[1.7911011]
DEBUG:chainerrl.agents.a3c:t:3802 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06052293 0.13458762 0.11888614 0.06739174 0.0732216  0.02713657]] probs:[[0.16329695 0.17585064 0.17311108 0.16442247 0.16538382 0.15793505]] entropy:[1.7911009]
DEBUG:chainerrl.agents.a3c:t:3803 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06105718 0.13451424 0.11944781 0.06746592 0.07311726 0.02701798]] probs:[[0.16336004 0.17581174 0.

DEBUG:chainerrl.agents.a3c:t:3829 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02081033 0.15100649 0.14658529 0.03362389 0.01767912 0.09017529]] probs:[[0.15736444 0.17924626 0.17845552 0.15939382 0.15687248 0.16866751]] entropy:[1.7901577]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4070555] v_loss:[[1.9490566e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.0766303823591975
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:3830 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01342372  0.1872254   0.2477091  -0.04936694 -0.01601158  0.0828053 ]] probs:[[0.15188986 0.18563916 0.19721381 0.14652741 0.1514973  0.16723245]] entropy:[1.7854183]
DEBUG:chainerrl.agents.a3c:t:3831 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01417358  0.18842648  0.24874562 -0.04676202 -0.01428751  0.08415149]] probs:[[0.15159672 0.1856427  0.19718513 0.14673606 0.15157945 0.1672599 ]] entropy:[1.7854297]
DEBUG:chainerrl.agents.a3c:

DEBUG:chainerrl.agents.a3c:t:3857 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.37993824 -0.07588246  0.06918584 -0.14164765  0.23199138 -0.03362379]] probs:[[0.22300678 0.14137012 0.16344063 0.13237202 0.19233821 0.14747228]] entropy:[1.7742702]
DEBUG:chainerrl.agents.a3c:t:3858 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.38591415 -0.11537174  0.03220114 -0.14229685  0.19056663 -0.04714825]] probs:[[0.22890861 0.13866167 0.16071135 0.13497801 0.1882885  0.14845182]] entropy:[1.7731545]
DEBUG:chainerrl.agents.a3c:t:3859 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.38420206 -0.11621501  0.04307026 -0.12955503  0.18011963 -0.03911178]] probs:[[0.22801094 0.13823795 0.16210784 0.13640611 0.18591902 0.14931822]] entropy:[1.7740695]
DEBUG:chainerrl.agents.a3c:t:3860 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.383932   -0.1204927   0.03849765 -0.12934299  0.17461225 -0.0408651 ]] probs:[[

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4592676] v_loss:[[3.1263608e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.2481829504935496
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:3886 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09281367  0.0266653   0.04653721  0.09313505  0.17335667 -0.01235182]] probs:[[0.17020863 0.15931392 0.16251145 0.17026334 0.18448496 0.15321766]] entropy:[1.7899886]
DEBUG:chainerrl.agents.a3c:t:3887 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09238537  0.02593607  0.04709392  0.09484006  0.17330006 -0.01230551]] probs:[[0.17010365 0.15916775 0.16257127 0.1705217  0.1844397  0.15319584]] entropy:[1.7899826]
DEBUG:chainerrl.agents.a3c:t:3888 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09318153  0.02526754  0.04741046  0.09451066  0.17262599 -0.01092474]] probs:[[0.17022012 0.15904361 0.16260457 0.17044652 0.18429483 0.15339038]] entropy:[1.790008]
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:3914 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05005419  0.06062644  0.06047373  0.0899974   0.17381719 -0.00849922]] probs:[[0.16295324 0.16468517 0.16466002 0.16959387 0.18442196 0.15368575]] entropy:[1.7902353]
DEBUG:chainerrl.agents.a3c:t:3915 r:0.1 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04986747  0.0604337   0.06063764  0.08998226  0.17397289 -0.00846801]] probs:[[0.1629235  0.16465412 0.16468771 0.16959201 0.18445145 0.15369119]] entropy:[1.790232]
DEBUG:chainerrl.agents.a3c:t:3916 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05057123  0.06117884  0.06001019  0.09002771  0.17335972 -0.00860529]] probs:[[0.16303696 0.1647756  0.16458315 0.16959842 0.18433696 0.15366891]] entropy:[1.7902445]
DEBUG:chainerrl.agents.a3c:t:3917 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05067783  0.06128208  0.05991431  0.09002852  0.17327887 -0.00861854]] probs:[[0

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:3942 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16291538 -0.01275139  0.0701279   0.08192655  0.12160122  0.00863659]] probs:[[0.18218091 0.1528311  0.16603734 0.16800797 0.17480762 0.15613507]] entropy:[1.7899257]
DEBUG:chainerrl.agents.a3c:t:3943 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16285545 -0.01268525  0.06998676  0.08179931  0.12156904  0.00863552]] probs:[[0.18217936 0.15284908 0.16602245 0.16799521 0.17481098 0.15614292]] entropy:[1.7899277]
DEBUG:chainerrl.agents.a3c:t:3944 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16294494 -0.01278574  0.07019993  0.08198803  0.12161775  0.00863705]] probs:[[0.18218169 0.15282199 0.1660451  0.16801403 0.17480609 0.15613118]] entropy:[1.7899247]
DEBUG:chainerrl.agents.a3c:t:3945 r:0.2 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16300409 -0.01286643  0.07038572  0.08211366 

DEBUG:chainerrl.agents.a3c:t:3971 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14378084  0.05703088  0.09486292 -0.04007857  0.13146432 -0.01765395]] probs:[[0.18050869 0.16550957 0.17189108 0.15019275 0.17829908 0.15359883]] entropy:[1.7893493]
DEBUG:chainerrl.agents.a3c:t:3972 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.143789    0.05699061  0.09494839 -0.04005673  0.13147889 -0.01766248]] probs:[[0.18050762 0.16550057 0.17190336 0.15019391 0.17829917 0.15359534]] entropy:[1.789349]
DEBUG:chainerrl.agents.a3c:t:3973 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14378047  0.05703024  0.09486187 -0.04007958  0.13146414 -0.01765416]] probs:[[0.18050873 0.16550954 0.17189099 0.1501927  0.17829916 0.15359887]] entropy:[1.7893494]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5049155] v_loss:[[7.490395e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.4337667983839976
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.

DEBUG:chainerrl.agents.a3c:t:3999 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15846966  0.13026717  0.11991553  0.03537092  0.10402035 -0.00069545]] probs:[[0.1779863  0.17303677 0.17125478 0.15737128 0.16855419 0.1517966 ]] entropy:[1.7902417]
DEBUG:chainerrl.agents.a3c:t:4000 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15925877  0.12607056  0.12341012  0.03779022  0.10578152 -0.00178827]] probs:[[0.17803283 0.17222121 0.17176364 0.15766925 0.16876222 0.15155081]] entropy:[1.790244]
DEBUG:chainerrl.agents.a3c:t:4001 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1576902   0.12952028  0.11941448  0.03543067  0.10370799 -0.00128083]] probs:[[0.17793407 0.17299162 0.17125219 0.15745717 0.16858344 0.1517815 ]] entropy:[1.7902502]
DEBUG:chainerrl.agents.a3c:t:4002 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16298057  0.1192157   0.13393153  0.04456984  0.11258159 -0.00415758]] probs:[[0

DEBUG:chainerrl.agents.a3c:t:4028 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.6199934  -0.53412104 -0.6499181   1.9148841  -0.4384226  -0.4015948 ]] probs:[[0.16795947 0.05296378 0.04717251 0.6131523  0.05828278 0.06046922]] entropy:[1.2345607]
DEBUG:chainerrl.agents.a3c:t:4029 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.6183015  -0.5348836  -0.65211326  1.9164741  -0.4406514  -0.40237024]] probs:[[0.1676133  0.05290375 0.04705159 0.6139     0.05813143 0.06039992]] entropy:[1.2331374]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2040052] v_loss:[[0.00131614]]
DEBUG:chainerrl.agents.a3c:grad norm:489.26704528381833
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:4030 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.7643698  -0.48219433 -0.59138674  1.4525886  -0.3350794  -0.4421606 ]] probs:[[0.2399406  0.06898073 0.06184522 0.47752196 0.07991329 0.07179831]] entropy:[1.4430481]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:4056 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.8426378  -0.34185645 -0.4616735   0.7275765  -0.09507965 -0.2756698 ]] probs:[[0.31378213 0.09598622 0.08514772 0.27967763 0.12285213 0.10255419]] entropy:[1.6458784]
DEBUG:chainerrl.agents.a3c:t:4057 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.8422205  -0.34344432 -0.46056712  0.7274425  -0.09607    -0.27538946]] probs:[[0.3137514  0.09586454 0.08526921 0.2797295  0.12276974 0.10261571]] entropy:[1.6458899]
DEBUG:chainerrl.agents.a3c:t:4058 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.8418981  -0.34483808 -0.45963454  0.727255   -0.09697706 -0.27509356]] probs:[[0.31374073 0.09575865 0.08537338 0.2797577  0.1226938  0.10267569]] entropy:[1.6458989]
DEBUG:chainerrl.agents.a3c:t:4059 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.8407824  -0.34971172 -0.4563469   0.72668815 -0.10026734 -0.27401707]] probs:[[

DEBUG:chainerrl.agents.a3c:t:4085 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.40180618 -0.12910555 -0.29946622  0.57674813 -0.09456604 -0.08699559]] probs:[[0.22235586 0.13076049 0.11027824 0.26486522 0.1353558  0.1363844 ]] entropy:[1.7377492]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6880769] v_loss:[[0.00108369]]
DEBUG:chainerrl.agents.a3c:grad norm:14.182369005210095
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:4086 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.33806217 -0.06768911 -0.18029733  0.43982866 -0.06742423 -0.03161401]] probs:[[0.21156374 0.14100213 0.1259855  0.23422748 0.14103948 0.14618166]] entropy:[1.7631395]
DEBUG:chainerrl.agents.a3c:t:4087 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.32409972 -0.07433864 -0.24789824  0.45117056 -0.0673802  -0.04731018]] probs:[[0.21110058 0.14172609 0.11914448 0.23970418 0.14271574 0.14560898]] entropy:[1.7595317]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:4113 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.26513416 -0.0882726  -0.13704395  0.26253307  0.0502841   0.10849151]] probs:[[0.19879816 0.13961425 0.13296846 0.19828175 0.160363   0.16997433]] entropy:[1.7798779]
DEBUG:chainerrl.agents.a3c:t:4114 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2648979  -0.08785769 -0.13801396  0.2636099   0.05647514  0.10449212]] probs:[[0.19866914 0.13961452 0.1327847  0.1984134  0.16129227 0.16922599]] entropy:[1.7798822]
DEBUG:chainerrl.agents.a3c:t:4115 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2859866  -0.10086562 -0.10912123  0.25067386  0.07443283  0.10856573]] probs:[[0.20141575 0.13679986 0.13567513 0.19442731 0.16301095 0.16867103]] entropy:[1.7801888]
DEBUG:chainerrl.agents.a3c:t:4116 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.28627855 -0.10139053 -0.10994972  0.25257862  0.07634775  0.10807863]] probs:[[

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6114291] v_loss:[[0.00043762]]
DEBUG:chainerrl.agents.a3c:grad norm:1.2489215079140485
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:4142 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18713026 -0.01555167  0.11037813  0.16031718  0.04510076  0.11081609]] probs:[[0.18147762 0.14818336 0.16806994 0.17667632 0.1574492  0.16814357]] entropy:[1.7894881]
DEBUG:chainerrl.agents.a3c:t:4143 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15550119 -0.01908479  0.08665821  0.15743649  0.02268578  0.09372044]] probs:[[0.1788611  0.15020838 0.16696206 0.17920758 0.15661556 0.16814534]] entropy:[1.7897065]
DEBUG:chainerrl.agents.a3c:t:4144 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16181514 -0.02683632  0.08953734  0.153436    0.02450294  0.09383972]] probs:[[0.1799861  0.149042   0.16743611 0.17848428 0.15689354 0.16815804]] entropy:[1.7895643]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:4170 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13858771 -0.00132863  0.0322192   0.16483746  0.04787629  0.08015459]] probs:[[0.17693967 0.15383682 0.15908526 0.18164578 0.16159567 0.1668968 ]] entropy:[1.7900352]
DEBUG:chainerrl.agents.a3c:t:4171 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13855527 -0.00118733  0.03234148  0.16490321  0.04790541  0.08018585]] probs:[[0.17692378 0.15384974 0.1590956  0.18164732 0.16159111 0.16689245]] entropy:[1.7900374]
DEBUG:chainerrl.agents.a3c:t:4172 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1383405  -0.00114519  0.03185123  0.16576394  0.04716405  0.08057851]] probs:[[0.17688707 0.15385735 0.15901877 0.18180506 0.16147254 0.16695921]] entropy:[1.7900192]
DEBUG:chainerrl.agents.a3c:t:4173 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13833502 -0.00114787  0.03183854  0.16577768  0.04714853  0.08058584]] probs:[[

DEBUG:chainerrl.agents.a3c:t:4199 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14013472 0.05901788 0.08502928 0.04176897 0.02990666 0.12668666]] probs:[[0.17677024 0.16299736 0.16729277 0.16020994 0.15832071 0.17440894]] entropy:[1.790901]
DEBUG:chainerrl.agents.a3c:t:4200 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14159623 0.05759579 0.08590584 0.04244785 0.03253197 0.12477623]] probs:[[0.17696407 0.16270623 0.16737828 0.16026013 0.15867886 0.17401242]] entropy:[1.7909198]
DEBUG:chainerrl.agents.a3c:t:4201 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14215478 0.05814135 0.08569668 0.04073249 0.03231734 0.12540439]] probs:[[0.1770712  0.16280262 0.16735107 0.15999293 0.15865222 0.1741299 ]] entropy:[1.7908987]
DEBUG:chainerrl.agents.a3c:t:4202 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1422355  0.05917299 0.08468165 0.03808468 0.03077916 0.12622678]] probs:[[0.17717606 0.16305399 0.1

DEBUG:chainerrl.agents.a3c:t:4228 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1460573  0.06908589 0.04817616 0.09377723 0.06335896 0.09393071]] probs:[[0.17694187 0.16383336 0.16044323 0.167929   0.16289778 0.16795477]] entropy:[1.7912545]
DEBUG:chainerrl.agents.a3c:t:4229 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14577973 0.06947073 0.0472637  0.09195445 0.0620442  0.09424981]] probs:[[0.17699872 0.16399461 0.16039291 0.16772358 0.16278121 0.168109  ]] entropy:[1.7912493]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4827446] v_loss:[[5.225455e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.2452135272986963
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:4230 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13430522 0.07733018 0.06157076 0.09503745 0.04003124 0.07608339]] probs:[[0.17576446 0.1660302  0.16343418 0.16899632 0.15995152 0.16582333]] entropy:[1.7913272]
DEBUG:chainerrl.agents.a3c:t:4231 r:0.0 

DEBUG:chainerrl.agents.a3c:t:4257 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15370193 0.06322442 0.0545068  0.0588861  0.05602009 0.13396104]] probs:[[0.17806296 0.1626596  0.16124776 0.16195546 0.16149196 0.1745823 ]] entropy:[1.7909074]
DEBUG:chainerrl.agents.a3c:t:4258 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15199837 0.06270829 0.05360808 0.06262133 0.05767476 0.13917075]] probs:[[0.17753732 0.16237213 0.1609012  0.16235802 0.16155687 0.17527449]] entropy:[1.7909018]
DEBUG:chainerrl.agents.a3c:t:4259 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16374202 0.06938101 0.05319379 0.05790645 0.05132608 0.12116911]] probs:[[0.17995766 0.16375323 0.16112384 0.16188496 0.1608232  0.17245711]] entropy:[1.790852]
DEBUG:chainerrl.agents.a3c:t:4260 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16328616 0.06902887 0.05376588 0.05790969 0.05206081 0.12246204]] probs:[[0.1798227  0.1636474  0.1

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2859714] v_loss:[[0.00025205]]
DEBUG:chainerrl.agents.a3c:grad norm:1.0646328444462774
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:4286 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14519149 0.0745741  0.06058774 0.05492946 0.05485665 0.13007839]] probs:[[0.17658447 0.16454466 0.16225928 0.16134378 0.16133203 0.1739358 ]] entropy:[1.7910691]
DEBUG:chainerrl.agents.a3c:t:4287 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15040654 0.05458779 0.05213739 0.05264543 0.03946989 0.12564714]] probs:[[0.17881285 0.16247448 0.16207685 0.16215922 0.16003668 0.17443992]] entropy:[1.7908366]
DEBUG:chainerrl.agents.a3c:t:4288 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15068722 0.05270351 0.04989827 0.05291006 0.03826152 0.1247654 ]] probs:[[0.17902817 0.16231833 0.16186363 0.16235186 0.15999098 0.17444706]] entropy:[1.7908143]
DEBUG:chainerrl.agents.a3c:t:4289 r:0.0 a:

DEBUG:chainerrl.agents.a3c:t:4315 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06018308 0.11787058 0.08576841 0.10596916 0.04401026 0.04315954]] probs:[[0.16395518 0.17369148 0.16820416 0.17163657 0.16132489 0.16118771]] entropy:[1.7913334]
DEBUG:chainerrl.agents.a3c:t:4316 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06454194 0.11294467 0.07204071 0.10754829 0.05137186 0.06418754]] probs:[[0.16426761 0.17241418 0.16550405 0.17148629 0.16211839 0.16420941]] entropy:[1.7914903]
DEBUG:chainerrl.agents.a3c:t:4317 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09176382 0.08322633 0.01170776 0.11015317 0.0595367  0.08250871]] probs:[[0.16971599 0.16827321 0.15665881 0.17286582 0.16433372 0.1681525 ]] entropy:[1.7912805]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6024663] v_loss:[[0.00035282]]
DEBUG:chainerrl.agents.a3c:grad norm:4.24236058431154
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:4318 r:0.0 a:4 

DEBUG:chainerrl.agents.a3c:t:4344 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08820612 0.07507972 0.04595425 0.10101885 0.030491   0.0733549 ]] probs:[[0.16984676 0.16763185 0.1628199  0.17203696 0.16032155 0.16734296]] entropy:[1.7914727]
DEBUG:chainerrl.agents.a3c:t:4345 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08752857 0.08640477 0.04547723 0.10003094 0.0411363  0.06793591]] probs:[[0.16933241 0.16914222 0.16235939 0.17146274 0.16165613 0.16604702]] entropy:[1.7915181]
DEBUG:chainerrl.agents.a3c:t:4346 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08813263 0.1046178  0.05360579 0.10209721 0.05318438 0.06713272]] probs:[[0.16830423 0.17110176 0.1625924  0.17067103 0.1625239  0.16480672]] entropy:[1.7915329]
DEBUG:chainerrl.agents.a3c:t:4347 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08840434 0.10496756 0.05368941 0.10205068 0.05283662 0.06723898]] probs:[[0.16833782 0.17114925 0.

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.670989] v_loss:[[0.00065987]]
DEBUG:chainerrl.agents.a3c:grad norm:5.3108233630101935
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:4374 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06285958 0.08250977 0.06433431 0.15914328 0.15926684 0.07432613]] probs:[[0.16038153 0.16356422 0.16061822 0.17659152 0.17661335 0.16223115]] entropy:[1.7908589]
DEBUG:chainerrl.agents.a3c:t:4375 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06287164 0.08235462 0.06442569 0.15942599 0.15911748 0.07440083]] probs:[[0.16037916 0.16353446 0.1606286  0.17663671 0.17658222 0.1622389 ]] entropy:[1.7908579]
DEBUG:chainerrl.agents.a3c:t:4376 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06613433 0.06462742 0.05220219 0.17187026 0.15522233 0.08719535]] probs:[[0.1610138  0.16077134 0.15878609 0.1789714  0.17601655 0.16444086]] entropy:[1.7906518]
DEBUG:chainerrl.agents.a3c:t:4377 r:0.0 a:2

DEBUG:chainerrl.agents.a3c:t:4403 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0002718   0.04165522  0.06066765  0.2252094   0.25309715 -0.02974899]] probs:[[0.1511753  0.15756272 0.16058703 0.18930846 0.19466215 0.14670435]] entropy:[1.785702]
DEBUG:chainerrl.agents.a3c:t:4404 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00394886  0.04582838  0.05612113  0.24350028  0.23730254 -0.00844493]] probs:[[0.15010378 0.15776463 0.15939684 0.19224605 0.19105825 0.14943041]] entropy:[1.7860076]
DEBUG:chainerrl.agents.a3c:t:4405 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00073333  0.0429033   0.05995489  0.22635649  0.25079137 -0.02872127]] probs:[[0.15123421 0.15774813 0.16046104 0.18951209 0.19419983 0.14684463]] entropy:[1.7857761]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6776793] v_loss:[[0.00071554]]
DEBUG:chainerrl.agents.a3c:grad norm:1.4822682189377971
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3

DEBUG:chainerrl.agents.a3c:t:4431 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18585087  0.2300436   0.16363592  0.12831728 -0.20752306  0.09002624]] probs:[[0.18015733 0.18829753 0.17619927 0.17008477 0.12156592 0.16369517]] entropy:[1.7828102]
DEBUG:chainerrl.agents.a3c:t:4432 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18575582  0.23006247  0.16366005  0.12841742 -0.20747946  0.09018442]] probs:[[0.18013321 0.18829376 0.17619668 0.17009519 0.12156649 0.16371469]] entropy:[1.7828128]
DEBUG:chainerrl.agents.a3c:t:4433 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18576825  0.23006907  0.16364972  0.12843497 -0.20748515  0.09017527]] probs:[[0.180135   0.18829453 0.17619441 0.17009775 0.12156549 0.16371278]] entropy:[1.7828121]
DEBUG:chainerrl.agents.a3c:t:4434 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1857665   0.23007053  0.1636499   0.12843828 -0.20748599  0.09018081]] probs:[[

DEBUG:chainerrl.agents.a3c:t:4460 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15275913  0.24650814  0.09111873  0.14394604 -0.10068758  0.06666049]] probs:[[0.17472267 0.19189513 0.16427793 0.1731896  0.13560596 0.1603087 ]] entropy:[1.7863891]
DEBUG:chainerrl.agents.a3c:t:4461 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15253405  0.2465798   0.09097462  0.14398417 -0.10034801  0.06679167]] probs:[[0.17467909 0.19190419 0.16425024 0.17319196 0.1356487  0.16032581]] entropy:[1.7863986]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7037002] v_loss:[[0.00111877]]
DEBUG:chainerrl.agents.a3c:grad norm:2.2705933010725614
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:4462 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16384941  0.21307607  0.08533247  0.13876446 -0.0715166   0.07735281]] probs:[[0.17675364 0.18567236 0.16340634 0.17237493 0.13968515 0.1621076 ]] entropy:[1.7879113]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:4488 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03628499  0.17676337  0.19528802  0.08522467  0.01836705  0.15685368]] probs:[[0.14500213 0.1794321  0.18278699 0.16373645 0.15314734 0.17589498]] entropy:[1.7882131]
DEBUG:chainerrl.agents.a3c:t:4489 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03689365  0.1781866   0.19501437  0.08614459  0.01936239  0.15598316]] probs:[[0.14487515 0.1796396  0.1826881  0.16384332 0.15325885 0.17569493]] entropy:[1.7882109]
DEBUG:chainerrl.agents.a3c:t:4490 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04401444  0.1753374   0.22083683  0.10609516  0.02456373  0.18654779]] probs:[[0.14203012 0.17686576 0.18509893 0.16503355 0.15211204 0.17885964]] entropy:[1.7874538]
DEBUG:chainerrl.agents.a3c:t:4491 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06798497  0.18683599  0.21279338  0.1131536   0.01877316  0.19113426]] probs:[[

DEBUG:chainerrl.agents.a3c:t:4517 r:0.3 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06330764 0.15097092 0.21415408 0.11306581 0.012235   0.10240075]] probs:[[0.15884282 0.17339607 0.18470529 0.16694647 0.15093397 0.16517544]] entropy:[1.7897221]
DEBUG:chainerrl.agents.a3c:pi_loss:[2.130282] v_loss:[[0.1370855]]
DEBUG:chainerrl.agents.a3c:grad norm:399.3685577416367
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:4518 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2890275  -0.03935855  0.01569907  0.07031602  0.0675272   0.41831145]] probs:[[0.19143116 0.1378467  0.14564902 0.15382516 0.15339679 0.21785118]] entropy:[1.77776]
DEBUG:chainerrl.agents.a3c:t:4519 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2882293  -0.03770555  0.01614582  0.06997118  0.0694888   0.42028975]] probs:[[0.1911217  0.13796164 0.14559473 0.15364616 0.15357207 0.21810375]] entropy:[1.7777493]
DEBUG:chainerrl.agents.a3c:t:4520 r:

INFO: outdir:result global_step:9261 local_step:4537 R:2.1
INFO: statistics:[('average_value', 0.39674253617866223), ('average_entropy', 1.759845255554798)]


DEBUG:chainerrl.agents.a3c:t:4538 r:0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.5461588   0.1308433  -0.03373928  0.39865193 -1.0163598   0.51050425]] probs:[[0.23487824 0.15505065 0.13152134 0.20266622 0.04923224 0.2266513 ]] entropy:[1.704254]
DEBUG:chainerrl.agents.a3c:t:4539 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.5987685   0.16603902 -0.10595927  0.43445656 -1.1480587   0.6224102 ]] probs:[[0.23868023 0.15484045 0.11796606 0.20251477 0.04160821 0.24439026]] entropy:[1.6829487]
DEBUG:chainerrl.agents.a3c:t:4540 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.6053524   0.1733794  -0.11417143  0.43860823 -1.1728297   0.6413072 ]] probs:[[0.23876715 0.15501407 0.1162758  0.20209636 0.04033851 0.24750817]] entropy:[1.6794202]
DEBUG:chainerrl.agents.a3c:t:4541 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.6064355   0.17542908 -0.1150846   0.43837795 -1.1776297   0.6442037 ]] probs:[[0.2

DEBUG:chainerrl.agents.a3c:t:4567 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.42757523  0.22481723  0.02599576  0.39078254 -1.0101289   0.43271244]] probs:[[0.21311799 0.17400569 0.1426318  0.2054193  0.05060963 0.21421564]] entropy:[1.7176875]
DEBUG:chainerrl.agents.a3c:t:4568 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.42757523  0.22481726  0.02599573  0.39078254 -1.0101287   0.43271244]] probs:[[0.21311797 0.17400566 0.14263178 0.20541927 0.05060963 0.2142156 ]] entropy:[1.7176874]
DEBUG:chainerrl.agents.a3c:t:4569 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.42745417  0.22463484  0.02585451  0.39081088 -1.0101054   0.43258867]] probs:[[0.2131129  0.17399086 0.14262553 0.2054451  0.05061574 0.21420994]] entropy:[1.7176919]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3746047] v_loss:[[5.067186e-08]]
DEBUG:chainerrl.agents.a3c:grad norm:1.0911317467171227
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:4595 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.3053235   0.19900541  0.15945081  0.25339139 -0.69860613  0.2434297 ]] probs:[[0.19923419 0.17913914 0.17219168 0.18915161 0.07300671 0.18727669]] entropy:[1.752146]
DEBUG:chainerrl.agents.a3c:t:4596 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.30717456  0.19965589  0.15772425  0.25124168 -0.70372057  0.23779507]] probs:[[0.19993167 0.17955057 0.17217739 0.1890559  0.07275375 0.18653075]] entropy:[1.7518789]
DEBUG:chainerrl.agents.a3c:t:4597 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.30609688  0.19823998  0.15785109  0.25154072 -0.70042896  0.23981412]] probs:[[0.1996712  0.17925602 0.17216033 0.18906972 0.07297713 0.18686552]] entropy:[1.7520934]
DEBUG:chainerrl.agents.a3c:t:4598 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.30455017  0.1976927   0.15865631  0.25240773 -0.69833386  0.24181749]] probs:[[0

DEBUG:chainerrl.agents.a3c:t:4624 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22708036  0.1910093   0.17453206  0.17319794 -0.54556966  0.16429707]] probs:[[0.19004692 0.18331389 0.18031813 0.18007773 0.08776133 0.178482  ]] entropy:[1.7652895]
DEBUG:chainerrl.agents.a3c:t:4625 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22476426  0.19060025  0.17617051  0.1743587  -0.5435542   0.16657186]] probs:[[0.18949853 0.18313384 0.18051025 0.18018349 0.08788797 0.17878588]] entropy:[1.7654142]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5105847] v_loss:[[0.00015238]]
DEBUG:chainerrl.agents.a3c:grad norm:1.2462484214131626
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:4626 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19503275  0.17870732  0.16363609  0.17548639 -0.4795482   0.16385251]] probs:[[0.18476732 0.1817754  0.17905638 0.18119086 0.09411491 0.17909513]] entropy:[1.7698613]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:4652 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13214362  0.15814398  0.10620843  0.16065161 -0.31065154  0.16433573]] probs:[[0.17529675 0.1799143  0.17080885 0.18036601 0.11258237 0.18103173]] entropy:[1.7799085]
DEBUG:chainerrl.agents.a3c:t:4653 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13214253  0.15824094  0.106149    0.16067563 -0.31060892  0.164388  ]] probs:[[0.17529206 0.17992713 0.17079431 0.18036571 0.11258427 0.18103655]] entropy:[1.7799084]
DEBUG:chainerrl.agents.a3c:t:4654 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13214287  0.15825652  0.10613907  0.16067831 -0.31060696  0.1643985 ]] probs:[[0.17529145 0.17992924 0.17079197 0.18036552 0.11258407 0.18103777]] entropy:[1.7799082]
DEBUG:chainerrl.agents.a3c:t:4655 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13214266  0.15825354  0.10614092  0.16067448 -0.3106166   0.16439667]] probs:[[

DEBUG:chainerrl.agents.a3c:t:4681 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05962554  0.25726983  0.1056143   0.32066107 -0.18079583  0.03973553]] probs:[[0.14274321 0.19596629 0.16839074 0.20879103 0.12645379 0.15765488]] entropy:[1.7770395]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5546366] v_loss:[[0.00024382]]
DEBUG:chainerrl.agents.a3c:grad norm:1.1580067540327375
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:4682 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03311953  0.2302285   0.09370925  0.27253714 -0.12990382  0.05154714]] probs:[[0.1472727  0.19164295 0.1671873  0.19992507 0.13368705 0.16028486]] entropy:[1.7820559]
DEBUG:chainerrl.agents.a3c:t:4683 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03342981  0.22928824  0.09331697  0.2725141  -0.12872264  0.05048659]] probs:[[0.14727236 0.19152182 0.1671732  0.19998205 0.1338863  0.16016428]] entropy:[1.7821014]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:4709 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05875462  0.07560971  0.10182358  0.24524394 -0.11208801  0.06646477]] probs:[[0.16348648 0.16626541 0.1706815  0.19700314 0.13781163 0.16475187]] entropy:[1.7864064]
DEBUG:chainerrl.agents.a3c:t:4710 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05842878  0.07627951  0.10287392  0.24645509 -0.11203436  0.06717823]] probs:[[0.16333501 0.16627684 0.1707582  0.19712335 0.1377362  0.16477038]] entropy:[1.7863672]
DEBUG:chainerrl.agents.a3c:t:4711 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05833044  0.07651614  0.10325359  0.24688663 -0.11199921  0.06742212]] probs:[[0.16328332 0.1662799  0.17078577 0.1971654  0.13771099 0.1647746 ]] entropy:[1.7863537]
DEBUG:chainerrl.agents.a3c:t:4712 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05924916  0.07469934  0.10041992  0.24360727 -0.11211523  0.0654707 ]] probs:[[

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5430996] v_loss:[[0.00015391]]
DEBUG:chainerrl.agents.a3c:grad norm:0.4343095398339647
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:4738 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06617301  0.08153145  0.07793056  0.1805815  -0.0555311   0.05797786]] probs:[[0.1659541  0.16852257 0.16791682 0.18606938 0.14693746 0.16459963]] entropy:[1.7894254]
DEBUG:chainerrl.agents.a3c:t:4739 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06618046  0.08154505  0.07793287  0.1805854  -0.05554301  0.05798706]] probs:[[0.1659546  0.16852412 0.16791648 0.18606931 0.14693508 0.16460043]] entropy:[1.7894253]
DEBUG:chainerrl.agents.a3c:t:4740 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06586372  0.08124613  0.07804258  0.18044339 -0.05587151  0.05827879]] probs:[[0.1659205  0.16849248 0.16795357 0.18606356 0.14690314 0.16466676]] entropy:[1.7894225]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:4767 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11593145 0.06665358 0.11414349 0.03137291 0.0711337  0.05076428]] probs:[[0.17354634 0.16520163 0.17323633 0.15947482 0.16594341 0.16259743]] entropy:[1.7912759]
DEBUG:chainerrl.agents.a3c:t:4768 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11588515 0.06730224 0.11539895 0.03285934 0.07124285 0.05158509]] probs:[[0.17341593 0.16519226 0.17333163 0.15959942 0.1658445  0.16261621]] entropy:[1.7912828]
DEBUG:chainerrl.agents.a3c:t:4769 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08402882 0.06934448 0.1255899  0.04741075 0.08321423 0.05606352]] probs:[[0.16768643 0.16524205 0.17480251 0.16165714 0.16754988 0.16306199]] entropy:[1.7914369]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4682076] v_loss:[[3.258542e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.14779193402664612
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:4770 r:0.0

DEBUG:chainerrl.agents.a3c:t:4796 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14254752 0.15198095 0.21763624 0.02871936 0.01834243 0.10285241]] probs:[[0.17169888 0.17332625 0.18508792 0.15322602 0.15164421 0.16501677]] entropy:[1.7893068]
DEBUG:chainerrl.agents.a3c:t:4797 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14260866 0.15206501 0.21756408 0.028272   0.01872612 0.10268386]] probs:[[0.17171392 0.1733454  0.18507946 0.15316154 0.15170643 0.16499333]] entropy:[1.7893068]
DEBUG:chainerrl.agents.a3c:t:4798 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14260863 0.1520649  0.21756408 0.02827212 0.01872604 0.102684  ]] probs:[[0.17171387 0.17334536 0.18507943 0.15316153 0.1517064  0.16499332]] entropy:[1.7893066]
DEBUG:chainerrl.agents.a3c:t:4799 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1425475  0.15198095 0.2176362  0.02871937 0.01834244 0.10285237]] probs:[[0.17169888 0.17332625 0.

DEBUG:chainerrl.agents.a3c:t:4825 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16187246 0.10821339 0.13853937 0.06584518 0.07487956 0.11882022]] probs:[[0.17520264 0.16604921 0.17116195 0.15916096 0.16060539 0.16781984]] entropy:[1.7911962]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.1415744] v_loss:[[0.00084418]]
DEBUG:chainerrl.agents.a3c:grad norm:1.811098575221489
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:4826 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15458581 0.11963901 0.13081186 0.044933   0.10224696 0.11231057]] probs:[[0.1740365  0.16805953 0.16994776 0.15596198 0.1651619  0.16683242]] entropy:[1.7912028]
DEBUG:chainerrl.agents.a3c:t:4827 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15458494 0.11963572 0.13081317 0.04494792 0.1022352  0.11231482]] probs:[[0.17403623 0.16805887 0.16994788 0.15596421 0.16515985 0.16683301]] entropy:[1.7912029]
DEBUG:chainerrl.agents.a3c:t:4828 r:0.0 a:5

DEBUG:chainerrl.agents.a3c:t:4854 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04854074 0.01628158 0.21794896 0.07027146 0.27726108 0.09807942]] probs:[[0.154259   0.14936213 0.18273576 0.15764783 0.19390208 0.16209322]] entropy:[1.7871786]
DEBUG:chainerrl.agents.a3c:t:4855 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04795177 0.01597206 0.21798383 0.07033613 0.2771566  0.09812201]] probs:[[0.1541888  0.14933589 0.18276659 0.15767913 0.19390777 0.16212182]] entropy:[1.7871692]
DEBUG:chainerrl.agents.a3c:t:4856 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04835633 0.01617549 0.21802954 0.0702356  0.27713805 0.09817814]] probs:[[0.15423718 0.14935271 0.18275835 0.15764897 0.19388658 0.16211621]] entropy:[1.7871768]
DEBUG:chainerrl.agents.a3c:t:4857 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04833044 0.01616235 0.21802874 0.07024031 0.2771364  0.09817724]] probs:[[0.1542341  0.14935163 0.

DEBUG:chainerrl.agents.a3c:t:4883 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08916682 0.15630516 0.15842074 0.02299652 0.23670526 0.08514507]] probs:[[0.16045988 0.17160276 0.17196617 0.15018585 0.18596943 0.15981585]] entropy:[1.7894237]
DEBUG:chainerrl.agents.a3c:t:4884 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.088104   0.15800622 0.1575339  0.022669   0.23337789 0.08269873]] probs:[[0.16046403 0.17208217 0.17200091 0.15030023 0.1855536  0.15959904]] entropy:[1.789456]
DEBUG:chainerrl.agents.a3c:t:4885 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09006535 0.15501814 0.15877642 0.02362923 0.23920324 0.08639266]] probs:[[0.16048467 0.17125458 0.17189941 0.15016915 0.18629591 0.15989634]] entropy:[1.7894022]
DEBUG:chainerrl.agents.a3c:t:4886 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09006944 0.15501282 0.15877837 0.02363102 0.23921382 0.0863991 ]] probs:[[0.16048478 0.17125309 0.1

DEBUG:chainerrl.agents.a3c:t:4913 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09477475 0.20562896 0.11078803 0.07561492 0.15516712 0.10628408]] probs:[[0.16159719 0.18054156 0.16420573 0.15853049 0.17165715 0.16346781]] entropy:[1.7907943]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6852084] v_loss:[[0.00082634]]
DEBUG:chainerrl.agents.a3c:grad norm:4.161480361533803
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:4914 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.32389146 0.23411277 0.04090855 0.02403519 0.07449687 0.04713617]] probs:[[0.20217937 0.18481894 0.15234876 0.14979969 0.15755281 0.15330048]] entropy:[1.784943]
DEBUG:chainerrl.agents.a3c:t:4915 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.32366297 0.23423915 0.04082508 0.02399421 0.07406574 0.04685424]] probs:[[0.20216405 0.18487051 0.1523593  0.14981642 0.15750895 0.15328069]] entropy:[1.784939]
DEBUG:chainerrl.agents.a3c:t:4916 r:0.0 a:1 p

DEBUG:chainerrl.agents.a3c:t:4942 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24029915 0.18042195 0.06067305 0.07264394 0.07173488 0.05917797]] probs:[[0.18859671 0.17763549 0.15758812 0.15948594 0.15934101 0.15735269]] entropy:[1.7891853]
DEBUG:chainerrl.agents.a3c:t:4943 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24045765 0.17690448 0.06143324 0.07210703 0.06293516 0.05946907]] probs:[[0.18898754 0.17735049 0.15800974 0.15970534 0.15824723 0.15769969]] entropy:[1.7891539]
DEBUG:chainerrl.agents.a3c:t:4944 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24075852 0.17421949 0.06175534 0.07171725 0.05776961 0.05995664]] probs:[[0.18926555 0.17708184 0.15824555 0.15982984 0.15761606 0.15796116]] entropy:[1.7891331]
DEBUG:chainerrl.agents.a3c:t:4945 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24066453 0.1732069  0.061992   0.07166982 0.05601826 0.05989788]] probs:[[0.18933338 0.17698266 0.

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.033228] v_loss:[[0.05952901]]
DEBUG:chainerrl.agents.a3c:grad norm:50.25939362176477
DEBUG:chainerrl.agents.a3c:update


INFO: outdir:result global_step:10150 local_step:4970 R:0.5
INFO: statistics:[('average_value', 0.3856706560893891), ('average_entropy', 1.7661531557169878)]


DEBUG:chainerrl.agents.a3c:t:4971 r:0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1016513   0.06161417  0.18173274  0.1846979   0.12745589  0.07619061]] probs:[[0.13719991 0.1615322  0.18214864 0.18268953 0.17252569 0.163904  ]] entropy:[1.7873367]
DEBUG:chainerrl.agents.a3c:t:4972 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13477027  0.09381057  0.14311317  0.18110052  0.15923405  0.11597606]] probs:[[0.13200668 0.165908   0.17429268 0.18104097 0.17712522 0.16962649]] entropy:[1.786752]
DEBUG:chainerrl.agents.a3c:t:4973 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1392448   0.0994478   0.13679495  0.18119556  0.16410758  0.12202995]] probs:[[0.13126463 0.16665201 0.17299367 0.18084778 0.1777837  0.17045818]] entropy:[1.7865899]
DEBUG:chainerrl.agents.a3c:t:4974 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13992786  0.10043259  0.13588125  0.1811449   0.1644692   0.12276198]] probs:[[0.1

DEBUG:chainerrl.agents.a3c:t:5000 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09675982  0.00980401  0.20348048  0.10085242  0.13675699  0.18870142]] probs:[[0.1374659  0.1529238  0.18560414 0.1675008  0.17362411 0.18288127]] entropy:[1.7865077]
DEBUG:chainerrl.agents.a3c:t:5001 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09670759  0.0097827   0.20356205  0.10077246  0.13672973  0.18870227]] probs:[[0.13747294 0.15292037 0.1856191  0.16748723 0.1736192  0.18288122]] entropy:[1.7865072]
DEBUG:chainerrl.agents.a3c:t:5002 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09668081  0.00977167  0.20360422  0.10073128  0.13671581  0.18870285]] probs:[[0.13747652 0.15291859 0.1856268  0.16748022 0.17361666 0.18288122]] entropy:[1.786507]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5529509] v_loss:[[0.00020699]]
DEBUG:chainerrl.agents.a3c:grad norm:0.9931253637617801
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3

DEBUG:chainerrl.agents.a3c:t:5028 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09874114  0.04323864  0.07877273  0.10758439  0.14812782  0.25857332]] probs:[[0.13725992 0.15819937 0.16392191 0.16871347 0.17569426 0.19621104]] entropy:[1.7860268]
DEBUG:chainerrl.agents.a3c:t:5029 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09863734  0.04400631  0.07901154  0.10811478  0.14829138  0.25883698]] probs:[[0.13722685 0.15826629 0.16390455 0.1687448  0.17566241 0.19619513]] entropy:[1.7860277]
DEBUG:chainerrl.agents.a3c:t:5030 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09823384  0.0462945   0.07966723  0.10980128  0.14887507  0.25958517]] probs:[[0.1371369  0.15846093 0.16383845 0.1688507  0.17557892 0.19613414]] entropy:[1.786032]
DEBUG:chainerrl.agents.a3c:t:5031 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09856393  0.04454425  0.07917808  0.10848679  0.14840537  0.2590222 ]] probs:[[0

DEBUG:chainerrl.agents.a3c:t:5057 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0352659   0.065836    0.05265292  0.07480139  0.14753048  0.20936826]] probs:[[0.1472241  0.16288717 0.16075392 0.16435409 0.17675282 0.18802784]] entropy:[1.7887999]
DEBUG:chainerrl.agents.a3c:t:5058 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03533927  0.06579408  0.052713    0.07481302  0.14757295  0.20937386]] probs:[[0.14721294 0.16287994 0.16076317 0.16435559 0.1767599  0.18802844]] entropy:[1.7887983]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5627762] v_loss:[[0.00019872]]
DEBUG:chainerrl.agents.a3c:grad norm:0.5205842209560841
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5059 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01036626  0.05393358  0.06701341  0.04547     0.15551914  0.20586449]] probs:[[0.15091799 0.16094078 0.16305968 0.15958439 0.1781493  0.1873479 ]] entropy:[1.7890795]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:5085 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00168379  0.04714432  0.07673004  0.06050471  0.16972652  0.18251978]] probs:[[0.15186003 0.15945907 0.16424726 0.16160381 0.18025446 0.1825753 ]] entropy:[1.7895522]
DEBUG:chainerrl.agents.a3c:t:5086 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0014008   0.04705545  0.07705114  0.06001903  0.16968003  0.18251832]] probs:[[0.15190385 0.15944579 0.16430093 0.16152625 0.18024708 0.18257608]] entropy:[1.7895546]
DEBUG:chainerrl.agents.a3c:t:5087 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00150476  0.04708688  0.07693432  0.06019798  0.1696992   0.18252005]] probs:[[0.15188766 0.15945038 0.16428131 0.16155471 0.18025006 0.1825759 ]] entropy:[1.7895536]
DEBUG:chainerrl.agents.a3c:t:5088 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0014468   0.04706816  0.07699879  0.06009671  0.16968603  0.18251781]] probs:[[

DEBUG:chainerrl.agents.a3c:t:5114 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04534915 -0.00714451 -0.00519477  0.09759768  0.17546263  0.21806875]] probs:[[0.1592179  0.15107554 0.1513704  0.16775796 0.18134245 0.1892357 ]] entropy:[1.788018]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3630517] v_loss:[[6.8251786e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.25608280136322703
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5115 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05464164 0.00688986 0.01088499 0.08709421 0.15793541 0.20535457]] probs:[[0.16090047 0.15339775 0.15401182 0.16620776 0.17840919 0.187073  ]] entropy:[1.7890165]
DEBUG:chainerrl.agents.a3c:t:5116 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05464203 0.00659157 0.01094469 0.08766539 0.15824343 0.20522529]] probs:[[0.16088618 0.15333831 0.15400727 0.16628788 0.17844824 0.18703213]] entropy:[1.789013]
DEBUG:chainerrl.agents.a3c:t:5117 

DEBUG:chainerrl.agents.a3c:t:5143 r:0.05 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02166282 0.06212366 0.08988092 0.12897864 0.11454521 0.08425152]] probs:[[0.15656567 0.16303034 0.16761902 0.17430234 0.17180462 0.16667807]] entropy:[1.7911537]
DEBUG:chainerrl.agents.a3c:t:5144 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01536649 0.06649554 0.08953866 0.13196519 0.12056452 0.0877785 ]] probs:[[0.15530007 0.16344689 0.16725695 0.17450577 0.17252758 0.1669628 ]] entropy:[1.7910469]
DEBUG:chainerrl.agents.a3c:t:5145 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01436318 0.06181975 0.09553593 0.13126002 0.11820167 0.09019912]] probs:[[0.15515015 0.16269055 0.16826937 0.1743893  0.17212686 0.16737373]] entropy:[1.7910304]
DEBUG:chainerrl.agents.a3c:t:5146 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01043548 0.06248767 0.09729259 0.13265875 0.12084001 0.09264562]] probs:[[0.1544022  0.16265202 0

DEBUG:chainerrl.agents.a3c:t:5172 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06904774 0.17256188 0.00648318 0.09034032 0.0424986  0.05556565]] probs:[[0.16582803 0.1839135  0.15577097 0.1693968  0.16148338 0.16360734]] entropy:[1.7903982]
DEBUG:chainerrl.agents.a3c:t:5173 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06767022 0.19469006 0.00889665 0.0927211  0.03763383 0.03751076]] probs:[[0.16544162 0.18784896 0.15599824 0.16963843 0.16054624 0.16052648]] entropy:[1.7898589]
DEBUG:chainerrl.agents.a3c:t:5174 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06867224 0.20315227 0.00950553 0.09253231 0.03459506 0.0318787 ]] probs:[[0.16553523 0.18936266 0.15602516 0.16953239 0.15998928 0.15955527]] entropy:[1.7896149]
DEBUG:chainerrl.agents.a3c:t:5175 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06777058 0.19767612 0.00906625 0.09281326 0.03680714 0.0354257 ]] probs:[[0.16543284 0.18838182 0.

DEBUG:chainerrl.agents.a3c:t:5201 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[6.29001334e-02 4.38193753e-02 1.03528146e-04 1.87752411e-01
  1.51207849e-01 3.43732610e-02]] probs:[[0.1634677  0.16037819 0.15351816 0.18520582 0.17855974 0.15887037]] entropy:[1.7894807]
DEBUG:chainerrl.agents.a3c:t:5202 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[6.29038140e-02 4.38058898e-02 1.10203866e-04 1.87737390e-01
  1.51191503e-01 3.43677923e-02]] probs:[[0.16346948 0.16037717 0.15352029 0.18520437 0.1785581  0.15887064]] entropy:[1.7894814]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.692288] v_loss:[[0.00081144]]
DEBUG:chainerrl.agents.a3c:grad norm:1.5486863417127996
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5203 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05469166 0.04293934 0.00027863 0.18749318 0.15951206 0.03603492]] probs:[[0.16209011 0.16019632 0.15350595 0.18511067 0.18000287 0.15909407]] entropy:[1.

DEBUG:chainerrl.agents.a3c:t:5230 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1286739  0.0139824  0.05237017 0.09420865 0.11813731 0.07617558]] probs:[[0.17474353 0.15580854 0.16190596 0.16882356 0.17291199 0.16580644]] entropy:[1.7910048]
DEBUG:chainerrl.agents.a3c:t:5231 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1257249  0.01223169 0.05075644 0.09373261 0.11693458 0.08100633]] probs:[[0.17432196 0.15561901 0.16173117 0.16883327 0.17279634 0.16669828]] entropy:[1.7910128]
DEBUG:chainerrl.agents.a3c:t:5232 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12525323 0.01156961 0.0506918  0.09383187 0.11754525 0.08108669]] probs:[[0.17425022 0.15552533 0.16173042 0.16886015 0.17291225 0.16672167]] entropy:[1.7910047]
DEBUG:chainerrl.agents.a3c:t:5233 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12510969 0.01336254 0.04954797 0.09316754 0.11399602 0.08503558]] probs:[[0.17422448 0.1558038  0.

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5639755] v_loss:[[0.00021987]]
DEBUG:chainerrl.agents.a3c:grad norm:0.7009663897261491
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5259 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.26044247 0.10515293 0.02998573 0.06696215 0.0571994  0.13473217]] probs:[[0.19333479 0.16552693 0.15354085 0.15932451 0.15777662 0.17049621]] entropy:[1.7887471]
DEBUG:chainerrl.agents.a3c:t:5260 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2598652  0.10457464 0.02995703 0.0667917  0.05735446 0.13547896]] probs:[[0.19324006 0.16544564 0.15354982 0.15931123 0.15781486 0.17063844]] entropy:[1.7887597]
DEBUG:chainerrl.agents.a3c:t:5261 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.25975356 0.10448574 0.02994612 0.06676032 0.05735579 0.13565321]] probs:[[0.19322099 0.16543308 0.15355013 0.1593083  0.1578171  0.17067039]] entropy:[1.7887616]
DEBUG:chainerrl.agents.a3c:t:5262 r:0.0 a:

DEBUG:chainerrl.agents.a3c:t:5288 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1831035  0.12271862 0.05207944 0.07986537 0.07869297 0.13794413]] probs:[[0.17930108 0.16879442 0.15728231 0.16171382 0.16152434 0.17138405]] entropy:[1.790789]
DEBUG:chainerrl.agents.a3c:t:5289 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18310347 0.12271862 0.05207946 0.07986534 0.07869297 0.1379441 ]] probs:[[0.17930108 0.16879442 0.15728232 0.16171382 0.16152434 0.17138405]] entropy:[1.7907889]
DEBUG:chainerrl.agents.a3c:t:5290 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18311273 0.12263946 0.05208653 0.07990424 0.07875846 0.13785109]] probs:[[0.17930445 0.16878267 0.15728493 0.16172166 0.16153647 0.17136976]] entropy:[1.79079]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5107546] v_loss:[[6.9518384e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.19318214089199798
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5291 r:0.0 a

DEBUG:chainerrl.agents.a3c:t:5317 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15193723 0.08315433 0.10035415 0.02412003 0.08611875 0.21446256]] probs:[[0.1734879  0.16195606 0.16476576 0.15267183 0.16243687 0.1846816 ]] entropy:[1.7899444]
DEBUG:chainerrl.agents.a3c:t:5318 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15238293 0.07689629 0.09910093 0.01134893 0.07166832 0.20762642]] probs:[[0.17472988 0.16202565 0.1656636  0.15174589 0.1611808  0.18465418]] entropy:[1.7897817]
DEBUG:chainerrl.agents.a3c:t:5319 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15205143 0.07820817 0.09923442 0.01503547 0.07584501 0.20799477]] probs:[[0.17441383 0.16199858 0.16544087 0.15208124 0.1616162  0.18444921]] entropy:[1.78986]
DEBUG:chainerrl.agents.a3c:t:5320 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15189765 0.0766635  0.09878059 0.01525255 0.07388601 0.2043068 ]] probs:[[0.1746165  0.1619614  0.16

DEBUG:chainerrl.agents.a3c:t:5346 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01995308 -0.02850099  0.27145976  0.15179448  0.25324613  0.03154855]] probs:[[0.15028545 0.1431771  0.1932613  0.17146477 0.18977317 0.15203822]] entropy:[1.7849078]
DEBUG:chainerrl.agents.a3c:pi_loss:[-2.3055284] v_loss:[[0.01314911]]
DEBUG:chainerrl.agents.a3c:grad norm:26.678257324412545
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5347 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00861961  0.0454164   0.2917365   0.19995117  0.126336   -0.03161345]] probs:[[0.14793922 0.15615319 0.19976819 0.1822487  0.16931435 0.14457634]] entropy:[1.784977]
DEBUG:chainerrl.agents.a3c:t:5348 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0080037   0.04523798  0.29156956  0.19996859  0.12454254 -0.0326871 ]] probs:[[0.14809336 0.15619178 0.19981985 0.18232945 0.16908291 0.14448267]] entropy:[1.7849715]
DEBUG:chainerrl.agents.a3

DEBUG:chainerrl.agents.a3c:t:5375 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04735916 0.09398612 0.1954547  0.13093373 0.1035082  0.03886156]] probs:[[0.15763386 0.1651579  0.18279596 0.17137422 0.16673805 0.15630002]] entropy:[1.7903516]
DEBUG:chainerrl.agents.a3c:t:5376 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04728639 0.09381931 0.1948085  0.13240264 0.10338514 0.03769772]] probs:[[0.15763932 0.16514811 0.1826975  0.17164458 0.16673546 0.15613501]] entropy:[1.7903427]
DEBUG:chainerrl.agents.a3c:t:5377 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0474638  0.09397385 0.1936952  0.13450591 0.10339683 0.03552791]] probs:[[0.157687   0.16519426 0.18251702 0.17202747 0.16675824 0.15581605]] entropy:[1.7903292]
DEBUG:chainerrl.agents.a3c:t:5378 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04744752 0.0940427  0.19431011 0.13327394 0.10336708 0.03665192]] probs:[[0.15767188 0.16519248 0.

DEBUG:chainerrl.agents.a3c:t:5404 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13415173 0.05176094 0.20034522 0.16690694 0.09338044 0.00120765]] probs:[[0.17070073 0.15720037 0.18238238 0.17638466 0.16388102 0.1494509 ]] entropy:[1.7894956]
DEBUG:chainerrl.agents.a3c:t:5405 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13420872 0.05176636 0.20036851 0.16693012 0.09330781 0.00123312]] probs:[[0.17070861 0.1571995  0.18238464 0.17638683 0.16386734 0.14945307]] entropy:[1.7894949]
DEBUG:chainerrl.agents.a3c:t:5406 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13417618 0.0517312  0.20033285 0.16693209 0.09328116 0.00123208]] probs:[[0.17070676 0.15719739 0.1823821  0.176391   0.16386653 0.14945617]] entropy:[1.789495]
DEBUG:chainerrl.agents.a3c:t:5407 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13421746 0.05176718 0.2003721  0.16693367 0.09329659 0.00123707]] probs:[[0.17070982 0.15719937 0.1

DEBUG:chainerrl.agents.a3c:t:5434 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11696673 0.0648341  0.17021431 0.16083656 0.08401934 0.04996112]] probs:[[0.16802433 0.15948918 0.1772137  0.1755596  0.16257855 0.15713467]] entropy:[1.7907087]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5526893] v_loss:[[0.0001764]]
DEBUG:chainerrl.agents.a3c:grad norm:0.25495656147866425
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5435 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11839975 0.07665464 0.14923327 0.15308844 0.09672697 0.05445827]] probs:[[0.16828372 0.1614033  0.17355332 0.17422368 0.16467577 0.15786019]] entropy:[1.7911097]
DEBUG:chainerrl.agents.a3c:t:5436 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11839066 0.07663666 0.14927055 0.15312237 0.09674132 0.05444433]] probs:[[0.16828083 0.16139908 0.17355838 0.17422818 0.1646768  0.15785672]] entropy:[1.7911088]
DEBUG:chainerrl.agents.a3c:t:5437 r:0.0 a:

DEBUG:chainerrl.agents.a3c:t:5463 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15812786 0.07816067 0.08108055 0.16464113 0.04900392 0.11269907]] probs:[[0.17520086 0.16173609 0.16220903 0.17634572 0.15708847 0.16741978]] entropy:[1.7908527]
DEBUG:chainerrl.agents.a3c:t:5464 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15993753 0.08009166 0.07886913 0.16410631 0.05054698 0.10649639]] probs:[[0.17562628 0.16214849 0.16195036 0.17635995 0.15742792 0.166487  ]] entropy:[1.790856]
DEBUG:chainerrl.agents.a3c:t:5465 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15881379 0.07836068 0.07984569 0.1667293  0.05081387 0.10946759]] probs:[[0.17530961 0.16175787 0.16199826 0.17670278 0.15736277 0.16686873]] entropy:[1.7908392]
DEBUG:chainerrl.agents.a3c:t:5466 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16248268 0.06992932 0.08730987 0.16557117 0.04418537 0.1241758 ]] probs:[[0.17565042 0.16012302 0.1

DEBUG:chainerrl.agents.a3c:t:5492 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13511896 0.1007353  0.07402284 0.19006075 0.05514966 0.08981807]] probs:[[0.17116642 0.16538112 0.16102187 0.18083376 0.15801138 0.16358544]] entropy:[1.7907544]
DEBUG:chainerrl.agents.a3c:t:5493 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13572352 0.10142528 0.07342524 0.18924952 0.05536358 0.08875658]] probs:[[0.17129819 0.16552259 0.16095223 0.18071693 0.15807125 0.16343886]] entropy:[1.7907594]
DEBUG:chainerrl.agents.a3c:t:5494 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14878647 0.07640596 0.10288964 0.18825287 0.04355847 0.1241727 ]] probs:[[0.17237282 0.16033721 0.16464026 0.17931178 0.15515609 0.16818187]] entropy:[1.7906469]
DEBUG:chainerrl.agents.a3c:t:5495 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14804727 0.07685555 0.10158999 0.1886751  0.04416682 0.12311601]] probs:[[0.1722931  0.16045368 0.

DEBUG:chainerrl.agents.a3c:t:5521 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08003106  0.1619676   0.08619426  0.5081129  -0.08323993 -0.03208267]] probs:[[0.13696401 0.17446372 0.16173247 0.24662332 0.13652521 0.1436912 ]] entropy:[1.7674314]
DEBUG:chainerrl.agents.a3c:t:5522 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07629712  0.16444838  0.08620578  0.50782233 -0.08072715 -0.03487066]] probs:[[0.13736384 0.17475389 0.16160193 0.24634984 0.13675666 0.14317384]] entropy:[1.7675669]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5693247] v_loss:[[0.00033357]]
DEBUG:chainerrl.agents.a3c:grad norm:2.4972139989263713
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5523 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03704778  0.16314206  0.07331585  0.43832552 -0.07850731 -0.0359159 ]] probs:[[0.14477114 0.17685744 0.16166362 0.23288096 0.13889171 0.14493509]] entropy:[1.7742431]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:5549 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05651839  0.11220058  0.09350228  0.21656688 -0.07351147  0.11122324]] probs:[[0.1465107  0.17343754 0.17022468 0.19251688 0.14404207 0.17326811]] entropy:[1.7866671]
DEBUG:chainerrl.agents.a3c:t:5550 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05740213  0.11586634  0.09587548  0.2176407  -0.0700113   0.11529013]] probs:[[0.14604102 0.17366984 0.17023249 0.19227572 0.14421111 0.1735698 ]] entropy:[1.7866435]
DEBUG:chainerrl.agents.a3c:t:5551 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05696636  0.11551001  0.09495754  0.21678242 -0.07075059  0.11454329]] probs:[[0.14618587 0.17370446 0.17017084 0.19221754 0.14418465 0.17353661]] entropy:[1.7866682]
DEBUG:chainerrl.agents.a3c:t:5552 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05697094  0.11809891  0.0959881   0.2162466  -0.06860797  0.11713421]] probs:[[

DEBUG:chainerrl.agents.a3c:t:5578 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0919753   0.09690072  0.07282197  0.13679297 -0.03670161  0.07039547]] probs:[[0.16978756 0.1706259  0.1665665  0.17757013 0.14928707 0.16616282]] entropy:[1.7903945]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.578378] v_loss:[[0.00026213]]
DEBUG:chainerrl.agents.a3c:grad norm:0.4500964440943361
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5579 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07381386  0.09026212  0.07065918  0.14065002 -0.0273789   0.08450752]] probs:[[0.16674824 0.16951363 0.16622302 0.17827392 0.15070018 0.16854095]] entropy:[1.7905385]
DEBUG:chainerrl.agents.a3c:t:5580 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07380639  0.09028274  0.07066339  0.14058138 -0.02737631  0.08453979]] probs:[[0.16674757 0.16951771 0.16622429 0.1782623  0.15070109 0.16854697]] entropy:[1.7905393]
DEBUG:chainerrl.agents.a3

INFO: outdir:result global_step:11399 local_step:5585 R:0.8500000000000001
INFO: statistics:[('average_value', 0.3694184180485084), ('average_entropy', 1.7763637855726646)]
INFO: Starting new video recorder writing to /home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4
DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4
DEBUG: Starting ffmpeg with "ffmpeg -nostats -loglevel error -y -r 30 -f rawvideo -s:v 160x210 -pix_fmt rgb24 -i - -vf scale=trunc(iw/2)*2:trunc(ih/2)*2 -vcodec libx264 -pix_fmt yuv420p /home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4"


DEBUG:chainerrl.agents.a3c:t:5586 r:0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.39394194 -0.41828117  0.46770865 -0.0111975  -0.17683266  0.2551934 ]] probs:[[0.21631706 0.0960167  0.23287734 0.14425835 0.12223807 0.1882925 ]] entropy:[1.746159]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5587 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.41449532 -0.45323133  0.47850174 -0.0481119  -0.21386129  0.30560693]] probs:[[0.21997637 0.09236919 0.23451665 0.1385059  0.11735032 0.19728152]] entropy:[1.738669]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5588 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.41618222 -0.46123645  0.4804509  -0.05349265 -0.2203908   0.30923864]] probs:[[0.22050144 0.09169662 0.23513807 0.1378587  0.11666787 0.19813739]] entropy:[1.7374015]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5589 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.41498438 -0.4576435   0.4776033  -0.05367389 -0.2229896   0.30417666]] probs:[[0.2206635  0.09220469 0.234923   0.13810036 0.11659016 0.19751829]] entropy:[1.7378559]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5590 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.41493633 -0.4578474   0.4776743  -0.05383299 -0.22313608  0.30414644]] probs:[[0.22066566 0.09219122 0.23495327 0.13808636 0.11657982 0.19752374]] entropy:[1.7378299]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5591 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.41491738 -0.4578797   0.4776848  -0.05386619 -0.22315751  0.30412325]] probs:[[0.22066507 0.09218974 0.23495956 0.13808402 0.11657922 0.19752237]] entropy:[1.7378263]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5592 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.4162166  -0.46262872  0.4808515  -0.05460172 -0.2215649   0.30926993]] probs:[[0.22057723 0.09159736 0.23530504 0.1377485  0.11656701 0.19820487]] entropy:[1.7371945]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5593 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.41620457 -0.46262342  0.48084065 -0.05461778 -0.22157344  0.30925217]] probs:[[0.22057709 0.0915989  0.23530518 0.13774785 0.11656735 0.19820361]] entropy:[1.7371957]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2883322] v_loss:[[0.00036603]]
DEBUG:chainerrl.agents.a3c:grad norm:13.705249079524908
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5594 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.36834592 -0.41256627  0.39581397 -0.03266798 -0.18526484  0.24711713]] probs:[[0.21662937 0.09921366 0.22266223 0.14506386 0.12453379 0.19189706]] entropy:[1.7513077]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5595 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.36834615 -0.41249922  0.39570254 -0.03263514 -0.18542631  0.24724372]] probs:[[0.21663141 0.09922122 0.22263947 0.14506996 0.12451483 0.19192313]] entropy:[1.7513093]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5596 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.36840445 -0.41281006  0.39657646 -0.0329196  -0.1843191   0.24647713]] probs:[[0.21661673 0.09917789 0.22280604 0.14501041 0.12463706 0.19175188]] entropy:[1.7512938]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5597 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.3684044  -0.41280934  0.39657524 -0.03291944 -0.18432103  0.24647851]] probs:[[0.21661676 0.09917797 0.2228058  0.14501046 0.12463685 0.19175218]] entropy:[1.7512938]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5598 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.36834612 -0.41249782  0.3957015  -0.03263576 -0.18542932  0.24724603]] probs:[[0.21663144 0.09922139 0.22263928 0.14506988 0.12451449 0.1919236 ]] entropy:[1.7513094]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5599 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.36834577 -0.41249898  0.39570397 -0.03263682 -0.18542613  0.24724336]] probs:[[0.21663134 0.09922125 0.2226398  0.14506972 0.12451486 0.19192307]] entropy:[1.7513094]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5600 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.3683455  -0.41249874  0.39570436 -0.03263712 -0.18542618  0.24724333]] probs:[[0.2166313  0.09922128 0.22263989 0.14506967 0.12451486 0.19192307]] entropy:[1.7513094]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5601 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.36840373 -0.41280934  0.39657786 -0.03292145 -0.18431942  0.24647713]] probs:[[0.2166166  0.09917797 0.22280636 0.14501014 0.12463704 0.1917519 ]] entropy:[1.7512938]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3370423] v_loss:[[4.9283153e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:1.81571833513024
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5602 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.3040368  -0.35985124  0.3475447  -0.00440783 -0.13498344  0.23240711]] probs:[[0.20536217 0.10572983 0.21449426 0.15085672 0.13239037 0.19116662]] entropy:[1.7621869]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5603 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.30403692 -0.3598512   0.34754485 -0.00440827 -0.1349836   0.23240729]] probs:[[0.2053622  0.10572984 0.2144943  0.15085666 0.13239035 0.19116665]] entropy:[1.762187]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5604 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.30400053 -0.3599641   0.34737697 -0.00431288 -0.1350413   0.23233141]] probs:[[0.2053677  0.10572458 0.21447185 0.15088058 0.13239107 0.19116423]] entropy:[1.7621918]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5605 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.3040002  -0.35996392  0.3473775  -0.00431314 -0.13504145  0.23233132]] probs:[[0.20536765 0.1057246  0.21447198 0.15088055 0.13239107 0.19116423]] entropy:[1.762192]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5606 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.30399984 -0.3599638   0.3473779  -0.00431351 -0.13504142  0.23233132]] probs:[[0.20536758 0.10572462 0.21447206 0.15088049 0.13239107 0.19116424]] entropy:[1.762192]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5607 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.30399948 -0.35996372  0.34737825 -0.00431397 -0.13504155  0.23233135]] probs:[[0.2053675  0.10572463 0.21447214 0.15088043 0.13239105 0.19116424]] entropy:[1.7621917]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5608 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.30403548 -0.3598506   0.34754652 -0.00441007 -0.13498393  0.23240733]] probs:[[0.20536195 0.10572992 0.2144947  0.1508564  0.13239032 0.1911667 ]] entropy:[1.7621869]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5609 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.30403545 -0.35985056  0.34754664 -0.00441041 -0.13498423  0.2324076 ]] probs:[[0.20536195 0.10572993 0.21449475 0.15085636 0.1323903  0.19116677]] entropy:[1.762187]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4072161] v_loss:[[1.14066864e-07]]
DEBUG:chainerrl.agents.a3c:grad norm:1.0319101099221255
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5610 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12607388 -0.36300957  0.19720039 -0.03960741  0.00992385  0.46641716]] probs:[[0.1715268  0.1051782  0.18417126 0.14533743 0.15271746 0.24106891]] entropy:[1.7611289]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5611 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12599517 -0.36319187  0.19694446 -0.03946692  0.00982381  0.46628806]] probs:[[0.17153144 0.10517014 0.18414357 0.14537323 0.1527183  0.24106325]] entropy:[1.7611349]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5612 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12599495 -0.36319155  0.19694524 -0.03946652  0.00982355  0.4662873 ]] probs:[[0.17153141 0.10517018 0.18414374 0.1453733  0.15271828 0.24106309]] entropy:[1.761135]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5613 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12599467 -0.3631914   0.19694582 -0.03946598  0.00982333  0.46628657]] probs:[[0.17153135 0.10517021 0.18414386 0.14537337 0.15271826 0.24106292]] entropy:[1.7611351]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5614 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12599438 -0.3631912   0.19694635 -0.03946562  0.00982301  0.4662859 ]] probs:[[0.17153133 0.10517024 0.18414396 0.14537345 0.15271822 0.24106279]] entropy:[1.7611351]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5615 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12607278 -0.36300856  0.19720286 -0.03960549  0.00992262  0.46641433]] probs:[[0.17152663 0.10517832 0.18417174 0.14533775 0.15271728 0.24106824]] entropy:[1.761129]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5616 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12607272 -0.36300868  0.1972028  -0.03960576  0.0099225   0.4664144 ]] probs:[[0.17152664 0.10517833 0.18417174 0.14533773 0.15271728 0.2410683 ]] entropy:[1.7611291]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5617 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12607267 -0.36300868  0.19720292 -0.03960596  0.00992251  0.46641448]] probs:[[0.17152664 0.10517833 0.18417177 0.1453377  0.15271729 0.24106833]] entropy:[1.7611293]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4657342] v_loss:[[4.2959913e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:1.4468037196067691
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5618 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12151548 -0.316268    0.1830448  -0.01073738  0.02871465  0.38881525]] probs:[[0.172329   0.11123243 0.18326528 0.15098079 0.15705635 0.22513613]] entropy:[1.7701329]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5619 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12151616 -0.31626526  0.18304847 -0.01073984  0.02871567  0.38881633]] probs:[[0.17232893 0.11123262 0.18326576 0.15098025 0.15705633 0.22513613]] entropy:[1.7701331]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5620 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12151584 -0.31626514  0.18304892 -0.01073951  0.02871537  0.3888158 ]] probs:[[0.17232887 0.11123263 0.18326585 0.15098031 0.15705629 0.225136  ]] entropy:[1.770133]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5621 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12151557 -0.31626502  0.18304943 -0.01073914  0.02871506  0.3888153 ]] probs:[[0.17232886 0.11123266 0.18326597 0.15098038 0.15705626 0.22513594]] entropy:[1.7701333]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5622 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1217822  -0.315492    0.18415324 -0.01149303  0.0291517   0.3893594 ]] probs:[[0.17230387 0.11127287 0.18339287 0.15080452 0.15706019 0.22516575]] entropy:[1.770112]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5623 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12178203 -0.3154924   0.18415244 -0.0114929   0.02915144  0.38935933]] probs:[[0.17230389 0.11127284 0.18339276 0.15080456 0.15706018 0.22516578]] entropy:[1.7701118]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5624 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12178199 -0.31549245  0.18415241 -0.01149333  0.02915141  0.38935962]] probs:[[0.17230387 0.11127283 0.18339275 0.1508045  0.15706019 0.22516586]] entropy:[1.7701119]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5625 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12151448 -0.31626695  0.1830474  -0.01073754  0.0287141   0.3888142 ]] probs:[[0.17232883 0.11123254 0.18326576 0.15098076 0.15705626 0.22513588]] entropy:[1.7701333]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4698797] v_loss:[[3.998101e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:1.0606386840334516
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5626 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10637119 -0.28549173  0.14205348  0.03441391  0.03308994  0.31862485]] probs:[[0.17216162 0.11634628 0.17841567 0.16020857 0.15999658 0.21287122]] entropy:[1.7766172]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5627 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10746474 -0.28396156  0.14422557  0.03227038  0.03359592  0.32028174]] probs:[[0.17220442 0.11642601 0.1786526  0.15973048 0.15994236 0.21304412]] entropy:[1.7765639]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5628 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10745925 -0.2839693   0.14421469  0.03228091  0.0335934   0.32027382]] probs:[[0.17220421 0.1164256  0.1786514  0.15973285 0.15994263 0.21304335]] entropy:[1.7765641]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5629 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10745926 -0.28396925  0.14421475  0.03228042  0.03359349  0.320274  ]] probs:[[0.1722042  0.1164256  0.1786514  0.15973276 0.15994264 0.21304336]] entropy:[1.7765641]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5630 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10638346 -0.28547302  0.1420809   0.03438725  0.03309558  0.31864613]] probs:[[0.17216197 0.11634725 0.17841873 0.16020264 0.15999585 0.21287356]] entropy:[1.7766166]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5631 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10638475 -0.28547075  0.14208454  0.03438454  0.03309613  0.3186481 ]] probs:[[0.172162   0.11634738 0.17841916 0.16020203 0.15999575 0.21287374]] entropy:[1.7766166]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5632 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10421994 -0.26401094  0.1848884  -0.00929993  0.04983279  0.34046692]] probs:[[0.1700042  0.11763566 0.1842865  0.15176046 0.1610051  0.21530813]] entropy:[1.7755016]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5633 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10129208 -0.2627506   0.19022153 -0.01467885  0.05232017  0.34229833]] probs:[[0.16940244 0.11771125 0.18515746 0.15085305 0.16130634 0.21556951]] entropy:[1.7753047]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4719896] v_loss:[[3.2753618e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.6538049870592645
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5634 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08962902 -0.21872483  0.1766264  -0.00261742  0.05735157  0.29605672]] probs:[[0.16849352 0.12378467 0.18380855 0.15364595 0.16314182 0.20712553]] entropy:[1.7797194]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5635 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09166063 -0.21696424  0.178506   -0.00177123  0.05820481  0.29755285]] probs:[[0.16858566 0.12381879 0.18388109 0.15354782 0.16303879 0.20712784]] entropy:[1.7797105]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5636 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08824202 -0.22434585  0.17427608 -0.00463613  0.05692722  0.293007  ]] probs:[[0.16865946 0.12338307 0.18381241 0.15370013 0.16345976 0.20698507]] entropy:[1.7796382]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5637 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08576157 -0.22775613  0.17303938 -0.00555305  0.05601616  0.29204622]] probs:[[0.16850343 0.12315435 0.18387091 0.15379822 0.16356505 0.20710808]] entropy:[1.7795484]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5638 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08774762 -0.22876953  0.17399384 -0.00540091  0.05687879  0.29235983]] probs:[[0.1687346  0.12295394 0.18393332 0.15372702 0.16360553 0.20704563]] entropy:[1.7794875]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5639 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0896114  -0.22684246  0.17367166 -0.00502566  0.05758938  0.2917    ]] probs:[[0.16895978 0.12312583 0.18377662 0.15370323 0.16363505 0.20679942]] entropy:[1.7796038]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5640 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09339105 -0.21847416  0.17896561 -0.00192641  0.05893309  0.29740587]] probs:[[0.16883473 0.12360055 0.1839189  0.153485   0.16311611 0.20704478]] entropy:[1.7796532]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5641 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09425019 -0.21694796  0.18029253 -0.00131409  0.0592506   0.29861447]] probs:[[0.16881536 0.12366885 0.18398383 0.15342952 0.1630091  0.2070934 ]] entropy:[1.7796502]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4087083] v_loss:[[1.2531471e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.8200614663962991
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5642 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08267672 -0.17533119  0.16358812  0.01116507  0.05592767  0.26152676]] probs:[[0.16786139 0.12968788 0.18200788 0.15627651 0.16343077 0.2007356 ]] entropy:[1.7829963]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5643 r:0.05 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0873424  -0.17330225  0.16696183  0.00972134  0.0551331   0.26173198]] probs:[[0.16841914 0.12977616 0.18237685 0.15584075 0.16308092 0.20050623]] entropy:[1.7829876]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5644 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08075164 -0.17624128  0.16220982  0.01173068  0.05592469  0.2613984 ]] probs:[[0.16764405 0.1296515  0.18187165 0.15646338 0.16353321 0.20083623]] entropy:[1.7829957]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5645 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09074537 -0.17190944  0.16939418  0.00870552  0.05425663  0.2618592 ]] probs:[[0.16883731 0.12983713 0.18265231 0.15553889 0.16278768 0.20034671]] entropy:[1.7829748]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5646 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09814575 -0.16908987  0.17491274  0.00679214  0.05330572  0.2617479 ]] probs:[[0.16972528 0.12992345 0.18326773 0.1549074  0.1622829  0.1998932 ]] entropy:[1.7829504]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5647 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10094945 -0.16819416  0.17707975  0.00595325  0.05249801  0.26156446]] probs:[[0.17008395 0.12994984 0.18353812 0.15467033 0.1620396  0.19971815]] entropy:[1.7829319]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5648 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10138807 -0.16787377  0.17674936  0.00597383  0.05235691  0.2617589 ]] probs:[[0.17014585 0.12998176 0.18346378 0.15466195 0.16200462 0.19974206]] entropy:[1.7829399]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5649 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08389699 -0.17503625  0.16422568  0.01076954  0.05452672  0.2614928 ]] probs:[[0.16805595 0.12971812 0.1821127  0.15620504 0.16319188 0.20071636]] entropy:[1.7829876]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3714113] v_loss:[[0.00053538]]
DEBUG:chainerrl.agents.a3c:grad norm:0.7715186592965446
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5650 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09883724 -0.13742676  0.19476698  0.0086652   0.04975348  0.23035751]] probs:[[0.16958666 0.13390091 0.18666095 0.15496387 0.1614637  0.19342393]] entropy:[1.7845802]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5651 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09989432 -0.13685313  0.19480938  0.00850074  0.04979016  0.23059341]] probs:[[0.16971678 0.13393888 0.18661469 0.15489344 0.16142277 0.19341345]] entropy:[1.7845867]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5652 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10389607 -0.13420615  0.19343413  0.00796878  0.05109735  0.23232454]] probs:[[0.17018576 0.13412715 0.18612689 0.15461889 0.16143326 0.19350803]] entropy:[1.784639]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5653 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09297531 -0.14761929  0.19723049  0.01731914  0.05278787  0.24687727]] probs:[[0.16806257 0.13212411 0.1865299  0.15581667 0.16144247 0.19602424]] entropy:[1.7838683]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5654 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0959537  -0.14273849  0.19625437  0.01161713  0.04957819  0.23734355]] probs:[[0.16895203 0.1330763  0.18677703 0.1552875  0.1612957  0.19461139]] entropy:[1.7842313]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5655 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09807597 -0.14059365  0.19528794  0.00890535  0.04819669  0.23322693]] probs:[[0.1694768  0.13349265 0.18677936 0.15501866 0.16123077 0.19400172]] entropy:[1.7843882]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5656 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09921713 -0.13976553  0.19482745  0.00752156  0.04757845  0.2313511 ]] probs:[[0.16974832 0.13366467 0.1867792  0.15487546 0.1612052  0.19372717]] entropy:[1.784452]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5657 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0957494  -0.14538698  0.19718449  0.01282179  0.04744969  0.24022603]] probs:[[0.16888489 0.13269867 0.18691471 0.15544467 0.16092165 0.19513546]] entropy:[1.7840475]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5668069] v_loss:[[0.00030546]]
DEBUG:chainerrl.agents.a3c:grad norm:2.241089174170967
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5658 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09290633 -0.10404127  0.19375136  0.01732603  0.03357659  0.19309162]] probs:[[0.16942501 0.13913754 0.18740189 0.15709177 0.15966545 0.1872783 ]] entropy:[1.7864323]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5659 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0973952  -0.1019353   0.19344316  0.01651946  0.03093987  0.1931199 ]] probs:[[0.17010967 0.13936731 0.18725875 0.15689357 0.15917243 0.18719822]] entropy:[1.7864534]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5660 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10071889 -0.10007861  0.19266085  0.01590565  0.02993754  0.1934097 ]] probs:[[0.17059459 0.13955969 0.18702304 0.15672247 0.15893711 0.18716314]] entropy:[1.786487]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5661 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1027353  -0.09880127  0.1920074   0.0155322   0.02982911  0.1937067 ]] probs:[[0.17087391 0.13968492 0.18682979 0.1566044  0.15885943 0.18714754]] entropy:[1.7865155]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5662 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10362118 -0.09829712  0.19197899  0.01516305  0.02980045  0.19379987]] probs:[[0.17099598 0.13973138 0.18679242 0.15651971 0.15882762 0.18713285]] entropy:[1.7865195]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5663 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10338726 -0.10181366  0.19259012  0.01397491  0.02954317  0.19461869]] probs:[[0.17103978 0.1393091  0.18699819 0.15641047 0.15886456 0.18737791]] entropy:[1.7863863]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5664 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1035194  -0.10311316  0.19317654  0.01337161  0.02958092  0.19496584]] probs:[[0.17107466 0.13913818 0.18712133 0.15632737 0.15888198 0.18745646]] entropy:[1.7863265]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5665 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10313521 -0.10371043  0.19262972  0.01303448  0.02984877  0.19520482]] probs:[[0.17104597 0.1390852  0.18705954 0.15630852 0.15895894 0.18754184]] entropy:[1.7863175]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2511562] v_loss:[[0.00037087]]
DEBUG:chainerrl.agents.a3c:grad norm:1.059746435829975
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5666 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07524521 -0.09122932  0.18819022  0.03425476  0.03440592  0.17901297]] probs:[[0.16679513 0.14121616 0.18673888 0.16009635 0.16012055 0.18503298]] entropy:[1.7873077]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5667 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07648604 -0.08972624  0.18721035  0.03395251  0.03465945  0.17878966]] probs:[[0.1669709  0.14140205 0.186521   0.16001795 0.16013111 0.18495695]] entropy:[1.7873683]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5668 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07435086 -0.09100088  0.18813029  0.03417994  0.03418461  0.17822894]] probs:[[0.16669944 0.14129369 0.18678753 0.16013567 0.16013642 0.18494722]] entropy:[1.7873263]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5669 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07319036 -0.09314628  0.18862936  0.03387185  0.03407927  0.17843397]] probs:[[0.16657792 0.14105171 0.1869614  0.16015542 0.16018863 0.18506494]] entropy:[1.7872568]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5670 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07513231 -0.09181374  0.18778709  0.03347513  0.03455561  0.1788538 ]] probs:[[0.16682748 0.14117698 0.18672091 0.1600207  0.16019368 0.1850603 ]] entropy:[1.7873005]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5671 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07599929 -0.0909026   0.18713388  0.03309766  0.03489325  0.17891087]] probs:[[0.16694619 0.14128366 0.18656993 0.15993538 0.16022281 0.18504205]] entropy:[1.7873346]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5672 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07653502 -0.09221824  0.18707076  0.03176743  0.03536028  0.17927799]] probs:[[0.16706535 0.141123   0.18659133 0.15975118 0.16032618 0.18514292]] entropy:[1.7872908]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5673 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08190038 -0.08842552  0.18482262  0.03044314  0.03719946  0.1807296 ]] probs:[[0.16773456 0.14146563 0.18591787 0.15932171 0.16040179 0.18515845]] entropy:[1.7874019]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4849769] v_loss:[[3.9853938e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.6200181008398051
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5674 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07897163 -0.05539325  0.16677363  0.04290821  0.04511037  0.1531257 ]] probs:[[0.16738361 0.14633864 0.18274473 0.16145474 0.16181068 0.18026759]] entropy:[1.7890186]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5675 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07887649 -0.05544516  0.16689424  0.04290056  0.04512606  0.1532509 ]] probs:[[0.16736394 0.14632776 0.18276268 0.16144988 0.1618096  0.18028611]] entropy:[1.7890139]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5676 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04249699 -0.07093716  0.15015669  0.05103852  0.04761536  0.15960106]] probs:[[0.16275041 0.14529754 0.18125004 0.1641465  0.16358556 0.18296994]] entropy:[1.7888399]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5677 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04904009 -0.06710341  0.14420657  0.05083764  0.04817217  0.15577656]] probs:[[0.1638334  0.14586864 0.18019083 0.16412815 0.16369125 0.18228775]] entropy:[1.7890892]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5678 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04904521 -0.06717718  0.14417721  0.05098172  0.04785631  0.1555205 ]] probs:[[0.16384895 0.145871   0.18020174 0.16416655 0.16365427 0.18225746]] entropy:[1.7890916]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5679 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04741388 -0.06744187  0.14549647  0.05138053  0.04706193  0.15388274]] probs:[[0.16365235 0.1458952  0.18051735 0.16430278 0.16359475 0.18203759]] entropy:[1.7890868]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5680 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0378247  -0.06900749  0.15138972  0.05471398  0.04177528  0.14103866]] probs:[[0.16263704 0.14615808 0.18219654 0.1654072  0.16328083 0.18032035]] entropy:[1.7891073]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5681 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0261755  -0.06246534  0.15777583  0.06103879  0.03290027  0.10160921]] probs:[[0.1619065  0.14817266 0.18467903 0.16765065 0.16299896 0.17459217]] entropy:[1.7894554]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5647153] v_loss:[[0.00020884]]
DEBUG:chainerrl.agents.a3c:grad norm:1.0599025384721024
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5682 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01138917 -0.0407344   0.14771965  0.07518222  0.01555894  0.04689881]] probs:[[0.1612521  0.15306234 0.18480466 0.17187405 0.16192588 0.16708097]] entropy:[1.7899975]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5683 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01146307 -0.04421298  0.14175737  0.07083692  0.01277905  0.04178226]] probs:[[0.1618573  0.153092   0.18438195 0.17175843 0.16207045 0.16683984]] entropy:[1.7900702]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5684 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01177958 -0.04488089  0.14053896  0.06974106  0.01231171  0.04143059]] probs:[[0.16200544 0.15308134 0.18426764 0.17167297 0.16209166 0.16688098]] entropy:[1.7900879]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5685 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01192627 -0.04486754  0.14035738  0.06950664  0.0124031   0.0412788 ]] probs:[[0.16203865 0.15309231 0.18424493 0.17164274 0.16211593 0.16686538]] entropy:[1.7900937]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5686 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01297483 -0.0460309   0.1448073   0.06953152  0.01670299  0.04292487]] probs:[[0.1619186  0.15264088 0.1847357  0.17134008 0.16252339 0.1668414 ]] entropy:[1.790021]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5687 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0044561  -0.05592879  0.15315247  0.07439617  0.03218892  0.0520816 ]] probs:[[0.15997273 0.15059866 0.18561965 0.17156178 0.16447131 0.16777584]] entropy:[1.7896942]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5688 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00706268 -0.058945    0.15488401  0.0732966   0.03675039  0.06048961]] probs:[[0.1600277  0.14980572 0.18552104 0.17098585 0.16484979 0.16881   ]] entropy:[1.789635]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5689 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0249386  -0.0596928   0.151294    0.06803263  0.04533855  0.08705499]] probs:[[0.16175294 0.14862683 0.18353869 0.1688759  0.16508658 0.17211907]] entropy:[1.7897359]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3182925] v_loss:[[0.00029653]]
DEBUG:chainerrl.agents.a3c:grad norm:0.8967110021775231
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5690 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06890211 -0.02658746  0.09131048  0.07153288 -0.00677825  0.17399094]] probs:[[0.16744402 0.15219454 0.17123853 0.16788512 0.15523945 0.18599838]] entropy:[1.7895626]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5691 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07812821 -0.02360512  0.09114922  0.06909169 -0.00425308  0.18532938]] probs:[[0.16830881 0.15202837 0.17051469 0.16679475 0.15499908 0.18735433]] entropy:[1.7893926]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5692 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06880399 -0.02843033  0.08366539  0.07469484 -0.00175853  0.18669003]] probs:[[0.16707852 0.15159759 0.16958009 0.16806567 0.15569538 0.18798271]] entropy:[1.7893473]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5693 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06916256 -0.02827438  0.08377307  0.07455397 -0.00168989  0.1869598 ]] probs:[[0.16711514 0.1516001  0.1695747  0.16801855 0.15568435 0.18800719]] entropy:[1.7893442]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5694 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0715724  -0.02708235  0.08540143  0.0731965  -0.00206144  0.18714058]] probs:[[0.16741645 0.15168859 0.16974774 0.16768858 0.15553187 0.18792681]] entropy:[1.7893499]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5695 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07442686 -0.0256645   0.08736121  0.0715618  -0.00256245  0.1872665 ]] probs:[[0.16777779 0.15179776 0.16996199 0.1672978  0.15534541 0.18781926]] entropy:[1.7893559]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5696 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06876294 -0.02852839  0.08345087  0.07478665 -0.00163637  0.1870641 ]] probs:[[0.16706388 0.15157565 0.16953582 0.16807327 0.15570714 0.18804426]] entropy:[1.7893393]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5697 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06874807 -0.0285283   0.08344677  0.07478023 -0.00161992  0.18707451]] probs:[[0.16706134 0.15157562 0.16953507 0.16807213 0.15570965 0.18804617]] entropy:[1.7893393]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-0.89243835] v_loss:[[0.0029124]]
DEBUG:chainerrl.agents.a3c:grad norm:11.449506364480612
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5698 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08731578  0.00989198  0.07661381  0.06323012 -0.014863    0.18593468]] probs:[[0.1695614  0.15692866 0.16775644 0.1655262  0.1530916  0.18713571]] entropy:[1.7896599]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5699 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07212434  0.00331419  0.06587175  0.0713323  -0.01073668  0.18568133]] probs:[[0.1675824  0.15643881 0.16653784 0.16744971 0.15425608 0.18773514]] entropy:[1.7896819]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5700 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07278716  0.00365995  0.06633246  0.0709321  -0.01084826  0.18569817]] probs:[[0.16766652 0.15646774 0.16658778 0.16735579 0.15421405 0.1877081 ]] entropy:[1.7896835]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5701 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07361478  0.00401904  0.06690761  0.07052953 -0.01100488  0.18577819]] probs:[[0.16776939 0.15649039 0.1666479  0.16725257 0.15415686 0.18768288]] entropy:[1.7896833]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5702 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07318441  0.00381669  0.06660641  0.07075279 -0.01094266  0.18571068]] probs:[[0.1677173  0.15647747 0.16661766 0.16730995 0.15418492 0.18769269]] entropy:[1.7896836]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5703 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0725474   0.00351029  0.06624008  0.0710981  -0.01087329  0.1855785 ]] probs:[[0.16763933 0.15645646 0.1665853  0.16739655 0.15422216 0.18770018]] entropy:[1.7896843]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5704 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0898868   0.00919424  0.08112419  0.06415024 -0.01436755  0.19043116]] probs:[[0.16963202 0.15648168 0.1681521  0.16532198 0.15283778 0.18757446]] entropy:[1.7895534]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5705 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09227666  0.01031609  0.08192088  0.06223521 -0.01576611  0.1894006 ]] probs:[[0.17003919 0.15665852 0.16828738 0.16500694 0.15262535 0.18738267]] entropy:[1.7895567]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4987445] v_loss:[[7.0605594e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.14205793125490143
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5706 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08143813  0.02164715  0.0865089   0.06288373 -0.00971337  0.1859381 ]] probs:[[0.16801932 0.15826772 0.16887347 0.16493057 0.15338138 0.18652755]] entropy:[1.789832]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5707 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07601993  0.01386961  0.09356485  0.06937166 -0.0069835   0.20073588]] probs:[[0.16655591 0.15651953 0.16950391 0.16545229 0.15328941 0.18867898]] entropy:[1.7894702]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5708 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07742931  0.0148171   0.09347674  0.0688522  -0.00688818  0.1999947 ]] probs:[[0.1667646  0.15664326 0.16946232 0.16534035 0.15327992 0.18850954]] entropy:[1.789498]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5709 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0776314   0.01492328  0.09350424  0.06868833 -0.00701114  0.19982396]] probs:[[0.16680217 0.15666352 0.1694709  0.16531709 0.15326461 0.18848172]] entropy:[1.7895012]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5710 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07664835  0.0152994   0.09168957  0.06873746 -0.0065891   0.19817865]] probs:[[0.16674653 0.15682428 0.16927356 0.16543262 0.15342893 0.18829411]] entropy:[1.7895516]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5711 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07916456  0.01767549  0.08961532  0.06696028 -0.00898602  0.1939895 ]] probs:[[0.16733493 0.15735562 0.16909288 0.16530514 0.1532157  0.18769574]] entropy:[1.7896378]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5712 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08393526  0.02029459  0.09051041  0.06162803 -0.01430232  0.18790607]] probs:[[0.1683819  0.15799983 0.16949269 0.16466735 0.152627   0.18683118]] entropy:[1.7897032]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5713 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08108385  0.0171505   0.09377946  0.06619431 -0.01300516  0.19678102]] probs:[[0.16753443 0.1571586  0.16967493 0.1650584  0.15249012 0.18808351]] entropy:[1.7895044]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5067368] v_loss:[[6.947961e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.8708503022998605
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5714 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05495919  0.04567881  0.11116228  0.07449783 -0.02360223  0.15497248]] probs:[[0.1639899  0.16247505 0.17347057 0.16722555 0.15159969 0.18123928]] entropy:[1.7902249]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5715 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05525592  0.04581936  0.11162613  0.07542595 -0.02251163  0.15423308]] probs:[[0.16398302 0.16244286 0.17349227 0.16732414 0.15171371 0.18104398]] entropy:[1.7902498]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5716 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0553987   0.0459624   0.11153619  0.07535569 -0.02250802  0.15419139]] probs:[[0.16400443 0.16246411 0.17347455 0.16731034 0.1517124  0.18103422]] entropy:[1.7902522]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5717 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05312365  0.0445052   0.11215641  0.07882232 -0.01874256  0.15634023]] probs:[[0.16346154 0.1620588  0.17340162 0.16771673 0.15212637 0.18123496]] entropy:[1.7902541]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5718 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05375157  0.04496872  0.1119542   0.07818947 -0.01935172  0.15581061]] probs:[[0.16358906 0.16215858 0.17339292 0.16763608 0.15205683 0.18116653]] entropy:[1.7902594]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5719 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05209887  0.0438146   0.11238408  0.07972125 -0.01776346  0.15711397]] probs:[[0.1632614  0.16191448 0.17340636 0.16783392 0.15224488 0.1813389 ]] entropy:[1.7902468]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5720 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05184438  0.04361586  0.11241968  0.07992957 -0.01753915  0.15729888]] probs:[[0.16321415 0.16187663 0.17340647 0.16786301 0.1522737  0.18136609]] entropy:[1.7902452]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5721 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05169936  0.04357021  0.11231981  0.08002318 -0.01746304  0.15730228]] probs:[[0.16319379 0.16187255 0.17339268 0.16788214 0.15228839 0.18137039]] entropy:[1.7902458]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-0.7528912] v_loss:[[0.00439091]]
DEBUG:chainerrl.agents.a3c:grad norm:10.02328332461251
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5722 r:0.05 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04561394 0.09667987 0.06433025 0.07322907 0.01778566 0.12694436]] probs:[[0.16242813 0.17093812 0.16549681 0.16697611 0.15797034 0.17619054]] entropy:[1.7911484]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5723 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04512062 0.09651753 0.06399559 0.07379803 0.01814826 0.12657373]] probs:[[0.16236039 0.17092337 0.16545403 0.16708386 0.15803967 0.17613867]] entropy:[1.7911532]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5724 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03821495 0.09334894 0.06158461 0.08129485 0.02399055 0.12420211]] probs:[[0.16128989 0.17043215 0.16510355 0.16839008 0.15901187 0.17577247]] entropy:[1.7911904]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5725 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04540906 0.09787281 0.06131046 0.07270436 0.01661803 0.12517701]] probs:[[0.16254297 0.17129825 0.1651483  0.16704074 0.15792993 0.17603984]] entropy:[1.791145]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5726 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04714685 0.09791825 0.06336952 0.07130098 0.01576722 0.1265993 ]] probs:[[0.16274221 0.17121822 0.16540386 0.16672097 0.15771471 0.17620003]] entropy:[1.7911341]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5727 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04267604 0.09659634 0.05733007 0.07496059 0.01676613 0.12306903]] probs:[[0.16230842 0.17130038 0.16470441 0.16763398 0.15815705 0.17589574]] entropy:[1.7911518]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5728 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04562669 0.09816006 0.05965314 0.07289016 0.01562422 0.1233731 ]] probs:[[0.16268116 0.17145582 0.16497909 0.16717742 0.15787281 0.1758337 ]] entropy:[1.7911503]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5729 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04190214 0.09600228 0.0575796  0.07633707 0.01775893 0.12246267]] probs:[[0.16216743 0.17118236 0.16472983 0.16784891 0.15829907 0.17577238]] entropy:[1.7911642]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4380091] v_loss:[[0.00021317]]
DEBUG:chainerrl.agents.a3c:grad norm:0.5237709507389768
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5730 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00836079 0.0681021  0.02458554 0.11522197 0.11564339 0.14985141]] probs:[[0.15489618 0.16443188 0.15742983 0.17236534 0.172438   0.1784388 ]] entropy:[1.7904564]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5731 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01926474 0.07405064 0.0321411  0.10838691 0.11296148 0.14854155]] probs:[[0.15628126 0.16508214 0.1583066  0.17084888 0.17163223 0.17784886]] entropy:[1.7907158]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5732 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00861838 0.06803291 0.02496915 0.11646508 0.1152449  0.14721778]] probs:[[0.15497246 0.16445911 0.15752721 0.17262025 0.17240974 0.17801125]] entropy:[1.7904887]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5733 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00691028 0.06643555 0.02546294 0.11843605 0.11738311 0.14884211]] probs:[[0.15462296 0.16410638 0.1575184  0.17286576 0.17268385 0.17820266]] entropy:[1.7904263]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5734 r:0.1 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.007914   0.06854204 0.02582878 0.11881167 0.11637326 0.14789316]] probs:[[0.15473482 0.1644063  0.15753183 0.17288221 0.17246117 0.1779837 ]] entropy:[1.7904615]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5735 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00806407 0.06770652 0.02503053 0.11717498 0.11579082 0.14765148]] probs:[[0.15486117 0.16437846 0.15751103 0.17271449 0.17247559 0.17805925]] entropy:[1.7904698]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5736 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00918164 0.06905102 0.0240039  0.11503571 0.1138447  0.147137  ]] probs:[[0.15512171 0.1646924  0.1574381  0.17244256 0.17223729 0.178068  ]] entropy:[1.7905055]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5737 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04193088 0.10843981 0.03827025 0.13733755 0.11549283 0.12330325]] probs:[[0.15806945 0.16893996 0.15749186 0.17389317 0.1701357  0.17146975]] entropy:[1.791002]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-0.878749] v_loss:[[0.0076152]]
DEBUG:chainerrl.agents.a3c:grad norm:10.429709091998646
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5738 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07637163 0.11163382 0.02022748 0.10128375 0.14457683 0.1218584 ]] probs:[[0.16330154 0.16916265 0.15438575 0.16742082 0.17482817 0.17090112]] entropy:[1.7909911]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5739 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07569695 0.10852692 0.01967604 0.1033916  0.14756614 0.1202444 ]] probs:[[0.16321082 0.16865794 0.15431899 0.16779405 0.17537244 0.17064582]] entropy:[1.790968]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5740 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07567097 0.10818832 0.0196585  0.10371531 0.14806888 0.12015371]] probs:[[0.1631963  0.16859023 0.15430656 0.16783781 0.17544957 0.17061959]] entropy:[1.790964]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5741 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07683259 0.11030761 0.02073041 0.10416725 0.14972556 0.1229414 ]] probs:[[0.16313215 0.16868542 0.15423207 0.16765282 0.17546745 0.17083009]] entropy:[1.7909508]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5742 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07706394 0.11065207 0.02092143 0.10422286 0.1500289  0.12341356]] probs:[[0.16312608 0.16869825 0.15422012 0.16761713 0.17547357 0.17086488]] entropy:[1.7909486]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5743 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07866256 0.1129673  0.02199624 0.10445907 0.15217163 0.12691799]] probs:[[0.16308802 0.16877979 0.1541034  0.16734986 0.1755281  0.17115088]] entropy:[1.790929]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5744 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07352802 0.10826079 0.01736622 0.10492271 0.14827275 0.12323363]] probs:[[0.16283482 0.16858989 0.1539418  0.16802807 0.17547227 0.17113316]] entropy:[1.7909119]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5745 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0747273  0.10821015 0.01865995 0.10427903 0.1482154  0.12152968]] probs:[[0.16303404 0.1685853  0.15414467 0.16792387 0.17546631 0.17084579]] entropy:[1.7909409]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5760098] v_loss:[[0.0002524]]
DEBUG:chainerrl.agents.a3c:grad norm:0.4775147462839105
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5746 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08613881 0.11684252 0.03303957 0.09953973 0.13668539 0.1187667 ]] probs:[[0.16452865 0.16965865 0.15602021 0.16674833 0.1730588  0.16998543]] entropy:[1.7912178]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5747 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0881386  0.11860618 0.03488745 0.09930272 0.13820027 0.12046261]] probs:[[0.16462278 0.16971563 0.15608576 0.16647096 0.17307386 0.170031  ]] entropy:[1.7912204]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5748 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07857402 0.11346938 0.027984   0.0961896  0.1321325  0.12265643]] probs:[[0.16382597 0.16964366 0.15574415 0.16673742 0.17283945 0.17120935]] entropy:[1.7911683]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5749 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0784455  0.11368798 0.0283183  0.09533246 0.13200097 0.12358741]] probs:[[0.16379477 0.16967022 0.15578657 0.16658424 0.17280601 0.1713582 ]] entropy:[1.7911673]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5750 r:0.15 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07819302 0.11358048 0.02834962 0.09523115 0.13177828 0.12394512]] probs:[[0.16376139 0.16966026 0.15579905 0.16657549 0.17277595 0.17142786]] entropy:[1.7911669]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5751 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07797796 0.11346238 0.02833507 0.0952445  0.13164027 0.12408372]] probs:[[0.16373526 0.16964962 0.15580542 0.16658694 0.17276168 0.17146112]] entropy:[1.7911667]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5752 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07835552 0.11366724 0.0283501  0.09525174 0.13193606 0.12374698]] probs:[[0.16378178 0.16966851 0.15579319 0.16657257 0.17279664 0.17138737]] entropy:[1.7911673]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5753 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0783622  0.11355549 0.02821606 0.09559056 0.1319485  0.12341956]] probs:[[0.1637888  0.1696557  0.15577795 0.16663505 0.17280504 0.17133747]] entropy:[1.7911675]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-0.49946433] v_loss:[[0.01834207]]
DEBUG:chainerrl.agents.a3c:grad norm:25.929525462591343
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5754 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01460897 0.05940001 0.07209878 0.13179505 0.06702267 0.23226306]] probs:[[0.15322661 0.16024582 0.16229372 0.17227708 0.161472   0.19048473]] entropy:[1.7892178]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5755 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01596832 0.05856449 0.07101281 0.13521586 0.06733409 0.22862637]] probs:[[0.15345843 0.1601364  0.16214228 0.17289376 0.1615469  0.1898222 ]] entropy:[1.7892969]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5756 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01396219 0.05599437 0.06984443 0.13488273 0.06572939 0.2244793 ]] probs:[[0.15345937 0.16004708 0.16227917 0.1731843  0.16161275 0.18941739]] entropy:[1.7893406]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5757 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01500514 0.06041172 0.07272311 0.13017215 0.06720892 0.2337249 ]] probs:[[0.15323314 0.16035132 0.16233768 0.1719369  0.16144498 0.19069597]] entropy:[1.7892054]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5758 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01467916 0.06050625 0.07370135 0.12964462 0.06706985 0.23507328]] probs:[[0.15314214 0.16032349 0.16245298 0.17180015 0.16137923 0.19090207]] entropy:[1.7891738]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5759 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0145106  0.05987925 0.07376358 0.13082185 0.0671446  0.23426576]] probs:[[0.15312487 0.16023193 0.16247216 0.17201212 0.1614003  0.19075862]] entropy:[1.7891828]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5760 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01183065 0.05689169 0.07580997 0.1312495  0.06583042 0.23510118]] probs:[[0.15279664 0.1598393  0.16289195 0.17217763 0.16127446 0.19102004]] entropy:[1.7891041]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5761 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01591683 0.05847284 0.08279781 0.12613413 0.05892946 0.24299301]] probs:[[0.15318535 0.15984501 0.16378091 0.1710346  0.15991803 0.1922362 ]] entropy:[1.7889689]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.614505] v_loss:[[0.00045111]]
DEBUG:chainerrl.agents.a3c:grad norm:2.70657237801763
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5762 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03073717 0.06241001 0.09439766 0.13706255 0.07648069 0.17966875]] probs:[[0.15582408 0.16083847 0.16606648 0.173305   0.16311756 0.18084843]] entropy:[1.7905393]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5763 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03097638 0.05888497 0.08510969 0.13510615 0.08231638 0.18906245]] probs:[[0.15582103 0.16023102 0.16448863 0.17292152 0.1640298  0.18250804]] entropy:[1.7903917]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5764 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03403275 0.06441179 0.09028957 0.13710326 0.07915636 0.17830099]] probs:[[0.15628332 0.16110392 0.16532734 0.17325094 0.16349691 0.18053755]] entropy:[1.7906094]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5765 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03042372 0.06195115 0.08934365 0.1376613  0.07727487 0.18107761]] probs:[[0.15584871 0.16084051 0.16530722 0.17349058 0.16332415 0.18118882]] entropy:[1.7905052]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5766 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03266797 0.06268575 0.08923042 0.13665602 0.07917122 0.18109226]] probs:[[0.15610708 0.16086411 0.16519137 0.17321442 0.16353801 0.18108499]] entropy:[1.7905458]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5767 r:0.2 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0305616  0.06221737 0.09311307 0.13786632 0.07540522 0.1822353 ]] probs:[[0.15577222 0.1607822  0.16582721 0.17341708 0.16291662 0.18128467]] entropy:[1.7904875]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5768 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03832138 0.06541701 0.09874889 0.13500336 0.07451776 0.18637936]] probs:[[0.15655072 0.16085055 0.16630237 0.1724422  0.1623211  0.18153311]] entropy:[1.7905427]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5769 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06845158 0.07773982 0.11135769 0.12047116 0.07446607 0.18406998]] probs:[[0.16038062 0.1618772  0.16741168 0.16894436 0.16134813 0.18003805]] entropy:[1.7909445]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[0.10962966] v_loss:[[0.03963146]]
DEBUG:chainerrl.agents.a3c:grad norm:68.04661344853048
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5770 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06260512  0.08712795  0.01247179  0.16333431  0.10792322  0.27814472]] probs:[[0.14114478 0.16394307 0.15214941 0.17692494 0.167388   0.19844976]] entropy:[1.785858]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5771 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06124084  0.08745167  0.01892733  0.16759683  0.11171116  0.2828833 ]] probs:[[0.14083542 0.16341361 0.15259086 0.17704955 0.16742642 0.1986841 ]] entropy:[1.7857876]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5772 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06365797  0.08606753  0.01459765  0.16659634  0.11007284  0.27816057]] probs:[[0.14086342 0.16361503 0.15232958 0.1773358  0.16759019 0.19826595]] entropy:[1.7858276]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5773 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05334452  0.09072498  0.02311481  0.17126673  0.11839785  0.28683233]] probs:[[0.14126636 0.16315761 0.15249115 0.17684232 0.16773571 0.19850694]] entropy:[1.7858872]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5774 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06564372  0.08546766  0.01348765  0.16381598  0.10772882  0.27819347]] probs:[[0.14078456 0.16375022 0.15237768 0.17709576 0.16743638 0.19855538]] entropy:[1.7857862]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5775 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06602219  0.08503885  0.01314937  0.16500428  0.1078625   0.27695924]] probs:[[0.14075764 0.16371064 0.15235464 0.17733948 0.16749008 0.19834757]] entropy:[1.7857997]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5776 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06341231  0.08618554  0.01452175  0.16665305  0.11021376  0.27810758]] probs:[[0.1408888  0.16362362 0.15230802 0.17733423 0.1676028  0.19824246]] entropy:[1.7858341]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5777 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0661371   0.08490742  0.01275518  0.16564578  0.10782591  0.27625275]] probs:[[0.14075977 0.16371042 0.15231441 0.17747638 0.16750574 0.19823328]] entropy:[1.7858077]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6246121] v_loss:[[0.00056346]]
DEBUG:chainerrl.agents.a3c:grad norm:1.558694278764793
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5778 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01867927  0.11321744  0.03220012  0.15851803  0.13246913  0.26370132]] probs:[[0.1454227  0.16592592 0.15301318 0.1736153  0.16915122 0.19287172]] entropy:[1.787646]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5779 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01809427  0.11299571  0.03274389  0.15817764  0.13137828  0.26598707]] probs:[[0.14545988 0.1658345  0.153046   0.17349908 0.16891116 0.19324942]] entropy:[1.7876059]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5780 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01717945  0.11336812  0.03293552  0.15814956  0.13271312  0.26551172]] probs:[[0.14554162 0.16583772 0.15302129 0.17343296 0.16907707 0.1930894 ]] entropy:[1.787639]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5781 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01706097  0.11339599  0.03297237  0.15816134  0.13283326  0.26551533]] probs:[[0.1455515  0.16583395 0.15301919 0.17342623 0.16908883 0.19308032]] entropy:[1.7876416]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5782 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01903111  0.11281759  0.03307318  0.15776901  0.13193488  0.26516664]] probs:[[0.1453601  0.16584653 0.15313478 0.17347166 0.16904756 0.19313934]] entropy:[1.7876154]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5783 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01933707  0.11202126  0.03436371  0.15835756  0.13203515  0.26653212]] probs:[[0.14525692 0.16564755 0.15327057 0.17350365 0.1689962  0.1933251 ]] entropy:[1.7875838]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5784 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01991524  0.11119577  0.03539471  0.15872887  0.13175064  0.2676649 ]] probs:[[0.14514785 0.16548224 0.15340213 0.17353807 0.16891891 0.19351074]] entropy:[1.7875504]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5785 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01715307  0.11323305  0.03322956  0.15823638  0.1327686   0.26577267]] probs:[[0.14553072 0.16579852 0.15305078 0.17343044 0.16906932 0.19312023]] entropy:[1.7876354]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-2.1273263] v_loss:[[0.00653492]]
DEBUG:chainerrl.agents.a3c:grad norm:12.434094934272586
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5786 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02513933 0.06919489 0.08637775 0.12203759 0.14654699 0.28249425]] probs:[[0.15077202 0.15756287 0.16029364 0.16611281 0.17023446 0.19502424]] entropy:[1.7882745]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5787 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02440097 0.06796122 0.08729129 0.12240817 0.14602903 0.28374878]] probs:[[0.15065183 0.15735929 0.16043064 0.16616455 0.17013621 0.1952575 ]] entropy:[1.7882215]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5788 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02171361 0.07110607 0.08573941 0.11678722 0.14972925 0.27463213]] probs:[[0.15058333 0.15820776 0.16053988 0.16560248 0.17114861 0.19391795]] entropy:[1.7884462]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5789 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02004616 0.07210485 0.08528186 0.11448292 0.15144439 0.2705034 ]] probs:[[0.15049087 0.15853272 0.16063553 0.16539541 0.17162305 0.19332246]] entropy:[1.7885314]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5790 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02399565 0.06914405 0.08657213 0.12094936 0.14712496 0.2812192 ]] probs:[[0.150672   0.1576305  0.16040179 0.16601183 0.17041466 0.19486926]] entropy:[1.7882924]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5791 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01114556 0.07398582 0.08280391 0.10878542 0.15573753 0.255593  ]] probs:[[0.14983082 0.15954836 0.1609615  0.16519831 0.17313969 0.1913213 ]] entropy:[1.7887555]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5792 r:0.25 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00907422 0.07364901 0.08230241 0.10819793 0.15453272 0.25477752]] probs:[[0.14965637 0.15963925 0.16102667 0.165251   0.17308803 0.19133869]] entropy:[1.7887431]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5793 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01223572 0.0741604  0.08305772 0.10908382 0.15635204 0.25602126]] probs:[[0.14992382 0.15950128 0.16092674 0.16517003 0.17316477 0.19131337]] entropy:[1.788762]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[1.089172] v_loss:[[0.08253773]]
DEBUG:chainerrl.agents.a3c:grad norm:499.99791592783856
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5794 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12637702  0.13794445  0.05202834 -0.1323722   0.5626987   0.02580348]] probs:[[0.16225012 0.16413784 0.15062457 0.12525979 0.25100195 0.1467258 ]] entropy:[1.7655665]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5795 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12637562  0.13794373  0.05202518 -0.13237028  0.56269634  0.02580387]] probs:[[0.16225006 0.1641379  0.15062426 0.12526016 0.25100166 0.14672603]] entropy:[1.7655666]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5796 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1254439   0.13732864  0.05051859 -0.13150905  0.5618092   0.02560494]] probs:[[0.16219994 0.16413915 0.15049118 0.12544619 0.2509353  0.14678822]] entropy:[1.7656398]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5797 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11982297  0.13321026  0.04055221 -0.12379135  0.5549297   0.02484186]] probs:[[0.16192825 0.1641106  0.14958765 0.12691772 0.25019985 0.14725594]] entropy:[1.7663049]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5798 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12097433  0.13416453  0.04254476 -0.12564573  0.55654174  0.02505113]] probs:[[0.16197857 0.16412924 0.14976007 0.12657614 0.25039294 0.14716302]] entropy:[1.7661414]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5799 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1253475   0.13728136  0.05026685 -0.13135229  0.56160766  0.02564653]] probs:[[0.16219828 0.16414553 0.15046626 0.12547666 0.25090632 0.14680697]] entropy:[1.7656602]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5800 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12143642  0.13455355  0.04337874 -0.12637627  0.55715984  0.02508023]] probs:[[0.16199993 0.16413891 0.14983553 0.12644194 0.25046504 0.14711869]] entropy:[1.7660785]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5801 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09727495  0.10185498  0.06412853 -0.13161062  0.5980522  -0.02870622]] probs:[[0.1587543  0.15948308 0.15357843 0.12627627 0.26194513 0.13996279]] entropy:[1.7601168]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5951538] v_loss:[[0.00041293]]
DEBUG:chainerrl.agents.a3c:grad norm:4.245329700318976
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5802 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06793473  0.10178811  0.05311133 -0.04455033  0.5078946  -0.00286899]] probs:[[0.15630814 0.16169028 0.15400821 0.13967863 0.24269104 0.14562365]] entropy:[1.771983]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5803 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06793473  0.1017881   0.0531113  -0.04455034  0.5078946  -0.00286897]] probs:[[0.15630814 0.16169028 0.15400821 0.13967861 0.24269104 0.14562367]] entropy:[1.771983]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5804 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06793489  0.10178827  0.0531111  -0.04455047  0.50789464 -0.00286898]] probs:[[0.15630817 0.16169032 0.1540082  0.13967863 0.24269107 0.14562367]] entropy:[1.7719831]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5805 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06793467  0.1017881   0.05311127 -0.04455039  0.50789446 -0.00286896]] probs:[[0.15630816 0.16169031 0.15400822 0.13967864 0.24269104 0.14562368]] entropy:[1.7719831]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5806 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06793461  0.10178805  0.05311128 -0.04455036  0.50789446 -0.00286903]] probs:[[0.15630814 0.1616903  0.15400822 0.13967864 0.24269104 0.14562367]] entropy:[1.7719831]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5807 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06793467  0.10178807  0.0531114  -0.04455033  0.50789446 -0.00286899]] probs:[[0.15630816 0.1616903  0.15400824 0.13967864 0.24269104 0.14562367]] entropy:[1.7719833]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5808 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06793357  0.10178779  0.05311326 -0.04454875  0.5078928  -0.00286965]] probs:[[0.15630801 0.16169028 0.15400857 0.13967888 0.24269067 0.14562361]] entropy:[1.7719834]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5809 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06793477  0.10178812  0.05311128 -0.04455031  0.5078946  -0.002869  ]] probs:[[0.15630814 0.16169028 0.15400821 0.13967863 0.24269104 0.14562365]] entropy:[1.771983]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-2.3132842] v_loss:[[0.00919154]]
DEBUG:chainerrl.agents.a3c:grad norm:37.31121445258185
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5810 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02465093  0.16116954  0.06752413  0.02363366  0.48655495 -0.03861554]] probs:[[0.14278238 0.17193942 0.15656897 0.14984572 0.23806114 0.14080234]] entropy:[1.7730846]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5811 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02677348  0.160673    0.06796811  0.02339873  0.4865914  -0.03874583]] probs:[[0.14253144 0.17191654 0.15669547 0.149865   0.23815638 0.14083518]] entropy:[1.773028]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5812 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0310763   0.15951325  0.06868145  0.02285638  0.48617163 -0.03911232]] probs:[[0.1420519  0.1718775  0.1569536  0.1499235  0.23827855 0.14091495]] entropy:[1.7729449]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5813 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03266663  0.15919106  0.0695615   0.0228512   0.48686633 -0.03877508]] probs:[[0.14181633 0.17181021 0.1570809  0.14991231 0.23842761 0.1409527 ]] entropy:[1.7728688]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5814 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0364023   0.15782683  0.07027587  0.0222908   0.48656404 -0.03910212]] probs:[[0.14140815 0.17172246 0.15732734 0.14995623 0.23855901 0.14102688]] entropy:[1.7727892]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5815 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02678436  0.16070709  0.06810447  0.02344028  0.48679554 -0.03865614]] probs:[[0.14251661 0.17190638 0.15670224 0.14985725 0.23818281 0.1408347 ]] entropy:[1.773016]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5816 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02495597  0.16110264  0.06760363  0.02360462  0.48658368 -0.03862385]] probs:[[0.14274473 0.171935   0.1565879  0.14984755 0.23807782 0.14080699]] entropy:[1.7730751]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5817 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02480768  0.16115713  0.06758463  0.02360365  0.486552   -0.03862283]] probs:[[0.14276303 0.17194092 0.15658177 0.14984441 0.2380655  0.14080429]] entropy:[1.7730811]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5772911] v_loss:[[0.00028916]]
DEBUG:chainerrl.agents.a3c:grad norm:1.308053429488147
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5818 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0588504   0.14934745  0.06064586  0.03801989  0.41427782 -0.02338628]] probs:[[0.14082104 0.17341504 0.15869533 0.155145   0.2260188  0.14590475]] entropy:[1.7780578]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5819 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06752994  0.1456269   0.05892688  0.03578058  0.4070464  -0.02864866]] probs:[[0.14028797 0.17361741 0.15919885 0.1555563  0.2254895  0.14584997]] entropy:[1.7781652]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5820 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06794465  0.14522056  0.05831452  0.03542082  0.40574926 -0.02955947]] probs:[[0.14032905 0.1736697  0.159214   0.1556104  0.22535656 0.14582032]] entropy:[1.7782106]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5821 r:0.3 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03811895  0.15655333  0.06087671  0.04145002  0.4219004  -0.01802419]] probs:[[0.14273079 0.17340545 0.15758356 0.15455179 0.2261005  0.14562793]] entropy:[1.7782031]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5822 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04250982  0.15463617  0.05956273  0.04044149  0.41869032 -0.01998349]] probs:[[0.14243731 0.1734775  0.15774417 0.15475655 0.22590217 0.1456823 ]] entropy:[1.7782466]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5823 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04409643  0.15395565  0.05910757  0.04006517  0.4172961  -0.02092341]] probs:[[0.14234322 0.17352006 0.15781842 0.15484162 0.2257964  0.14568026]] entropy:[1.7782722]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5824 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04827502  0.15218128  0.05848396  0.03936246  0.41463694 -0.02255327]] probs:[[0.14202593 0.17355004 0.15802744 0.15503441 0.22563566 0.14572647]] entropy:[1.7783009]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5825 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05242339  0.14963676  0.05716246  0.03755006  0.40949675 -0.02594669]] probs:[[0.1418876  0.17365931 0.15832044 0.15524565 0.22519246 0.1456945 ]] entropy:[1.7784344]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-0.01402748] v_loss:[[0.06085153]]
DEBUG:chainerrl.agents.a3c:grad norm:63.494322623455815
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5826 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11289045  0.04856057  0.14035621  0.13164108  0.41241878 -0.0811348 ]] probs:[[0.13398208 0.15745772 0.17259584 0.17109819 0.22656123 0.13830504]] entropy:[1.7756729]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5827 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11437068  0.04810416  0.14081666  0.1316598   0.4125538  -0.08116086]] probs:[[0.13380535 0.15741111 0.17270301 0.17112881 0.22662814 0.1383236 ]] entropy:[1.7756097]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5828 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11457381  0.04803911  0.14087676  0.13165279  0.41257837 -0.0811525 ]] probs:[[0.13378105 0.15740426 0.17271711 0.1711313  0.2266386  0.13832773]] entropy:[1.7756009]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5829 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11461975  0.04802156  0.14088744  0.13165054  0.41258383 -0.08115018]] probs:[[0.13377568 0.15740241 0.17271996 0.17113191 0.22664115 0.13832887]] entropy:[1.775599]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5830 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11462605  0.04801956  0.14088991  0.1316473   0.4125831  -0.08114871]] probs:[[0.133775   0.15740229 0.17272061 0.17113155 0.22664127 0.13832924]] entropy:[1.7755988]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5831 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11444277  0.04845122  0.14114551  0.13149242  0.41252655 -0.08106115]] probs:[[0.13378489 0.15745302 0.17274585 0.17108634 0.22660366 0.13832621]] entropy:[1.7756155]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5832 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11441144  0.04854561  0.141198    0.13145602  0.41251796 -0.08103003]] probs:[[0.13378584 0.15746409 0.17275074 0.17107598 0.22659624 0.13832718]] entropy:[1.7756187]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5833 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11455494  0.04818233  0.14098716  0.1315895   0.41255537 -0.08111724]] probs:[[0.13377914 0.1574216  0.17273048 0.1711148  0.22662589 0.13832805]] entropy:[1.7756054]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7168024] v_loss:[[0.00097392]]
DEBUG:chainerrl.agents.a3c:grad norm:2.3607093026101174
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5834 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08132405  0.03638504  0.14797021  0.11688857  0.35993287 -0.04612812]] probs:[[0.13903692 0.15640497 0.17486842 0.16951682 0.21615516 0.14401759]] entropy:[1.7804598]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5835 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08129569  0.03655333  0.14804506  0.11682666  0.35991353 -0.04603954]] probs:[[0.13903512 0.15642484 0.17487429 0.16949934 0.21614206 0.1440244 ]] entropy:[1.7804651]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5836 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0808093   0.03770142  0.14874654  0.11638144  0.3598663  -0.04581437]] probs:[[0.13905871 0.15655494 0.17494157 0.1693702  0.21606338 0.14401121]] entropy:[1.7804952]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5837 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0810691   0.03703218  0.14834838  0.11665033  0.35990348 -0.04598596]] probs:[[0.13904783 0.15647861 0.1749037  0.16944654 0.21611068 0.14401264]] entropy:[1.7804768]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5838 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07233094  0.0508625   0.15930082  0.10931402  0.3562275  -0.04244851]] probs:[[0.13973656 0.15805647 0.17615965 0.16757046 0.21450163 0.14397524]] entropy:[1.7810537]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5839 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07257561  0.050279    0.158999    0.10872553  0.35578743 -0.04234404]] probs:[[0.13975233 0.15802075 0.17616947 0.16753176 0.21448393 0.14404178]] entropy:[1.7810683]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5840 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08131833  0.03631696  0.14792441  0.11680464  0.35993996 -0.04606726]] probs:[[0.13904077 0.15639775 0.17486423 0.1695063  0.21616141 0.14402951]] entropy:[1.7804606]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5841 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08127867  0.03635222  0.14795028  0.11687905  0.35997766 -0.04606676]] probs:[[0.13904122 0.15639757 0.1748624  0.16951275 0.2161617  0.14402434]] entropy:[1.7804596]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.660562] v_loss:[[0.00107961]]
DEBUG:chainerrl.agents.a3c:grad norm:2.955520459538698
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5842 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01893587  0.0811256   0.17224373  0.08322112  0.24670748 -0.0058958 ]] probs:[[0.14834885 0.16396092 0.17960253 0.16430485 0.19348693 0.15029599]] entropy:[1.787303]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5843 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01464406  0.07923916  0.1704631   0.09128477  0.24413033 -0.00707835]] probs:[[0.14888799 0.16354325 0.17916398 0.16552515 0.19286081 0.15001872]] entropy:[1.787466]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5844 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00096206  0.08128493  0.17547823  0.08450107  0.25929913  0.01535383]] probs:[[0.14966415 0.16249394 0.17854382 0.16301739 0.19415464 0.15212607]] entropy:[1.7875497]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5845 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.025923   0.08424154 0.1769929  0.11246488 0.26694313 0.04651062]] probs:[[0.15135747 0.1604469  0.17604055 0.16503975 0.19260946 0.15450586]] entropy:[1.7882729]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5846 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03820426 0.08866686 0.17963974 0.12480592 0.26882994 0.06223677]] probs:[[0.1520252  0.15989365 0.1751218  0.16577773 0.19145866 0.15572299]] entropy:[1.7886176]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5847 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03637854 0.10209759 0.1721513  0.13952741 0.26031846 0.0557438 ]] probs:[[0.15168628 0.16198982 0.17374474 0.16816798 0.1897589  0.15465236]] entropy:[1.7888696]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5848 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03714423 0.10085881 0.1729768  0.14091523 0.26077583 0.05501869]] probs:[[0.15176189 0.16174603 0.17384174 0.1683565  0.18979497 0.15449893]] entropy:[1.7888473]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5849 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03776504 0.09915642 0.17418732 0.1424042  0.26121938 0.05365317]] probs:[[0.15183274 0.16144605 0.1740255  0.1685814  0.18984993 0.15426435]] entropy:[1.7888095]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6605308] v_loss:[[0.00069826]]
DEBUG:chainerrl.agents.a3c:grad norm:2.6831815612056085
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5850 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05363191 0.09376112 0.13176489 0.15169728 0.2701851  0.04907318]] probs:[[0.15473919 0.16107503 0.16731429 0.17068273 0.19215345 0.15403537]] entropy:[1.7888303]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5851 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04671138 0.08362707 0.12455361 0.15255988 0.27031645 0.04737003]] probs:[[0.15428653 0.16008857 0.16677636 0.17151316 0.1929472  0.15438819]] entropy:[1.7886518]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5852 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05091891 0.08936807 0.12882859 0.15218484 0.26993912 0.04832656]] probs:[[0.15458193 0.16064122 0.16710693 0.17105587 0.19243231 0.15418172]] entropy:[1.7887663]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5853 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04626388 0.08232386 0.12425482 0.15310654 0.2705248  0.04690127]] probs:[[0.1542585  0.15992257 0.16677086 0.17165257 0.1930387  0.15435685]] entropy:[1.7886229]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5854 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05558586 0.09771659 0.13381381 0.15034004 0.2700436  0.04972653]] probs:[[0.15486741 0.1615315  0.16746886 0.17025948 0.19191012 0.15396266]] entropy:[1.7888923]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5855 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05374481 0.09478601 0.13172598 0.15099294 0.26997393 0.04958225]] probs:[[0.15474214 0.16122505 0.16729209 0.17054653 0.19209483 0.15409936]] entropy:[1.7888522]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5856 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05402131 0.09524764 0.13203041 0.15092884 0.26999277 0.04966332]] probs:[[0.15475808 0.16127153 0.167314   0.17050603 0.19206515 0.15408511]] entropy:[1.788859]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5857 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05291587 0.09347152 0.13080443 0.15112332 0.26991153 0.04935022]] probs:[[0.1546943  0.16109699 0.16722487 0.17065746 0.19218272 0.15414369]] entropy:[1.788833]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5504833] v_loss:[[0.00018568]]
DEBUG:chainerrl.agents.a3c:grad norm:1.9344651004590678
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5858 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07766354 0.09201559 0.14851467 0.14138244 0.23720087 0.07096375]] probs:[[0.15822884 0.16051613 0.16984624 0.16863915 0.18559738 0.15717228]] entropy:[1.7900732]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5859 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07766499 0.09201418 0.14850932 0.14138566 0.23720899 0.07095796]] probs:[[0.15822904 0.16051586 0.16984528 0.16863966 0.18559884 0.15717134]] entropy:[1.7900729]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5860 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07766505 0.09201416 0.14850931 0.14138572 0.23720902 0.07095797]] probs:[[0.15822904 0.16051584 0.16984527 0.16863966 0.18559884 0.15717132]] entropy:[1.7900729]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5861 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07766356 0.09201557 0.14851463 0.14138229 0.23720072 0.07096385]] probs:[[0.15822884 0.16051611 0.16984622 0.16863912 0.18559735 0.15717229]] entropy:[1.790073]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5862 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07766503 0.09201418 0.14850934 0.14138567 0.23720896 0.07095798]] probs:[[0.15822904 0.16051586 0.16984528 0.16863967 0.18559884 0.15717134]] entropy:[1.7900729]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5863 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07766505 0.09201416 0.14850931 0.14138572 0.23720902 0.07095797]] probs:[[0.15822904 0.16051584 0.16984527 0.16863966 0.18559884 0.15717132]] entropy:[1.7900729]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5864 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07766356 0.09201557 0.14851463 0.14138229 0.23720072 0.07096385]] probs:[[0.15822884 0.16051611 0.16984622 0.16863912 0.18559735 0.15717229]] entropy:[1.790073]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5865 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07766503 0.09201418 0.14850934 0.14138567 0.23720896 0.07095798]] probs:[[0.15822904 0.16051586 0.16984528 0.16863967 0.18559884 0.15717134]] entropy:[1.7900729]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5272257] v_loss:[[0.00011338]]
DEBUG:chainerrl.agents.a3c:grad norm:0.3371939646058174
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5866 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08315774 0.10150145 0.13267002 0.14413081 0.2329855  0.07342952]] probs:[[0.1591319  0.16207792 0.1672092  0.16913657 0.18485306 0.15759134]] entropy:[1.7902939]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5867 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08315953 0.10149959 0.13266486 0.14413315 0.23299327 0.07342437]] probs:[[0.15913217 0.16207758 0.1672083  0.16913694 0.18485446 0.1575905 ]] entropy:[1.7902938]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5868 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08315954 0.10149956 0.13266486 0.14413318 0.23299327 0.07342435]] probs:[[0.15913217 0.16207758 0.1672083  0.16913694 0.18485446 0.1575905 ]] entropy:[1.7902938]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5869 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08315776 0.10150143 0.13267004 0.14413078 0.23298548 0.07342953]] probs:[[0.1591319  0.1620779  0.1672092  0.16913655 0.18485303 0.15759133]] entropy:[1.7902939]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5870 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08315953 0.10149959 0.13266486 0.14413315 0.23299327 0.07342437]] probs:[[0.15913217 0.16207758 0.1672083  0.16913694 0.18485446 0.1575905 ]] entropy:[1.7902938]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5871 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08315776 0.10150143 0.13267004 0.14413078 0.23298548 0.07342953]] probs:[[0.1591319  0.1620779  0.1672092  0.16913655 0.18485303 0.15759133]] entropy:[1.7902939]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5872 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08315955 0.10149958 0.13266486 0.14413317 0.23299325 0.07342437]] probs:[[0.15913217 0.16207759 0.16720831 0.16913696 0.18485446 0.1575905 ]] entropy:[1.7902938]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5873 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08315772 0.10150144 0.13266999 0.14413075 0.23298556 0.07342952]] probs:[[0.1591319  0.1620779  0.1672092  0.16913655 0.18485306 0.15759133]] entropy:[1.7902939]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5196987] v_loss:[[9.491916e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.27400892044645336
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5874 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1098674  0.11843287 0.07522625 0.15549543 0.18344435 0.1297776 ]] probs:[[0.1634601  0.16486624 0.15789463 0.17109124 0.17594051 0.16674726]] entropy:[1.7911721]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5875 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10986817 0.11843488 0.07523205 0.1554916  0.18343465 0.12978178]] probs:[[0.1634603  0.16486663 0.15789558 0.17109065 0.17593886 0.166748  ]] entropy:[1.7911721]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5876 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10987052 0.11843526 0.07522362 0.1554918  0.18345463 0.12977824]] probs:[[0.16346033 0.16486634 0.15789391 0.17109032 0.175942   0.16674706]] entropy:[1.7911719]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5877 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10988945 0.11845116 0.07520369 0.15547192 0.18352829 0.12977974]] probs:[[0.16346142 0.16486692 0.15788881 0.1710848  0.17595279 0.16674525]] entropy:[1.7911712]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5878 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11002827 0.11856838 0.07506382 0.15532362 0.18405792 0.1297944 ]] probs:[[0.16346934 0.16487136 0.15785249 0.17104399 0.17603011 0.16673264]] entropy:[1.7911662]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5879 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11142761 0.11972223 0.0740866  0.15471281 0.1877552  0.13016398]] probs:[[0.16355526 0.16491753 0.15756056 0.17079024 0.17652783 0.16664858]] entropy:[1.7911309]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5880 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11934324 0.12682702 0.07127926 0.15462175 0.20597178 0.13422309]] probs:[[0.16388136 0.1651124  0.15619086 0.16976604 0.17871124 0.16633812]] entropy:[1.7909386]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5881 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13377519 0.14008185 0.06807208 0.1531955  0.23744777 0.13804473]] probs:[[0.16458571 0.16562699 0.1541195  0.16781326 0.1825646  0.16528992]] entropy:[1.7905138]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5097506] v_loss:[[7.150089e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.1157667393245333
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5882 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14396071 0.15325144 0.05364128 0.16398393 0.24934433 0.12575422]] probs:[[0.16566458 0.16721089 0.15135767 0.16901515 0.18407601 0.1626757 ]] entropy:[1.790086]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5883 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14490347 0.15394376 0.0535779  0.16395523 0.2508252  0.12607375]] probs:[[0.16572432 0.16722932 0.15125999 0.16891193 0.1842415  0.16263299]] entropy:[1.790061]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5884 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14502773 0.15402651 0.05355072 0.1639456  0.25103548 0.12611426]] probs:[[0.16573264 0.16723077 0.15124467 0.1688978  0.1842666  0.16262752]] entropy:[1.7900568]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5885 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14505029 0.15404771 0.05356058 0.1639455  0.25105557 0.12612024]] probs:[[0.16573416 0.16723207 0.15124413 0.16889551 0.18426783 0.16262631]] entropy:[1.7900567]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5886 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14505255 0.15404932 0.05356025 0.1639452  0.25105926 0.12612082]] probs:[[0.1657343  0.16723211 0.15124388 0.16889523 0.18426827 0.16262619]] entropy:[1.7900567]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5887 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1450477  0.15404023 0.05354762 0.16394345 0.25106654 0.12612009]] probs:[[0.16573405 0.16723116 0.15124248 0.16889551 0.18427023 0.16262661]] entropy:[1.7900565]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5888 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1443595  0.15431379 0.05336184 0.16403298 0.25073513 0.12618978]] probs:[[0.16564173 0.1672988  0.1512342  0.16893274 0.1842333  0.16265924]] entropy:[1.7900586]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5889 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14879179 0.15556122 0.05143247 0.1639107  0.25081053 0.12553647]] probs:[[0.16628772 0.16741721 0.1508612  0.1688209  0.18414773 0.16246526]] entropy:[1.7900295]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4868137] v_loss:[[4.2894757e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.09047646992417302
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5890 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1439002  0.15671925 0.06198578 0.15273462 0.25250286 0.12686884]] probs:[[0.16553666 0.16767234 0.15251733 0.16700555 0.1845269  0.16274121]] entropy:[1.7901629]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5891 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14388274 0.15671451 0.06199377 0.1527354  0.2525033  0.1268719 ]] probs:[[0.16553406 0.16767184 0.15251884 0.167006   0.18452734 0.16274202]] entropy:[1.790163]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5892 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14965306 0.15831363 0.06007267 0.15235908 0.25280398 0.12610544]] probs:[[0.16635865 0.16780567 0.15210417 0.16680942 0.18443498 0.16248706]] entropy:[1.7901316]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5893 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15042074 0.15852563 0.05974212 0.15232356 0.2527961  0.12597568]] probs:[[0.16647235 0.16782705 0.15204105 0.16678941 0.18441793 0.16245225]] entropy:[1.790127]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5894 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15050513 0.15854888 0.05970576 0.15231963 0.2527953  0.12596142]] probs:[[0.16648483 0.1678294  0.1520341  0.16678719 0.18441607 0.16244842]] entropy:[1.7901262]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5895 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14551134 0.15716471 0.06129187 0.15266004 0.25248626 0.12659651]] probs:[[0.1657744  0.16771753 0.15238473 0.16696373 0.1844914  0.16266829]] entropy:[1.7901542]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5896 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14441423 0.15686153 0.06176483 0.15271078 0.2524979  0.12678213]] probs:[[0.16561244 0.16768675 0.15247504 0.16699217 0.18451561 0.16271794]] entropy:[1.7901601]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5897 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1497619  0.15834364 0.06002585 0.1523541  0.25280288 0.12608708]] probs:[[0.16637477 0.1678087  0.15209523 0.16680661 0.18443255 0.16248213]] entropy:[1.7901309]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4897614] v_loss:[[3.9184895e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.19425606961185532
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5898 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13100722 0.15253142 0.08067694 0.1462285  0.25896215 0.12582007]] probs:[[0.16341637 0.16697189 0.15539512 0.16592279 0.18572299 0.1625709 ]] entropy:[1.7902349]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5899 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12942404 0.15212882 0.08137612 0.14627936 0.25897044 0.12609299]] probs:[[0.16318439 0.16693184 0.1555291  0.16595823 0.18575473 0.16264172]] entropy:[1.7902379]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5900 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.126489   0.15092948 0.08157184 0.14684209 0.2582271  0.12591928]] probs:[[0.16282348 0.166852   0.15567175 0.1661714  0.18575059 0.16273075]] entropy:[1.790243]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5901 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12615456 0.1508128  0.0817166  0.14683257 0.2582088  0.12594585]] probs:[[0.16277753 0.16684122 0.15570238 0.16617848 0.18575686 0.16274354]] entropy:[1.7902437]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5902 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1276853  0.15166897 0.08210418 0.14633898 0.25892574 0.12634663]] probs:[[0.16293415 0.16688916 0.15567416 0.166002   0.18578435 0.16271618]] entropy:[1.7902411]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5903 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12786125 0.1517195  0.08203691 0.14633794 0.2589366  0.12632659]] probs:[[0.16295871 0.16689338 0.15565975 0.16599764 0.18578169 0.16270882]] entropy:[1.7902408]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5904 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12800819 0.1517596  0.08197718 0.14633338 0.25894234 0.12630735]] probs:[[0.16297963 0.16689697 0.15564756 0.1659938  0.1857793  0.16270266]] entropy:[1.7902406]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5905 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12618591 0.1508231  0.08170158 0.14683424 0.2582097  0.12594266]] probs:[[0.16278191 0.1668422  0.15569937 0.16617803 0.18575622 0.16274232]] entropy:[1.7902437]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.45874] v_loss:[[9.6277345e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.10009194390892291
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5906 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13112989 0.15264346 0.07224731 0.14489634 0.2511274  0.1372637 ]] probs:[[0.16361062 0.1671686  0.15425496 0.16587853 0.18447    0.16461726]] entropy:[1.7903233]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5907 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13109268 0.15262629 0.07226202 0.14489792 0.25112462 0.13726369]] probs:[[0.16360568 0.16716689 0.1542583  0.16587994 0.18447076 0.16461839]] entropy:[1.7903231]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5908 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20178774 0.1853653  0.13497657 0.08641865 0.20100203 0.11893839]] probs:[[0.1745254  0.17168267 0.16324614 0.15550862 0.17438832 0.16064885]] entropy:[1.7907987]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5909 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1972477  0.18537854 0.13248436 0.09098551 0.20191632 0.11838602]] probs:[[0.17380644 0.17175572 0.16290691 0.15628482 0.17461978 0.16062632]] entropy:[1.7908611]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5910 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15544716 0.16565847 0.08688019 0.11075278 0.24487819 0.1423883 ]] probs:[[0.16719995 0.16891605 0.15611978 0.15989159 0.18284184 0.16503072]] entropy:[1.7904941]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5911 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2027416  0.18352687 0.13354006 0.08400802 0.20424184 0.1218838 ]] probs:[[0.17464267 0.17131898 0.1629658  0.15509044 0.17490487 0.16107726]] entropy:[1.7907591]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5912 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19969872 0.18711205 0.13186315 0.09006869 0.207308   0.12287987]] probs:[[0.1738596  0.171685   0.16245686 0.15580699 0.17518759 0.16100399]] entropy:[1.7908046]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5913 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17531747 0.1786549  0.10258742 0.10381296 0.25167412 0.15004872]] probs:[[0.16895938 0.16952421 0.15710719 0.15729985 0.18236588 0.16474348]] entropy:[1.7904432]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4306153] v_loss:[[3.4846766e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.15793159793038458
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5914 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1783598  0.15932576 0.10939583 0.07655299 0.25026542 0.15721843]] probs:[[0.17031872 0.16710754 0.15896875 0.15383255 0.18301666 0.16675575]] entropy:[1.7902541]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5915 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21122077 0.16056752 0.13649036 0.04278452 0.25531465 0.15962249]] probs:[[0.17487477 0.1662374  0.16228268 0.14776658 0.1827582  0.16608037]] entropy:[1.7896333]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5916 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20543535 0.15729211 0.1357779  0.03635519 0.25867513 0.15968937]] probs:[[0.17421243 0.16602397 0.16249023 0.14711216 0.1837388  0.16642244]] entropy:[1.7894962]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5917 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22035505 0.16404127 0.14266656 0.03078453 0.25995013 0.16044292]] probs:[[0.17605491 0.16641457 0.16289526 0.14565276 0.18316567 0.16581683]] entropy:[1.7892792]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5918 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22107232 0.16474526 0.14402643 0.02957774 0.25948364 0.15924177]] probs:[[0.17618033 0.16653092 0.1631161  0.14547634 0.1830793  0.16561694]] entropy:[1.7892604]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5919 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2091556  0.13374445 0.17342512 0.03255577 0.20704073 0.11871948]] probs:[[0.17724848 0.16437352 0.17102711 0.14855462 0.17687401 0.16192228]] entropy:[1.7899616]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5920 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20568462 0.1330447  0.17145029 0.03489234 0.20794445 0.11525664]] probs:[[0.17683215 0.16444252 0.17088088 0.14906892 0.17723222 0.16154326]] entropy:[1.7900165]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5921 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22401476 0.13525423 0.17709872 0.02788124 0.20696935 0.1225928 ]] probs:[[0.17928262 0.1640552  0.17106564 0.14735283 0.17625257 0.16199112]] entropy:[1.7897137]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4565468] v_loss:[[4.649233e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.779168850599433
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5922 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19504794 0.16359697 0.150267   0.02087677 0.22104262 0.13554478]] probs:[[0.17439935 0.16899967 0.16676186 0.14652215 0.17899224 0.16432475]] entropy:[1.7898355]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5923 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16653912 0.15990451 0.09470961 0.06956685 0.2561895  0.16755255]] probs:[[0.16873468 0.16761889 0.1570396  0.15314041 0.18456061 0.16890576]] entropy:[1.7899531]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5924 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15460636 0.1687111  0.08433422 0.09979406 0.22946467 0.15160543]] probs:[[0.16756755 0.1699478  0.15619645 0.15862997 0.18059282 0.16706546]] entropy:[1.7906253]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5925 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13373515 0.16069327 0.0559843  0.13590273 0.24823464 0.15741809]] probs:[[0.1639353  0.1684148  0.15167211 0.16429104 0.18382265 0.16786413]] entropy:[1.7901566]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5926 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1321664  0.15710013 0.0569031  0.14564025 0.244169   0.15268219]] probs:[[0.16378528 0.1679204  0.15191072 0.16600704 0.18319643 0.16718017]] entropy:[1.7902604]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5927 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14787315 0.16088057 0.08085217 0.13237996 0.21714588 0.13680036]] probs:[[0.16684434 0.16902873 0.15602876 0.16427931 0.17881183 0.1650071 ]] entropy:[1.7909383]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5928 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1679924  0.15167572 0.09784595 0.1224048  0.20530367 0.12963025]] probs:[[0.17030294 0.1675467  0.15876617 0.16271354 0.17677718 0.16389349]] entropy:[1.7911564]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5929 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13270327 0.16811348 0.05348673 0.1336878  0.25217518 0.16419065]] probs:[[0.16340512 0.16929498 0.15096016 0.16356607 0.18414149 0.16863218]] entropy:[1.790016]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6444632] v_loss:[[0.00054552]]
DEBUG:chainerrl.agents.a3c:grad norm:2.4154562073993633
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5930 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12264093 0.16449654 0.05418804 0.13731933 0.2574407  0.1685574 ]] probs:[[0.16174261 0.16865613 0.15104131 0.16413425 0.18508331 0.16934241]] entropy:[1.7898952]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5931 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12239075 0.16425353 0.05479532 0.13812794 0.25669116 0.16817287]] probs:[[0.16171198 0.16862538 0.15114224 0.16427699 0.18495587 0.16928758]] entropy:[1.7899207]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5932 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12233061 0.16421703 0.05486911 0.13831721 0.25653988 0.16812938]] probs:[[0.16170369 0.16862074 0.15115474 0.16430956 0.18492953 0.16928172]] entropy:[1.7899247]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5933 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1222905  0.16418828 0.05492434 0.13836037 0.25649816 0.16808885]] probs:[[0.1616989  0.16861765 0.15116467 0.16431835 0.18492375 0.16927664]] entropy:[1.7899266]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5934 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12228864 0.1642002  0.0548955  0.1383877  0.25648856 0.16810514]] probs:[[0.16169813 0.16861917 0.15115988 0.16432239 0.18492144 0.1692789 ]] entropy:[1.7899263]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5935 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12228762 0.16419923 0.05489771 0.13839078 0.25648594 0.16810381]] probs:[[0.16169801 0.16861905 0.15116026 0.16432293 0.18492101 0.16927873]] entropy:[1.7899264]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5936 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12229665 0.16417998 0.05493425 0.13834503 0.25650698 0.16813824]] probs:[[0.1616985  0.1686148  0.15116487 0.16431443 0.1849238  0.16928355]] entropy:[1.7899263]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5937 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12237877 0.16413894 0.0547508  0.1381802  0.25660682 0.16838147]] probs:[[0.16171    0.16860601 0.15113546 0.16428553 0.1849402  0.16932283]] entropy:[1.7899213]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4506123] v_loss:[[5.8041223e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.0663881497046657
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5938 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1301899  0.16897945 0.01622079 0.13747773 0.27751312 0.1770427 ]] probs:[[0.16271149 0.16914698 0.1451851  0.16390163 0.18853842 0.17051637]] entropy:[1.7888024]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5939 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13065203 0.16921572 0.01535824 0.13610213 0.2783669  0.1775377 ]] probs:[[0.16278504 0.16918525 0.14505845 0.16367465 0.18869755 0.17059907]] entropy:[1.7887607]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5940 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13277876 0.14508505 0.00876818 0.15017132 0.2732632  0.15629797]] probs:[[0.16425954 0.16629346 0.14510204 0.16714142 0.189035   0.16816859]] entropy:[1.7888366]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5941 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12952122 0.16577823 0.02115959 0.14206679 0.27795473 0.16970487]] probs:[[0.16265802 0.16866374 0.14595355 0.16471152 0.18868586 0.16932732]] entropy:[1.7889278]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5942 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12861308 0.16638379 0.02062321 0.14284094 0.2748783  0.17379536]] probs:[[0.16249117 0.16874595 0.14585803 0.1648196  0.18808404 0.17000127]] entropy:[1.7889731]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5943 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12694186 0.16616467 0.02338736 0.14743072 0.2749268  0.17551956]] probs:[[0.1620325  0.16851415 0.14609285 0.16538659 0.18787594 0.17009798]] entropy:[1.789023]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5944 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12434862 0.16525443 0.02705353 0.15318133 0.27547625 0.1759776 ]] probs:[[0.16143548 0.16817607 0.14646852 0.16615787 0.1877729  0.16998915]] entropy:[1.7890761]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5945 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12723663 0.16533625 0.0221871  0.14751564 0.27462637 0.17468183]] probs:[[0.16215353 0.16845071 0.14598356 0.16547541 0.1879044  0.17003237]] entropy:[1.7890112]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.429229] v_loss:[[8.802635e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.27698202528266064
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5946 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1162372  0.1659871  0.06643091 0.17455697 0.26230106 0.18983345]] probs:[[0.15882803 0.16692957 0.15111117 0.16836627 0.18380693 0.17095807]] entropy:[1.7899063]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5947 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13279448 0.16239646 0.05276079 0.16621305 0.24939874 0.18152198]] probs:[[0.16231865 0.16719541 0.14983395 0.16783474 0.18239334 0.1704239 ]] entropy:[1.7900554]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5948 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12693474 0.16644548 0.03968379 0.14805217 0.25857306 0.18399678]] probs:[[0.16187759 0.16840151 0.14835224 0.16533236 0.18465306 0.17138328]] entropy:[1.7896268]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5949 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13315912 0.16155924 0.01252187 0.15775578 0.25269926 0.16626738]] probs:[[0.16391547 0.16863742 0.14528736 0.16799724 0.1847292  0.16943327]] entropy:[1.7893176]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5950 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13452233 0.16577178 0.01662556 0.14590448 0.25270492 0.17561585]] probs:[[0.16395117 0.16915545 0.1457178  0.16582794 0.18451877 0.17082885]] entropy:[1.7893659]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5951 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1349497  0.16570857 0.01160894 0.14959055 0.2511198  0.1727105 ]] probs:[[0.16416    0.16928785 0.14511128 0.16658114 0.18438236 0.17047735]] entropy:[1.7893085]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5952 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13893203 0.15696129 0.0112133  0.13966376 0.25341284 0.17720751]] probs:[[0.16503464 0.16803707 0.14524713 0.16515544 0.18505187 0.17147386]] entropy:[1.7892523]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5953 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1347823  0.16584198 0.01139855 0.14954916 0.2514658  0.17250533]] probs:[[0.16413474 0.16931272 0.1450827  0.16657649 0.18444866 0.17044467]] entropy:[1.7892979]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.749283] v_loss:[[0.00103966]]
DEBUG:chainerrl.agents.a3c:grad norm:3.7966405126869023
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5954 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15813892 0.15201649 0.02788207 0.16369854 0.20250824 0.17824125]] probs:[[0.16826476 0.16723771 0.14771457 0.16920285 0.17589866 0.1716815 ]] entropy:[1.790285]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5955 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15646699 0.14986552 0.03969883 0.15758796 0.20769441 0.18180989]] probs:[[0.16771312 0.16660962 0.14922969 0.16790123 0.1765285  0.17201777]] entropy:[1.7904347]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5956 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15915959 0.14647035 0.04515719 0.15569732 0.20445406 0.18558238]] probs:[[0.16808696 0.16596754 0.14997657 0.16750601 0.17587543 0.1725875 ]] entropy:[1.790531]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5957 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1580008  0.1519891  0.0292245  0.16271873 0.20296957 0.17899722]] probs:[[0.1682052  0.16719703 0.14788106 0.16900066 0.17594182 0.17177424]] entropy:[1.7903035]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5958 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15840673 0.14693522 0.04699924 0.15586592 0.20588666 0.18524009]] probs:[[0.16788515 0.16597027 0.15018572 0.16745915 0.17604859 0.17245106]] entropy:[1.79055]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5959 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15923092 0.1472592  0.04172594 0.16237664 0.2005527  0.18237875]] probs:[[0.16818053 0.16617912 0.14953537 0.16871041 0.17527562 0.17211895]] entropy:[1.7905202]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5960 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17512609 0.15204784 0.07943664 0.15788059 0.21934001 0.18689519]] probs:[[0.16875133 0.16490144 0.15335213 0.16586609 0.17637989 0.17074911]] entropy:[1.7908611]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5961 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1863454  0.15917733 0.07498068 0.15855847 0.21643952 0.19144459]] probs:[[0.17018178 0.1656205  0.15224673 0.16551805 0.1753811  0.17105179]] entropy:[1.7907915]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.382228] v_loss:[[0.00155798]]
DEBUG:chainerrl.agents.a3c:grad norm:1.8435536047286865
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5962 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19709404 0.14460391 0.0822632  0.1682526  0.2059333  0.18629594]] probs:[[0.17211439 0.16331309 0.1534429  0.16722126 0.1736425  0.17026588]] entropy:[1.790915]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5963 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20107026 0.14412127 0.08398104 0.16783664 0.20948578 0.18396494]] probs:[[0.17262356 0.16306752 0.15354967 0.16698094 0.17408241 0.1696959 ]] entropy:[1.7908945]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5964 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20222375 0.14477883 0.08444788 0.16795577 0.21003652 0.18458232]] probs:[[0.17271939 0.16307715 0.15352944 0.1669009  0.17407408 0.16969909]] entropy:[1.7908903]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5965 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22191678 0.1596762  0.09089868 0.16679181 0.206591   0.19661322]] probs:[[0.17473261 0.16418868 0.15327579 0.16536115 0.17207511 0.17036673]] entropy:[1.790863]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5966 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2188371  0.15515487 0.08815908 0.16992852 0.21058823 0.19407749]] probs:[[0.17435573 0.16359852 0.1529972  0.16603342 0.17292342 0.17009176]] entropy:[1.7908278]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5967 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21600668 0.15332481 0.08855478 0.16888975 0.21436389 0.19234544]] probs:[[0.17395754 0.16338828 0.15314105 0.1659513  0.173672   0.16988981]] entropy:[1.7908279]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5968 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21647638 0.15350044 0.08867083 0.16900866 0.21413743 0.19255999]] probs:[[0.17401403 0.16339326 0.15313661 0.16594696 0.1736075  0.16990162]] entropy:[1.7908275]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5969 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21671279 0.15397851 0.08855603 0.1689571  0.21394023 0.19314578]] probs:[[0.1740276  0.1634455  0.15309477 0.16591212 0.17354578 0.16997424]] entropy:[1.7908254]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2994856] v_loss:[[0.00025796]]
DEBUG:chainerrl.agents.a3c:grad norm:0.5986712138928253
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5970 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21100199 0.14915131 0.0991419  0.16889386 0.22144163 0.18484804]] probs:[[0.1730811  0.16270025 0.15476382 0.1659443  0.17489746 0.16861303]] entropy:[1.7909415]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5971 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21079515 0.14972116 0.09652418 0.16594103 0.22194453 0.18587352]] probs:[[0.17314503 0.16288681 0.15444817 0.16555035 0.17508629 0.1688833 ]] entropy:[1.790905]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5972 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21052136 0.14898525 0.09876116 0.16817462 0.22177202 0.18488607]] probs:[[0.17303674 0.16270973 0.1547396  0.16586217 0.1749945  0.16865727]] entropy:[1.7909361]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5973 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20907609 0.14847596 0.1000023  0.16884218 0.22185774 0.18477495]] probs:[[0.17279264 0.16263235 0.15493698 0.16597852 0.17501539 0.16864419]] entropy:[1.7909575]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5974 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.204347   0.14739838 0.10213017 0.16917583 0.22208704 0.18543805]] probs:[[0.17205533 0.16253081 0.15533738 0.16610913 0.17513484 0.16883253]] entropy:[1.791001]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5975 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20631664 0.14803015 0.10074797 0.16851994 0.22199792 0.18541424]] probs:[[0.17237753 0.16261744 0.15510748 0.16598381 0.17510192 0.16881181]] entropy:[1.7909777]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5976 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2014751  0.14812075 0.09718039 0.16355534 0.2224132  0.18906176]] probs:[[0.17180312 0.16287693 0.1547877  0.16541035 0.17543828 0.16968367]] entropy:[1.7909453]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5977 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20086579 0.14796962 0.09756467 0.16371027 0.22241566 0.18908426]] probs:[[0.1717053  0.1628588  0.15485336 0.16544257 0.17544569 0.16969423]] entropy:[1.7909521]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.363183] v_loss:[[5.8912563e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.2005702595824351
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5978 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19364725 0.14910306 0.10663615 0.1605811  0.2296059  0.1851853 ]] probs:[[0.1703937  0.16297022 0.15619427 0.16485156 0.17663231 0.16895792]] entropy:[1.791023]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5979 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.193339   0.14884014 0.10792501 0.16165337 0.22953682 0.184651  ]] probs:[[0.17031045 0.16289797 0.15636748 0.16499865 0.17658825 0.1688372 ]] entropy:[1.79104]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5980 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19425611 0.1488447  0.10863732 0.16263121 0.22947359 0.18391936]] probs:[[0.1704164  0.16285065 0.15643273 0.16511133 0.17652497 0.16866392]] entropy:[1.7910477]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5981 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1949613  0.14784795 0.10871019 0.1635724  0.2297144  0.18367417]] probs:[[0.17051512 0.16266789 0.15642442 0.16524598 0.17654523 0.16860132]] entropy:[1.7910413]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5982 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19407453 0.14859867 0.10926834 0.1631944  0.22949627 0.18375118]] probs:[[0.17036904 0.16279489 0.15651637 0.16518842 0.17651196 0.1686193 ]] entropy:[1.7910545]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5983 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19426383 0.15005523 0.10255396 0.15714857 0.22966635 0.187072  ]] probs:[[0.17060305 0.1632252  0.15565307 0.16438714 0.17675102 0.1693805 ]] entropy:[1.7909687]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5984 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20270567 0.15063043 0.0981862  0.1617426  0.23009284 0.18004525]] probs:[[0.17196196 0.16323619 0.15489599 0.1650602  0.17673661 0.16810906]] entropy:[1.7909026]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5985 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19444637 0.14440224 0.08898459 0.17326726 0.2229874  0.1750667 ]] probs:[[0.17123583 0.16287738 0.15409665 0.16764733 0.17619349 0.16794927]] entropy:[1.7908924]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4092695] v_loss:[[5.2508272e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.05240423451772848
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5986 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18772484 0.13591321 0.10630066 0.20260063 0.2016288  0.18120012]] probs:[[0.16966997 0.16110295 0.15640222 0.17221281 0.17204553 0.16856652]] entropy:[1.7911278]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5987 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18883806 0.13626659 0.10718235 0.20313399 0.20229661 0.18058273]] probs:[[0.16977634 0.1610815  0.15646403 0.17222087 0.17207672 0.16838054]] entropy:[1.7911298]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5988 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1839378  0.13362175 0.11086901 0.21014148 0.20177197 0.17726961]] probs:[[0.16896562 0.16067427 0.15705977 0.17345166 0.17200601 0.16784267]] entropy:[1.7911272]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5989 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17373058 0.12969387 0.11282737 0.21232913 0.20121    0.17676541]] probs:[[0.1675579  0.16033931 0.15765762 0.17415182 0.17222613 0.16806719]] entropy:[1.7911241]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5990 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17490563 0.13197508 0.11710728 0.20648623 0.20165947 0.18138228]] probs:[[0.16757393 0.16053213 0.15816303 0.17295046 0.1721177  0.16866277]] entropy:[1.7912048]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5991 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18588349 0.14074486 0.12766086 0.1948169  0.20457998 0.19015521]] probs:[[0.16859336 0.1611525  0.1590577  0.17010623 0.17177513 0.1693151 ]] entropy:[1.791346]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5992 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18968675 0.14347939 0.13179736 0.19144128 0.20592825 0.19177659]] probs:[[0.16895257 0.16132334 0.15944971 0.16924927 0.17171901 0.16930602]] entropy:[1.7913822]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5993 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18726115 0.14108802 0.12702067 0.19469179 0.20467295 0.1901689 ]] probs:[[0.16879492 0.16117832 0.15892684 0.17005384 0.17175968 0.16928644]] entropy:[1.7913404]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4786963] v_loss:[[2.2839564e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.20888210262929913
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:5994 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19999495 0.1586876  0.13702407 0.17817622 0.21019019 0.19202457]] probs:[[0.17009027 0.16320743 0.1597098  0.16641931 0.17183325 0.16873997]] entropy:[1.7914488]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5995 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2008278  0.15891096 0.13670748 0.17822725 0.21018752 0.19185624]] probs:[[0.17021373 0.16322638 0.15964212 0.16640995 0.17181437 0.16869348]] entropy:[1.7914449]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5996 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20107606 0.15901874 0.13641073 0.17796886 0.21022424 0.19217625]] probs:[[0.17025091 0.1632391  0.15958999 0.16636199 0.17181554 0.16874243]] entropy:[1.7914414]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5997 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20097607 0.15892853 0.13671583 0.17824721 0.21017954 0.19185871]] probs:[[0.17023355 0.16322404 0.15963838 0.16640797 0.17180751 0.16868852]] entropy:[1.7914443]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5998 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20094305 0.15867452 0.13687201 0.17835405 0.21003066 0.19188818]] probs:[[0.17023219 0.16318668 0.1596673  0.16642992 0.17178623 0.16869771]] entropy:[1.7914457]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:5999 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19969517 0.15639956 0.13989577 0.1808272  0.20855436 0.18954805]] probs:[[0.1700769  0.16287045 0.16020453 0.16689798 0.17159034 0.16835985]] entropy:[1.7914736]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6000 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19457161 0.1478775  0.15157521 0.18727057 0.1977022  0.17913844]] probs:[[0.16969746 0.16195573 0.16255571 0.16846299 0.17022954 0.1670986 ]] entropy:[1.7915657]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6001 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19013011 0.13220699 0.17361307 0.19345665 0.17739336 0.16467683]] probs:[[0.16969617 0.16014609 0.1669163  0.1702616  0.1675485  0.16543135]] entropy:[1.7915572]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4116246] v_loss:[[1.6961267e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.020721442046229405
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6002 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24802022 0.13788143 0.15488964 0.19320586 0.15569158 0.1569491 ]] probs:[[0.17926918 0.16057317 0.16332757 0.16970712 0.16345862 0.1636643 ]] entropy:[1.7910621]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6003 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24798112 0.1371652  0.15598267 0.19365709 0.15445352 0.15619428]] probs:[[0.17929669 0.1604891  0.1635377  0.1698164  0.1632878  0.1635723 ]] entropy:[1.791054]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6004 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24822718 0.137107   0.1560083  0.19355264 0.15424626 0.15650176]] probs:[[0.17933406 0.1604737  0.16353571 0.16979228 0.16324781 0.16361643]] entropy:[1.7910511]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6005 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2480378  0.13707624 0.15616623 0.19369306 0.15426457 0.1561396 ]] probs:[[0.17930825 0.16047607 0.16356897 0.16982384 0.16325822 0.16356462]] entropy:[1.7910523]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6006 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24802782 0.13707136 0.15618104 0.19372089 0.15426046 0.1561033 ]] probs:[[0.1793068  0.1604756  0.16357173 0.16982889 0.16325788 0.163559  ]] entropy:[1.7910523]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6007 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24827613 0.13711153 0.15606214 0.19355734 0.1542239  0.15651585]] probs:[[0.17933963 0.16047159 0.16354162 0.16979004 0.16324125 0.16361582]] entropy:[1.7910507]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6008 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24808545 0.13709354 0.15619929 0.19368964 0.15426533 0.15616736]] probs:[[0.17931306 0.1604755  0.16357099 0.16981973 0.16325496 0.16356577]] entropy:[1.7910522]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6009 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24807368 0.1370896  0.15621018 0.19371647 0.1542641  0.15613188]] probs:[[0.17931138 0.16047525 0.16357316 0.16982467 0.16325514 0.16356035]] entropy:[1.7910523]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3858875] v_loss:[[2.598748e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.1063232543743851
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6010 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24145938 0.14115354 0.14804494 0.18964152 0.1651899  0.16049461]] probs:[[0.17813654 0.16113533 0.16224961 0.16914096 0.16505535 0.16428219]] entropy:[1.7911785]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6011 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.240923   0.14096785 0.14819047 0.19005708 0.16523683 0.15966949]] probs:[[0.17806938 0.16113107 0.16229908 0.16923822 0.16508938 0.16417284]] entropy:[1.7911814]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6012 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24134044 0.14110743 0.14808057 0.1898123  0.16517624 0.16024418]] probs:[[0.17812201 0.16113392 0.16226146 0.16917619 0.16505927 0.16424718]] entropy:[1.7911787]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6013 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24092187 0.1409636  0.148211   0.1900925  0.16523315 0.15962757]] probs:[[0.17806903 0.16113023 0.16230226 0.16924407 0.16508864 0.16416581]] entropy:[1.7911812]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6014 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24135904 0.1411131  0.14809585 0.1898179  0.16517653 0.16024676]] probs:[[0.17812388 0.16113351 0.1622626  0.16917576 0.16505797 0.16424628]] entropy:[1.7911785]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6015 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24094336 0.1409709  0.14822495 0.19009274 0.16523403 0.15963782]] probs:[[0.1780712  0.16112992 0.16230302 0.16924255 0.16508725 0.16416597]] entropy:[1.791181]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6016 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24088706 0.14094216 0.14824271 0.19018257 0.16522245 0.15950535]] probs:[[0.1780648  0.16112857 0.1623092  0.16926117 0.1650887  0.16414756]] entropy:[1.7911811]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6017 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.24137573 0.14111707 0.14811613 0.18983373 0.16517545 0.16023646]] probs:[[0.17812549 0.16113292 0.16226465 0.16917713 0.16505653 0.16424333]] entropy:[1.7911785]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3961152] v_loss:[[1.7180942e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.13775338240047055
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6018 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23973058 0.10221096 0.09710073 0.25001362 0.12279866 0.22354071]] probs:[[0.17785636 0.15500489 0.1542148  0.1796947  0.15822916 0.17500009]] entropy:[1.7895783]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6019 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2395808  0.10200995 0.09705402 0.25083342 0.12257195 0.22272873]] probs:[[0.17784671 0.15498854 0.15422232 0.17985927 0.15820841 0.17487475]] entropy:[1.7895709]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6020 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23951143 0.1019588  0.09706238 0.25097415 0.12254397 0.22255945]] probs:[[0.17783931 0.15498489 0.15422788 0.17988954 0.15820836 0.17484999]] entropy:[1.7895705]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6021 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23932135 0.10192642 0.09714837 0.2508335  0.12268273 0.22254811]] probs:[[0.177811   0.15498467 0.15424591 0.17986982 0.15823519 0.1748534 ]] entropy:[1.7895763]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6022 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23949875 0.10194602 0.09707174 0.25102    0.12253455 0.22250791]] probs:[[0.17783794 0.15498371 0.15423012 0.17989871 0.15820767 0.17484185]] entropy:[1.7895702]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6023 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23950507 0.1019481  0.09707656 0.25102243 0.1225336  0.22250794]] probs:[[0.17783862 0.15498364 0.15423048 0.1798987  0.15820712 0.17484142]] entropy:[1.7895702]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6024 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23933503 0.10192991 0.09716133 0.25084415 0.12267961 0.22254245]] probs:[[0.17781247 0.15498438 0.15424708 0.17987075 0.15823387 0.17485146]] entropy:[1.7895763]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6025 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23951557 0.10195199 0.09708379 0.25102302 0.12253318 0.2225113 ]] probs:[[0.17783976 0.1549836  0.15423094 0.17989805 0.15820639 0.17484128]] entropy:[1.7895701]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4334611] v_loss:[[7.7784435e-08]]
DEBUG:chainerrl.agents.a3c:grad norm:0.09489135044484574
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6026 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23073201 0.11080594 0.10001773 0.23670107 0.13871585 0.21544096]] probs:[[0.17644908 0.15650786 0.1548285  0.17750546 0.16093752 0.17377152]] entropy:[1.7901368]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6027 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23071654 0.11080331 0.10002124 0.23668335 0.13872926 0.21544376]] probs:[[0.17644691 0.15650795 0.15482953 0.17750289 0.16094019 0.17377254]] entropy:[1.7901375]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6028 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23192295 0.11229327 0.09889429 0.22861993 0.14105111 0.22345641]] probs:[[0.1765507  0.15664442 0.15455954 0.17596851 0.16121458 0.17506224]] entropy:[1.7901607]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6029 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23206642 0.11234711 0.09886699 0.22847891 0.14104508 0.22371551]] probs:[[0.17656736 0.15664516 0.15454774 0.17593506 0.16120568 0.175099  ]] entropy:[1.7901585]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6030 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23207322 0.11234967 0.09886985 0.2284794  0.14104457 0.22372022]] probs:[[0.17656805 0.1566451  0.15454772 0.17593463 0.16120514 0.17509931]] entropy:[1.7901584]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6031 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23222648 0.11243258 0.09869509 0.22830711 0.14078599 0.22379051]] probs:[[0.17660335 0.1566654  0.15452793 0.17591253 0.16117097 0.17511979]] entropy:[1.7901552]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6032 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23222816 0.11243136 0.09869396 0.22830772 0.14078401 0.22379148]] probs:[[0.17660367 0.15666524 0.15452778 0.17591266 0.16117068 0.17511998]] entropy:[1.790155]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6033 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23222864 0.11241545 0.09867617 0.22831367 0.14076747 0.22378875]] probs:[[0.17660503 0.15666386 0.15452614 0.17591497 0.16116917 0.17512077]] entropy:[1.7901547]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4305699] v_loss:[[3.1606305e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.11574280044094874
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6034 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2226309  0.12290716 0.10416996 0.1961737  0.15604685 0.22352025]] probs:[[0.17532362 0.15868321 0.15573761 0.17074586 0.16403003 0.17547962]] entropy:[1.7906783]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6035 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22239542 0.12197982 0.10329376 0.19652517 0.15531768 0.22317201]] probs:[[0.17536046 0.15860677 0.15567057 0.17088202 0.16398351 0.1754967 ]] entropy:[1.790663]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6036 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22174452 0.11636111 0.09736671 0.1987863  0.15032591 0.22105768]] probs:[[0.17572486 0.15814877 0.15517317 0.1717365  0.16361251 0.17560421]] entropy:[1.7905506]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6037 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22005628 0.09791936 0.07777335 0.2071129  0.13368025 0.21423669]] probs:[[0.17696016 0.15661454 0.15349096 0.17468445 0.16231658 0.17593332]] entropy:[1.7901032]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6038 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2198446  0.08410503 0.06298915 0.2181346  0.12092398 0.21032296]] probs:[[0.17785597 0.15528071 0.15203619 0.1775521  0.16110453 0.17617053]] entropy:[1.7896243]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6039 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2195609  0.08077985 0.05962707 0.2201807  0.11736004 0.20996788]] probs:[[0.17804557 0.15497418 0.15173048 0.17815596 0.16074812 0.17634574]] entropy:[1.7895001]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6040 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21958221 0.08032812 0.05906989 0.22055115 0.11677538 0.20989133]] probs:[[0.17808358 0.15493396 0.15167509 0.17825623 0.16068503 0.17636614]] entropy:[1.7894791]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6041 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21975765 0.08015859 0.0588499  0.22067456 0.11630511 0.20995682]] probs:[[0.17812738 0.1549186  0.15165241 0.17829077 0.1606208  0.1763901 ]] entropy:[1.7894669]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4366454] v_loss:[[5.036988e-07]]
DEBUG:chainerrl.agents.a3c:grad norm:0.06800923517565594
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6042 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23637635 0.10481768 0.05452201 0.22810774 0.14350949 0.14049113]] probs:[[0.18109055 0.15876712 0.15097931 0.17959936 0.1650305  0.16453314]] entropy:[1.7896886]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6043 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23592532 0.10483256 0.05461225 0.22804846 0.14420277 0.14049365]] probs:[[0.18100192 0.15876338 0.15098713 0.17958179 0.16513859 0.16452722]] entropy:[1.789699]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6044 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23592907 0.10483431 0.05461375 0.22804894 0.14420389 0.1404961 ]] probs:[[0.18100224 0.15876335 0.15098706 0.17958154 0.16513847 0.1645273 ]] entropy:[1.7896988]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6045 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23593254 0.10483607 0.05461507 0.2280493  0.14420523 0.14049847]] probs:[[0.18100256 0.15876335 0.150987   0.17958128 0.1651384  0.1645274 ]] entropy:[1.7896988]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6046 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23638903 0.10482282 0.05452548 0.22810982 0.14351274 0.14049944]] probs:[[0.18109177 0.158767   0.15097894 0.17959866 0.16503006 0.16453351]] entropy:[1.7896886]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6047 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23639072 0.1048235  0.05452579 0.22811    0.14351338 0.14050035]] probs:[[0.18109195 0.15876698 0.1509789  0.17959857 0.16503005 0.16453356]] entropy:[1.7896887]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6048 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22984837 0.10502975 0.06099287 0.23968299 0.14533068 0.13587771]] probs:[[0.17964818 0.15856771 0.15173641 0.18142366 0.16508867 0.16353545]] entropy:[1.789705]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6049 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23204042 0.10364653 0.05666333 0.235579   0.14445311 0.13489304]] probs:[[0.18031816 0.15859106 0.15131228 0.18095736 0.16519648 0.16362472]] entropy:[1.7896558]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4176165] v_loss:[[1.7689375e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.09843664848094766
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6050 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21508506 0.11364354 0.08519081 0.25792977 0.14951374 0.14734216]] probs:[[0.17554544 0.15861127 0.15416196 0.18323009 0.16440396 0.16404733]] entropy:[1.7900149]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6051 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21515326 0.11397391 0.08443645 0.25677365 0.14924106 0.14800821]] probs:[[0.17559236 0.15869528 0.15407637 0.1830548  0.16439188 0.16418932]] entropy:[1.7900283]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6052 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21667528 0.11375734 0.08254039 0.25302044 0.14808793 0.14742872]] probs:[[0.17604098 0.15882434 0.15394293 0.1825569  0.16437155 0.16426323]] entropy:[1.7900467]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6053 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22741757 0.10826916 0.06190285 0.2292257  0.14490162 0.14358616]] probs:[[0.17929739 0.15915799 0.1519469  0.17962188 0.16509646 0.16487941]] entropy:[1.7899426]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6054 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2253641  0.10931833 0.06524895 0.23360531 0.1464178  0.14395401]] probs:[[0.17867798 0.15910104 0.1522418  0.18015659 0.16511445 0.16470815]] entropy:[1.7899687]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6055 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2268226  0.1061518  0.05302098 0.22717248 0.14428033 0.14166632]] probs:[[0.17965318 0.15923123 0.15099196 0.17971605 0.1654197  0.16498786]] entropy:[1.7898252]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6056 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21988963 0.11291074 0.07175011 0.24177912 0.14858045 0.144984  ]] probs:[[0.17724237 0.15926018 0.15283802 0.1811649  0.16504349 0.16445097]] entropy:[1.7900375]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6057 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.21496959 0.12455922 0.08634305 0.23825197 0.15602358 0.150035  ]] probs:[[0.17555255 0.16037712 0.15436375 0.17968777 0.1655035  0.16451533]] entropy:[1.790427]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.9047713] v_loss:[[0.00215531]]
DEBUG:chainerrl.agents.a3c:grad norm:10.785225571166082
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6058 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18675658 0.1306641  0.05491576 0.27936792 0.11939789 0.1455113 ]] probs:[[0.17201576 0.16263258 0.15076846 0.18870735 0.16081062 0.16506524]] entropy:[1.7893329]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6059 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18743747 0.12631527 0.04747087 0.27884358 0.12028242 0.14606696]] probs:[[0.1724039  0.16218178 0.14988576 0.18890534 0.1612063  0.16541699]] entropy:[1.7892094]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6060 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18872893 0.12719087 0.04816346 0.27827394 0.11951479 0.14632343]] probs:[[0.1725784  0.16227841 0.14994763 0.18874493 0.16103752 0.16541311]] entropy:[1.7892268]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6061 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1813925  0.1251296  0.03881522 0.277794   0.11954502 0.14808945]] probs:[[0.1717952  0.16239639 0.14896715 0.18918107 0.161492   0.16616812]] entropy:[1.7891129]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6062 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19088852 0.14154439 0.04929958 0.27016184 0.11156888 0.15184653]] probs:[[0.17277762 0.16445899 0.14996716 0.18703179 0.15960239 0.16616201]] entropy:[1.7894189]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6063 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18784392 0.13679413 0.04771707 0.2741468  0.11432808 0.15026377]] probs:[[0.17235869 0.16378063 0.14982244 0.1878945  0.16014214 0.16600162]] entropy:[1.7893282]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6064 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19066879 0.1284166  0.05006453 0.2768638  0.11938762 0.14710517]] probs:[[0.17279904 0.1623699  0.15013352 0.18835421 0.16091047 0.1654329 ]] entropy:[1.789285]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6065 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1905657  0.13128772 0.05110292 0.27579105 0.11831091 0.14729176]] probs:[[0.17273617 0.16279429 0.1502503  0.1881032  0.16069539 0.16542062]] entropy:[1.7893325]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4812822] v_loss:[[0.00013216]]
DEBUG:chainerrl.agents.a3c:grad norm:0.8847654648064369
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6066 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19753426 0.13249584 0.06925309 0.24994497 0.12297063 0.15501869]] probs:[[0.17370248 0.16276468 0.15278974 0.18304914 0.16122168 0.16647221]] entropy:[1.7901024]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6067 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19712222 0.1302618  0.06796189 0.24967404 0.12477716 0.15495059]] probs:[[0.17370065 0.16246668 0.15265386 0.18307304 0.16157804 0.16652772]] entropy:[1.7900926]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6068 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19672611 0.14005603 0.07981887 0.23854022 0.1333329  0.15838474]] probs:[[0.1730633  0.1635285  0.15396883 0.18045321 0.16243277 0.16655342]] entropy:[1.7905027]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6069 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1944963  0.1538855  0.09531686 0.23058961 0.14669949 0.16048855]] probs:[[0.17174968 0.1649145  0.1555331  0.17806192 0.16373368 0.16600706]] entropy:[1.7908847]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6070 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19683623 0.13894163 0.07881436 0.23957473 0.13262449 0.15806428]] probs:[[0.17313418 0.16339527 0.1538603  0.18069407 0.16236633 0.16654989]] entropy:[1.7904676]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6071 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1971128  0.12892273 0.06594138 0.2513308  0.12335841 0.15448767]] probs:[[0.17379107 0.16233528 0.15242648 0.18347378 0.16143449 0.16653886]] entropy:[1.7900227]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6072 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1972475  0.12634838 0.06223999 0.2546146  0.12057979 0.15355083]] probs:[[0.17398107 0.16207309 0.15200889 0.1842537  0.16114084 0.16654238]] entropy:[1.7898834]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6073 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19812319 0.12222409 0.0606234  0.25924307 0.12249709 0.1490897 ]] probs:[[0.17419228 0.1614605  0.15181458 0.18517098 0.1615046  0.16585705]] entropy:[1.7897537]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6507878] v_loss:[[0.00098372]]
DEBUG:chainerrl.agents.a3c:grad norm:4.771793689695918
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6074 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19854558 0.1427299  0.08529125 0.2604804  0.09563825 0.12504017]] probs:[[0.17440222 0.16493453 0.15572785 0.18554531 0.15734753 0.16204254]] entropy:[1.7898452]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6075 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19856507 0.14273116 0.08527099 0.2604849  0.09562914 0.1250315 ]] probs:[[0.1744059  0.16493501 0.15572494 0.18554644 0.15734635 0.1620414 ]] entropy:[1.7898449]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6076 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19856384 0.14273088 0.08526994 0.26048467 0.09562702 0.12503344]] probs:[[0.17440577 0.16493502 0.15572485 0.18554647 0.15734608 0.16204177]] entropy:[1.7898448]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6077 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19856846 0.14274131 0.08527035 0.26048097 0.09562969 0.12504321]] probs:[[0.1744059  0.16493611 0.15572432 0.18554507 0.1573459  0.16204274]] entropy:[1.7898449]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6078 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19857216 0.14274798 0.08526871 0.2604777  0.09563208 0.1250473 ]] probs:[[0.1744062  0.16493687 0.15572375 0.18554409 0.15734595 0.16204306]] entropy:[1.789845]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6079 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19864173 0.14282142 0.08525839 0.26048395 0.09570052 0.12509686]] probs:[[0.17441091 0.16494198 0.15571551 0.18553737 0.15735003 0.1620442 ]] entropy:[1.7898451]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6080 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1986484  0.1428658  0.08526416 0.2604908  0.09575523 0.12515438]] probs:[[0.17440708 0.16494457 0.15571195 0.18553333 0.15735415 0.16204889]] entropy:[1.789846]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6081 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19857918 0.14283155 0.08530001 0.2605016  0.09573871 0.12515998]] probs:[[0.17439708 0.16494088 0.15571938 0.18553753 0.1573534  0.16205172]] entropy:[1.7898463]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.8937969] v_loss:[[0.00210671]]
DEBUG:chainerrl.agents.a3c:grad norm:5.4544620863454405
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6082 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2051183  0.1263237  0.06223587 0.2548742  0.10888954 0.15405132]] probs:[[0.17542101 0.16212931 0.15206474 0.18437003 0.15932722 0.16668767]] entropy:[1.7897395]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6083 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2050946  0.12616001 0.06203912 0.25499463 0.10895432 0.15385802]] probs:[[0.17542744 0.16211255 0.15204401 0.18440334 0.15934715 0.16666551]] entropy:[1.7897341]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6084 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20508578 0.12616938 0.06203162 0.25498152 0.10894307 0.15386711]] probs:[[0.17542657 0.16211471 0.15204345 0.18440165 0.15934598 0.16666767]] entropy:[1.7897344]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6085 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20538133 0.1269562  0.06507406 0.2556378  0.11038325 0.15431452]] probs:[[0.17529121 0.16206922 0.15234403 0.18432584 0.15940538 0.16656436]] entropy:[1.7897775]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6086 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20505966 0.12619871 0.06200609 0.2550162  0.10894714 0.15386528]] probs:[[0.17542145 0.16211897 0.15203911 0.18440749 0.15934615 0.16666687]] entropy:[1.7897338]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6087 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20515656 0.12619936 0.0620649  0.25491932 0.1088352  0.15392454]] probs:[[0.1754384  0.16211902 0.152048   0.18438958 0.15932827 0.1666767 ]] entropy:[1.7897347]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6088 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20654199 0.1267842  0.06317164 0.2533272  0.10682809 0.15527578]] probs:[[0.17566073 0.16219456 0.15219827 0.18407433 0.15898988 0.1668822 ]] entropy:[1.7897549]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6089 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20518672 0.12621616 0.06208529 0.2548472  0.10877457 0.15397087]] probs:[[0.17544444 0.16212243 0.15205176 0.18437706 0.15931928 0.16668512]] entropy:[1.7897358]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.094245] v_loss:[[0.00171549]]
DEBUG:chainerrl.agents.a3c:grad norm:16.03778427020193
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6090 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22554547 0.11283056 0.1136236  0.2112912  0.09262539 0.16157927]] probs:[[0.17898892 0.15990965 0.16003653 0.17645566 0.15671109 0.16789818]] entropy:[1.7904522]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6091 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.23470974 0.11816384 0.12141075 0.19915986 0.08244449 0.17053568]] probs:[[0.18035729 0.16051605 0.16103807 0.17405824 0.15488371 0.1691466 ]] entropy:[1.7903943]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6092 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22113022 0.11034866 0.10890993 0.21653739 0.09760979 0.15734865]] probs:[[0.17836718 0.15966257 0.15943304 0.17754985 0.15764156 0.16734585]] entropy:[1.7904515]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6093 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22100303 0.11029717 0.10881593 0.21668898 0.09780529 0.15722212]] probs:[[0.17834614 0.15965584 0.15941952 0.1775784  0.15767384 0.16732624]] entropy:[1.7904519]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6094 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22064105 0.1101475  0.10856386 0.21710803 0.09833688 0.15687998]] probs:[[0.17828652 0.15963635 0.15938374 0.17765775 0.15776204 0.16727361]] entropy:[1.7904536]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6095 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22064506 0.11014988 0.10855928 0.2171084  0.09833945 0.15687665]] probs:[[0.17828718 0.15963668 0.15938295 0.17765775 0.1577624  0.16727301]] entropy:[1.7904536]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6096 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22064456 0.110149   0.10855913 0.2171094  0.09834036 0.15687586]] probs:[[0.17828712 0.15963656 0.15938295 0.17765795 0.15776256 0.1672729 ]] entropy:[1.7904534]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6097 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22089446 0.11025841 0.1087435  0.21681218 0.09797381 0.15712026]] probs:[[0.17832781 0.15965056 0.15940888 0.17760131 0.15770131 0.16731015]] entropy:[1.7904528]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2933826] v_loss:[[0.00054395]]
DEBUG:chainerrl.agents.a3c:grad norm:2.1370094926332786
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6098 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.204461   0.13064729 0.10366854 0.21438658 0.09980534 0.15812556]] probs:[[0.17549103 0.16300392 0.15866506 0.17724155 0.1580533  0.16754508]] entropy:[1.7907366]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6099 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20446102 0.13064876 0.10366862 0.2143856  0.09980734 0.15812676]] probs:[[0.17549095 0.16300407 0.15866499 0.17724128 0.15805353 0.1675452 ]] entropy:[1.7907368]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6100 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20439632 0.13062125 0.10370225 0.2144027  0.09980997 0.15814401]] probs:[[0.17548032 0.16300026 0.15867099 0.17724505 0.1580546  0.16754879]] entropy:[1.7907372]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6101 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20441292 0.13062827 0.10369364 0.21439818 0.09980847 0.15813963]] probs:[[0.17548306 0.16300125 0.15866947 0.17724408 0.1580542  0.1675479 ]] entropy:[1.790737]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6102 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20508844 0.13025352 0.10476646 0.2161592  0.09983595 0.15882404]] probs:[[0.17548597 0.16283284 0.15873514 0.17743953 0.15795442 0.16755214]] entropy:[1.7907189]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6103 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20409472 0.13021158 0.10447746 0.21531802 0.09984809 0.15862386]] probs:[[0.17538318 0.16289243 0.158754   0.17736264 0.15802078 0.16758694]] entropy:[1.7907346]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6104 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20507745 0.13034298 0.10456714 0.21588397 0.09982745 0.1586996 ]] probs:[[0.17549984 0.16286206 0.1587178  0.17740667 0.15796731 0.16754639]] entropy:[1.7907208]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6105 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20453896 0.1305379  0.1039355  0.2147839  0.09981282 0.15829514]] probs:[[0.17548047 0.16296355 0.1586855  0.17728749 0.15803264 0.16755036]] entropy:[1.7907335]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.373323] v_loss:[[4.3277352e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.1598368218587329
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6106 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19082585 0.12781925 0.11465811 0.20447993 0.10970811 0.16454582]] probs:[[0.1731454  0.16257267 0.16044705 0.17552575 0.1596548  0.1686544 ]] entropy:[1.791074]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6107 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19106668 0.12779078 0.11482238 0.20480815 0.10969456 0.16464621]] probs:[[0.17316356 0.16254595 0.1604516  0.17555952 0.15963094 0.16864842]] entropy:[1.7910702]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6108 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19159865 0.12756212 0.11552473 0.2060025  0.10971191 0.16509454]] probs:[[0.17317677 0.16243473 0.16049117 0.17568922 0.15956096 0.16864716]] entropy:[1.7910585]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6109 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19154309 0.12760255 0.11541595 0.20582959 0.10970905 0.16502571]] probs:[[0.17317805 0.16245154 0.16048382 0.17566991 0.15957056 0.16864617]] entropy:[1.7910602]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6110 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19492269 0.1259381  0.12048924 0.21416995 0.10994703 0.1681266 ]] probs:[[0.17321602 0.16166963 0.16079111 0.17658225 0.15910491 0.16863614]] entropy:[1.7909769]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6111 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19086233 0.12793489 0.11441475 0.2041673  0.1097201  0.16440172]] probs:[[0.17316748 0.16260628 0.16042262 0.17548688 0.15967125 0.16864546]] entropy:[1.791076]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6112 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19086212 0.1279338  0.11441784 0.20416915 0.10972246 0.16440289]] probs:[[0.17316724 0.16260591 0.16042292 0.17548698 0.15967144 0.16864546]] entropy:[1.791076]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6113 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19086316 0.12793577 0.11441439 0.20416704 0.10972015 0.16440155]] probs:[[0.17316762 0.16260642 0.16042255 0.17548682 0.15967125 0.16864543]] entropy:[1.7910762]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2466925] v_loss:[[0.0004455]]
DEBUG:chainerrl.agents.a3c:grad norm:0.6238553664491447
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6114 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19130285 0.12004004 0.11108968 0.19880518 0.11345208 0.17780158]] probs:[[0.17320938 0.16129553 0.15985833 0.17451374 0.16023642 0.17088655]] entropy:[1.7910438]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6115 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19132692 0.12002844 0.11106351 0.19881222 0.11341777 0.17779326]] probs:[[0.17321485 0.16129489 0.15985535 0.17451629 0.16023214 0.17088643]] entropy:[1.791043]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6116 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19068643 0.11975235 0.11138452 0.19895223 0.11344669 0.17795709]] probs:[[0.17311211 0.16125795 0.15991421 0.17454894 0.16024433 0.17092247]] entropy:[1.7910461]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6117 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19127487 0.11999874 0.1110831  0.19881773 0.1134179  0.1778021 ]] probs:[[0.17320727 0.16129144 0.1598598  0.17451869 0.16023348 0.17088933]] entropy:[1.7910433]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6118 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.191325   0.1200293  0.11106698 0.19881451 0.11342103 0.17779306]] probs:[[0.17321432 0.16129485 0.15985571 0.17451648 0.16023247 0.17088619]] entropy:[1.791043]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6119 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19132486 0.12002788 0.1110632  0.19881135 0.11341748 0.17779289]] probs:[[0.17321464 0.16129494 0.15985544 0.17451628 0.16023223 0.1708865 ]] entropy:[1.7910432]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6120 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19132674 0.12002751 0.1110611  0.19881105 0.11341564 0.17779231]] probs:[[0.17321505 0.16129494 0.15985517 0.1745163  0.16023201 0.17088647]] entropy:[1.7910429]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6121 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19141525 0.11998171 0.11120135 0.19903967 0.11340248 0.1778799 ]] probs:[[0.173216   0.16127416 0.1598643  0.17454171 0.16021658 0.17088726]] entropy:[1.791041]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7298567] v_loss:[[0.00101951]]
DEBUG:chainerrl.agents.a3c:grad norm:2.2079467071857137
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6122 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20802414 0.1400721  0.11662372 0.19123226 0.12296046 0.14832473]] probs:[[0.17572199 0.16417798 0.16037306 0.17279592 0.16139254 0.16553849]] entropy:[1.7911792]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6123 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20571935 0.14132628 0.11312238 0.18550067 0.12293288 0.14613786]] probs:[[0.17568864 0.16473205 0.16015087 0.17217211 0.16172977 0.16552658]] entropy:[1.7912116]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6124 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20935085 0.13942218 0.11870191 0.19467422 0.12310863 0.14960602]] probs:[[0.17572822 0.16385959 0.16049932 0.17316796 0.16120815 0.16553684]] entropy:[1.7911588]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6125 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20670702 0.14080772 0.11462963 0.18798368 0.12287772 0.14709368]] probs:[[0.17570285 0.16449742 0.16024707 0.17244372 0.16157427 0.16553472]] entropy:[1.7911979]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6126 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20569523 0.14133887 0.11308363 0.18543874 0.12293494 0.14611363]] probs:[[0.17568839 0.16473787 0.16014831 0.17216535 0.16173378 0.16552633]] entropy:[1.7912121]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6127 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20624882 0.14104079 0.11396648 0.18687487 0.12287427 0.14666197]] probs:[[0.17569464 0.16460349 0.16020675 0.1723235  0.16164021 0.16553135]] entropy:[1.7912042]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6128 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2085501  0.13981263 0.11740146 0.19254886 0.12300283 0.14881851]] probs:[[0.17572817 0.16405486 0.16041909 0.17293869 0.16132018 0.165539  ]] entropy:[1.7911713]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6129 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20988429 0.13906744 0.11964078 0.19622467 0.12320581 0.15012118]] probs:[[0.17572425 0.16371043 0.16056079 0.17334026 0.1611342  0.16553009]] entropy:[1.7911495]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3655083] v_loss:[[5.623433e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.15252456668352551
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6130 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20098302 0.13955893 0.12110889 0.17674415 0.13044626 0.14602374]] probs:[[0.17488244 0.16446371 0.16145717 0.17069446 0.16297181 0.16553037]] entropy:[1.7913699]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6131 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19894478 0.13876446 0.12053946 0.17646612 0.13072814 0.14405118]] probs:[[0.17468469 0.16448218 0.16151164 0.17080182 0.16316564 0.16535404]] entropy:[1.7913816]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6132 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19519266 0.13571109 0.11836713 0.17688642 0.12965156 0.13969477]] probs:[[0.17443669 0.16436347 0.16153733 0.17127246 0.16337052 0.16501954]] entropy:[1.7913814]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6133 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19396475 0.13478778 0.11788925 0.17670004 0.12955336 0.13843797]] probs:[[0.17434435 0.1643265  0.16157296 0.17136016 0.1634686  0.16492742]] entropy:[1.7913846]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6134 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19063929 0.13263828 0.1163649  0.17611988 0.1290141  0.13565753]] probs:[[0.1740829  0.16427316 0.16162151 0.17157358 0.16367888 0.16476989]] entropy:[1.791393]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6135 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18919563 0.13181011 0.11585661 0.17590624 0.12884395 0.1346518 ]] probs:[[0.17395344 0.16425206 0.16165246 0.17165701 0.16376558 0.16471948]] entropy:[1.7913979]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6136 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18814398 0.13119413 0.11548188 0.17576337 0.1286915  0.13393198]] probs:[[0.17385973 0.1642351  0.16167477 0.1717205  0.16382459 0.16468537]] entropy:[1.791401]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6137 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18758555 0.13088869 0.11529609 0.17568697 0.12875128 0.13355032]] probs:[[0.17380495 0.16422491 0.16168408 0.17174917 0.16387427 0.1646626 ]] entropy:[1.7914033]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3810182] v_loss:[[0.00017472]]
DEBUG:chainerrl.agents.a3c:grad norm:0.29038381213194187
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6138 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16865675 0.11687835 0.10659593 0.1835659  0.1314673  0.12995148]] probs:[[0.17152913 0.16287366 0.1612075  0.17410564 0.16526723 0.16501689]] entropy:[1.7913777]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6139 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16863833 0.11687803 0.10657401 0.18353727 0.13146593 0.12993869]] probs:[[0.17152838 0.16287588 0.16120623 0.17410311 0.16526932 0.1650171 ]] entropy:[1.7913778]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6140 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16855974 0.11690835 0.10648131 0.1833782  0.13146769 0.12987246]] probs:[[0.17152551 0.1628909  0.16120125 0.17408618 0.16527984 0.16501637]] entropy:[1.791379]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6141 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16851586 0.1169296  0.10643321 0.18328913 0.13147356 0.12983719]] probs:[[0.1715235  0.16289958 0.16119868 0.17407627 0.16528611 0.16501586]] entropy:[1.7913795]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6142 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16861445 0.11717211 0.10623133 0.18314974 0.13124159 0.1299741 ]] probs:[[0.17154317 0.16294171 0.16116872 0.1740548  0.16525042 0.1650411 ]] entropy:[1.7913797]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6143 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16852562 0.11702505 0.10632545 0.18316996 0.1313853  0.12985879]] probs:[[0.17153065 0.16292034 0.16118646 0.17406109 0.1652768  0.1650247 ]] entropy:[1.7913799]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6144 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16835949 0.1171907  0.10671098 0.18311001 0.13197805 0.12981673]] probs:[[0.17147791 0.1629243  0.16122583 0.17402606 0.16535144 0.16499443]] entropy:[1.7913847]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6145 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16872403 0.1173871  0.10606699 0.18310533 0.13102777 0.13012978]] probs:[[0.17156024 0.16297513 0.16114064 0.17404534 0.16521345 0.16506515]] entropy:[1.7913795]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3078785] v_loss:[[0.00019392]]
DEBUG:chainerrl.agents.a3c:grad norm:0.6373357227809731
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6146 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15828688 0.12000586 0.10677734 0.17255653 0.1490122  0.13190107]] probs:[[0.16974077 0.16336572 0.16121887 0.17218028 0.16817376 0.1653206 ]] entropy:[1.791506]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6147 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1577619  0.11896482 0.10746633 0.17269436 0.149944   0.13111305]] probs:[[0.16966826 0.16321169 0.16134575 0.17222083 0.16834699 0.1652065 ]] entropy:[1.7915043]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6148 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15769097 0.11881331 0.10756087 0.17269948 0.15006872 0.1309946 ]] probs:[[0.16965948 0.1631901  0.1613641  0.17222501 0.1683712  0.16519012]] entropy:[1.7915041]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6149 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15818548 0.11985222 0.10692064 0.1725912  0.14921443 0.13177758]] probs:[[0.1697235  0.16334055 0.1612419  0.17218617 0.1682077  0.1653001 ]] entropy:[1.7915058]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6150 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1603649  0.12435069 0.1039924  0.17196915 0.14520878 0.1350876 ]] probs:[[0.17002513 0.16401076 0.16070554 0.17200963 0.16746764 0.16578121]] entropy:[1.7915075]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6151 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15853746 0.12053058 0.10645978 0.1724916  0.14858948 0.13228245]] probs:[[0.16977347 0.163442   0.16115835 0.17215914 0.16809295 0.16537409]] entropy:[1.7915065]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6152 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15971981 0.12280454 0.10491367 0.17214791 0.14649019 0.13397297]] probs:[[0.1699417  0.16378263 0.16087846 0.17206693 0.16770823 0.16562209]] entropy:[1.7915076]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6153 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16705829 0.12596345 0.10309971 0.17225133 0.13777119 0.13906725]] probs:[[0.17104182 0.16415535 0.16044472 0.17193235 0.16610515 0.16632058]] entropy:[1.7914823]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.441994] v_loss:[[2.0578757e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.0133850160764576
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6154 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1376862  0.13185796 0.09947273 0.17708784 0.14197491 0.1433619 ]] probs:[[0.16647594 0.16550851 0.16023433 0.1731663  0.16719143 0.1674235 ]] entropy:[1.791502]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6155 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13802394 0.12920642 0.10218646 0.17974687 0.1439219  0.14250727]] probs:[[0.16641614 0.16495521 0.1605578  0.1735064  0.16740055 0.16716391]] entropy:[1.7914964]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6156 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13572639 0.13006234 0.10118715 0.17845567 0.14531396 0.14178576]] probs:[[0.16611934 0.16518109 0.16047966 0.17337133 0.16771968 0.16712897]] entropy:[1.7914987]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6157 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13707566 0.13125263 0.10135958 0.18012749 0.144306   0.1434228 ]] probs:[[0.16620341 0.16523843 0.16037203 0.17351504 0.16740948 0.16726169]] entropy:[1.7914908]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6158 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13708816 0.13206932 0.09972253 0.17778288 0.14290006 0.14339592]] probs:[[0.16633376 0.16550106 0.16023329 0.17324229 0.1673033  0.16738628]] entropy:[1.7914984]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6159 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13887882 0.13195162 0.10085215 0.17858644 0.14219515 0.14459763]] probs:[[0.16651826 0.16536874 0.16030501 0.17326333 0.1670714  0.16747327]] entropy:[1.7915001]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6160 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13553713 0.13222194 0.10077345 0.17816633 0.14567454 0.14283817]] probs:[[0.16601393 0.16546446 0.16034181 0.17324397 0.16770543 0.16723043]] entropy:[1.7915001]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6161 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1395796  0.1322441  0.0972176  0.16958012 0.14377238 0.14309266]] probs:[[0.16696191 0.16574165 0.16003679 0.17204675 0.16766341 0.16754949]] entropy:[1.7915317]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3570583] v_loss:[[9.347958e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.1542231746038361
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6162 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1372311  0.13959798 0.08979352 0.17293756 0.15342788 0.13911772]] probs:[[0.16637251 0.16676675 0.15866446 0.17242041 0.16908914 0.16668668]] entropy:[1.7914481]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6163 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13369134 0.13157618 0.09374741 0.17375718 0.15853465 0.13317403]] probs:[[0.16599585 0.16564511 0.159496   0.17278163 0.17017138 0.16591   ]] entropy:[1.7914493]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6164 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13381308 0.13114646 0.09331176 0.17430605 0.15764439 0.13316643]] probs:[[0.16604568 0.16560349 0.15945497 0.17290734 0.1700503  0.16593833]] entropy:[1.7914455]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6165 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13379967 0.13115267 0.09330038 0.17429137 0.15763786 0.13316096]] probs:[[0.1660447  0.16560575 0.15945435 0.1729061  0.17005046 0.16593868]] entropy:[1.7914454]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6166 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13382387 0.13114038 0.09324253 0.17421766 0.15761742 0.1331444 ]] probs:[[0.16605307 0.16560806 0.15944931 0.17289789 0.17005144 0.16594027]] entropy:[1.7914457]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6167 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.133807   0.13118505 0.09331005 0.17417426 0.15772666 0.13314681]] probs:[[0.1660458  0.16561101 0.15945579 0.17288575 0.17006545 0.16593623]] entropy:[1.7914461]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6168 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13372941 0.13132527 0.09353697 0.1741499  0.15801105 0.13317917]] probs:[[0.16601697 0.16561833 0.15947667 0.17286493 0.17009747 0.16592565]] entropy:[1.7914469]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6169 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13367192 0.13140899 0.09368847 0.17420016 0.15815699 0.13320817]] probs:[[0.16599631 0.16562112 0.15949015 0.17286205 0.17011093 0.16591936]] entropy:[1.7914473]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.357199] v_loss:[[7.736824e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.14530747281768658
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6170 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13715881 0.12777266 0.093123   0.17198521 0.1651395  0.1291245 ]] probs:[[0.16657251 0.16501635 0.15939653 0.17247584 0.17129914 0.16523959]] entropy:[1.7914203]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6171 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1370936  0.12788136 0.09330286 0.17194441 0.16538112 0.12914087]] probs:[[0.16654952 0.16502227 0.15941358 0.17245623 0.17132807 0.16523026]] entropy:[1.7914208]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6172 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13706514 0.12798706 0.09342591 0.1719509  0.16550188 0.12919632]] probs:[[0.16653425 0.16502929 0.15942311 0.17244646 0.17133792 0.16522896]] entropy:[1.7914215]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6173 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14680189 0.12889123 0.1061057  0.16348049 0.1693701  0.1367299 ]] probs:[[0.16744833 0.16447592 0.16077061 0.17026454 0.1712703  0.16577025]] entropy:[1.7915336]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6174 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1414413  0.1269827  0.09922194 0.16993001 0.16698623 0.13308828]] probs:[[0.16692376 0.16452764 0.16002305 0.1717476  0.17124274 0.16553526]] entropy:[1.7914684]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6175 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14409404 0.12816389 0.10273222 0.16507167 0.16844273 0.13503696]] probs:[[0.16720986 0.16456729 0.16043484 0.17075458 0.17133117 0.16570227]] entropy:[1.7915102]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6176 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14193638 0.12739117 0.10001683 0.16768277 0.16744076 0.13355252]] probs:[[0.16699868 0.16458724 0.16014288 0.17135413 0.17131266 0.16560446]] entropy:[1.7914839]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6177 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13782895 0.12757522 0.09416182 0.17140988 0.16546312 0.12982038]] probs:[[0.16663152 0.16493165 0.15951179 0.17232218 0.17130047 0.16530238]] entropy:[1.7914304]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.31545] v_loss:[[0.00014773]]
DEBUG:chainerrl.agents.a3c:grad norm:1.9655247906368383
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6178 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13492875 0.12742534 0.0908269  0.1803088  0.17033838 0.12508294]] probs:[[0.16605633 0.16481501 0.15889208 0.17376557 0.17204165 0.1644294 ]] entropy:[1.7913139]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6179 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13556118 0.12720892 0.09143542 0.18144275 0.17026015 0.12547696]] probs:[[0.16609251 0.16471104 0.1589229  0.17389062 0.1719569  0.16442601]] entropy:[1.7913117]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6180 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13999797 0.12566686 0.09642595 0.19183014 0.17037146 0.12884495]] probs:[[0.16622114 0.163856   0.15913408 0.17506394 0.1713473  0.16437759]] entropy:[1.7912734]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6181 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14175238 0.12507783 0.09776638 0.19503547 0.17024586 0.1297327 ]] probs:[[0.16633084 0.16358033 0.15917318 0.17543381 0.17113835 0.16434355]] entropy:[1.7912569]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6182 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1452832  0.12381131 0.10158828 0.20313653 0.1706078  0.13241324]] probs:[[0.16643302 0.16289748 0.15931733 0.1763457  0.1707017  0.16430476]] entropy:[1.7912108]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6183 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14590228 0.12106761 0.06219779 0.14700998 0.16569635 0.11771615]] probs:[[0.16982308 0.1656575  0.15618679 0.1700113  0.17321806 0.16510324]] entropy:[1.7912223]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6184 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14633156 0.12033825 0.06058124 0.15356691 0.16956414 0.11287027]] probs:[[0.16977862 0.16542237 0.15582678 0.17101148 0.17376919 0.1641916 ]] entropy:[1.7911419]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6185 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14593764 0.12063812 0.06107085 0.15547939 0.17230545 0.1117975 ]] probs:[[0.16959517 0.1653583  0.15579599 0.17122114 0.17412649 0.16390288]] entropy:[1.791117]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2407384] v_loss:[[0.00040887]]
DEBUG:chainerrl.agents.a3c:grad norm:0.9901399946124394
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6186 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14101003 0.11715699 0.06370926 0.17238148 0.18240196 0.10573579]] probs:[[0.16830738 0.16434024 0.15578721 0.17367111 0.17542012 0.16247395]] entropy:[1.7909477]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6187 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14009462 0.11797348 0.06395124 0.17121492 0.18429625 0.10581763]] probs:[[0.16812623 0.16444792 0.15579978 0.17344064 0.17572437 0.16246101]] entropy:[1.7909447]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6188 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1400462  0.11788039 0.06363182 0.17104326 0.18394099 0.10567521]] probs:[[0.16814978 0.16446362 0.15577939 0.17344356 0.17569508 0.16246851]] entropy:[1.7909452]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6189 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1425762  0.11651123 0.0663676  0.17553858 0.18317457 0.10675367]] probs:[[0.16833171 0.16400084 0.15598    0.17397279 0.17530632 0.16240837]] entropy:[1.790947]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6190 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14135477 0.1169711  0.06475103 0.17344709 0.18280177 0.1060622 ]] probs:[[0.16828156 0.16422786 0.15587194 0.1737697  0.1754029  0.16244605]] entropy:[1.790948]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6191 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13744996 0.11858879 0.05881618 0.1661744  0.1811711  0.10414688]] probs:[[0.16815847 0.16501653 0.15544206 0.17305876 0.17567363 0.1626505 ]] entropy:[1.790949]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6192 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13777758 0.11854003 0.06141032 0.16859326 0.18178475 0.10486691]] probs:[[0.1680296  0.16482803 0.15567538 0.17328817 0.17558923 0.16258965]] entropy:[1.7909583]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6193 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13695349 0.12028326 0.06142525 0.164977   0.18492691 0.10453754]] probs:[[0.16788699 0.16511148 0.1556738  0.17265832 0.17613742 0.16253203]] entropy:[1.7909547]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3677292] v_loss:[[0.00012326]]
DEBUG:chainerrl.agents.a3c:grad norm:0.3203542746728192
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6194 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13242276 0.13586761 0.06893078 0.16282946 0.183998   0.10791747]] probs:[[0.16662426 0.16719924 0.15637381 0.17176856 0.17544341 0.16259071]] entropy:[1.7910793]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6195 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13925067 0.15017685 0.08495811 0.14834711 0.20795363 0.11042892]] probs:[[0.16638973 0.1682177  0.15759687 0.1679102  0.17822304 0.16166255]] entropy:[1.7910286]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6196 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14447892 0.1621384  0.09800332 0.14469428 0.2226745  0.11396536]] probs:[[0.16600539 0.168963   0.15846673 0.16604115 0.1795073  0.16101648]] entropy:[1.790957]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6197 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14540216 0.16513385 0.10156004 0.14398852 0.22582038 0.11511984]] probs:[[0.16585034 0.16915534 0.15873621 0.16561607 0.17973869 0.16090329]] entropy:[1.7909435]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6198 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.145735   0.16549817 0.10183624 0.14371508 0.22623356 0.11519035]] probs:[[0.1658722  0.16918297 0.15874813 0.16553749 0.17977682 0.1608823 ]] entropy:[1.7909395]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6199 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14581573 0.16538525 0.10162304 0.14401037 0.22570167 0.11521109]] probs:[[0.16589938 0.16917792 0.15872747 0.16560014 0.17969614 0.16089898]] entropy:[1.7909458]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6200 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14581752 0.16539305 0.10163247 0.14400846 0.22570884 0.1152129 ]] probs:[[0.16589893 0.16917849 0.15872827 0.16559908 0.17969665 0.16089857]] entropy:[1.7909458]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6201 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14581776 0.16539413 0.10163367 0.14400817 0.2257097  0.11521319]] probs:[[0.16589887 0.16917858 0.15872836 0.16559894 0.1796967  0.16089852]] entropy:[1.7909458]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3926867] v_loss:[[1.9939243e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.057958133186107846
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6202 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.16527161 0.1742344  0.08251973 0.15379049 0.19671507 0.1334448 ]] probs:[[0.16895361 0.17047472 0.15553524 0.16702493 0.1743505  0.16366103]] entropy:[1.7911184]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6203 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13864093 0.1774783  0.09997791 0.17818235 0.1951882  0.13675578]] probs:[[0.16398004 0.17047387 0.15776107 0.17059395 0.17351985 0.16367121]] entropy:[1.7912401]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6204 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13668528 0.17727588 0.10055369 0.18095459 0.19390658 0.13675494]] probs:[[0.16366176 0.17044157 0.15785396 0.17106973 0.17329983 0.16367318]] entropy:[1.7912375]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6205 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13664445 0.17726053 0.10055278 0.18102004 0.19384234 0.13676529]] probs:[[0.16365634 0.17044024 0.15785502 0.17108223 0.17329001 0.16367611]] entropy:[1.7912376]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6206 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13672084 0.17724188 0.10048487 0.18094043 0.19381787 0.13676243]] probs:[[0.16367209 0.17044045 0.15784743 0.171072   0.17328922 0.16367888]] entropy:[1.791238]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6207 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13641393 0.17728297 0.10073693 0.18127514 0.19387701 0.13677274]] probs:[[0.1636111  0.17043623 0.15787683 0.171118   0.17328806 0.1636698 ]] entropy:[1.7912371]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6208 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14235653 0.17627795 0.09607526 0.17531954 0.19268242 0.13636017]] probs:[[0.16478743 0.17047314 0.15733466 0.17030984 0.17329273 0.16380227]] entropy:[1.7912451]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6209 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13713145 0.17722107 0.10014966 0.18048936 0.19373515 0.1367598 ]] probs:[[0.16375257 0.17045073 0.15780732 0.17100872 0.17328893 0.16369173]] entropy:[1.7912388]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3966922] v_loss:[[1.64288e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.031168757619872534
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6210 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1593416  0.1755649  0.00284572 0.23894079 0.19242755 0.15519987]] probs:[[0.16711648 0.16984977 0.14290716 0.18096258 0.17273818 0.16642576]] entropy:[1.7892425]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6211 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15978085 0.17548124 0.00248725 0.23848802 0.19234353 0.155166  ]] probs:[[0.16720565 0.16985156 0.1428694  0.1808977  0.17273992 0.1664358 ]] entropy:[1.7892417]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6212 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[1.6262227e-01 1.7502156e-01 1.8892856e-04 2.3560715e-01 1.9180563e-01
  1.5494575e-01]] probs:[[0.16777878 0.16987206 0.14262417 0.18048202 0.17274727 0.16649574]] entropy:[1.789234]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6213 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16481678  0.17215493 -0.00238819  0.23397863  0.18959594  0.15141448]] probs:[[0.16844158 0.16968217 0.14250597 0.1805036  0.17266755 0.16619913]] entropy:[1.7892137]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6214 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16178791  0.17079188 -0.00047031  0.23752768  0.1888134   0.14888866]] probs:[[0.16799593 0.16951539 0.14283375 0.18121415 0.17259799 0.16584282]] entropy:[1.7892148]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6215 r:0.1 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15972976 0.17435539 0.00212526 0.23873238 0.1914601  0.15352933]] probs:[[0.16730282 0.16976771 0.14290798 0.1810563  0.17269652 0.16626868]] entropy:[1.789237]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6216 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1590708  0.1754231  0.00300132 0.23925242 0.1923391  0.1549585 ]] probs:[[0.16707894 0.16983353 0.14293599 0.18102732 0.17273086 0.16639328]] entropy:[1.7892423]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6217 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15911461 0.17556873 0.00302079 0.23918125 0.19245137 0.15517665]] probs:[[0.1670733  0.16984507 0.14292769 0.18100041 0.17273687 0.16641667]] entropy:[1.789243]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-0.4674567] v_loss:[[0.01428199]]
DEBUG:chainerrl.agents.a3c:grad norm:34.87707625271261
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6218 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.26200727  0.23542075 -0.04371302  0.14506276  0.20051506  0.10505495]] probs:[[0.18535352 0.18049054 0.13653004 0.16489689 0.17429908 0.15842995]] entropy:[1.7868968]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6219 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.26200777  0.23542261 -0.04371187  0.1450622   0.20051683  0.10505918]] probs:[[0.18535332 0.1804906  0.13652998 0.16489655 0.17429912 0.15843038]] entropy:[1.786897]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6220 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.26200143  0.23544915 -0.04370712  0.14506562  0.20051996  0.10506479]] probs:[[0.185351   0.18049426 0.13652979 0.16489609 0.17429858 0.15843028]] entropy:[1.7868967]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6221 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2620015   0.23544918 -0.04370711  0.14506565  0.20052002  0.1050649 ]] probs:[[0.185351   0.18049426 0.13652977 0.16489607 0.17429858 0.1584303 ]] entropy:[1.7868967]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6222 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.26200542  0.23543268 -0.04370995  0.14506358  0.20051827  0.10506177]] probs:[[0.18535244 0.18049198 0.13652991 0.16489637 0.17429894 0.15843041]] entropy:[1.7868968]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6223 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.26200542  0.2354327  -0.04370995  0.14506358  0.20051828  0.10506174]] probs:[[0.18535244 0.18049198 0.13652991 0.16489637 0.17429894 0.1584304 ]] entropy:[1.7868968]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6224 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.26188383  0.23596697 -0.04361254  0.14513353  0.20057586  0.10516966]] probs:[[0.18530656 0.1805657  0.13652602 0.16488715 0.17428704 0.15842755]] entropy:[1.7868953]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6225 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.26199695  0.23546879 -0.04370381  0.14506787  0.20052207  0.10506867]] probs:[[0.18534933 0.18049699 0.13652961 0.1648957  0.17429817 0.15843019]] entropy:[1.7868967]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3820559] v_loss:[[5.5630622e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.2528509306207245
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6226 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2554186   0.22940291 -0.02626032  0.14340965  0.19263099  0.10909699]] probs:[[0.18430142 0.17956853 0.13905835 0.16477214 0.17308538 0.15921427]] entropy:[1.7876444]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6227 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25661027  0.23016882 -0.02658524  0.14203253  0.19380213  0.10918906]] probs:[[0.18446529 0.17965169 0.13897108 0.16449556 0.17323571 0.15918072]] entropy:[1.7875948]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6228 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2599261   0.23184226 -0.02735708  0.13822357  0.19706021  0.10948529]] probs:[[0.18493122 0.17980988 0.13875374 0.16374026 0.17366324 0.15910164]] entropy:[1.7874632]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6229 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25993663  0.23184098 -0.02737739  0.13828598  0.19711997  0.10943752]] probs:[[0.18493098 0.17980753 0.13874929 0.16374855 0.17367157 0.15909214]] entropy:[1.7874621]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6230 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25967705  0.23174034 -0.02730243  0.13850638  0.19681777  0.10945357]] probs:[[0.18489581 0.17980193 0.13876933 0.16379602 0.17363115 0.15910575]] entropy:[1.7874728]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6231 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2597447   0.23176481 -0.02732762  0.13847198  0.19691202  0.10943414]] probs:[[0.18490443 0.17980254 0.1387629  0.16378693 0.17364386 0.1590993 ]] entropy:[1.7874696]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6232 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25974622  0.2317654  -0.027328    0.13847052  0.19691356  0.10943424]] probs:[[0.18490465 0.1798026  0.13876282 0.16378663 0.17364408 0.15909927]] entropy:[1.7874696]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6233 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25967127  0.23173782 -0.02730118  0.13851269  0.19681221  0.10945312]] probs:[[0.184895   0.1798017  0.13876967 0.16379726 0.17363042 0.15910588]] entropy:[1.787473]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4363327] v_loss:[[5.710823e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.2808537711283711
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6234 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22676963  0.22025284 -0.00641439  0.14477363  0.19723128  0.12347379]] probs:[[0.17922619 0.178062   0.14194854 0.16511673 0.17400956 0.16163695]] entropy:[1.7887425]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6235 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22630896  0.22001897 -0.00628428  0.14527795  0.19672906  0.12347497]] probs:[[0.17916328 0.17803988 0.14198257 0.16521811 0.17394125 0.16165486]] entropy:[1.7887583]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6236 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22621687  0.21997036 -0.00625979  0.14538898  0.19663386  0.12346988]] probs:[[0.17915049 0.17803492 0.141989   0.1652399  0.1739283  0.1616574 ]] entropy:[1.7887614]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6237 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22651826  0.22012809 -0.00635454  0.14506881  0.19698282  0.1234532 ]] probs:[[0.17919129 0.17804988 0.14196508 0.16517483 0.1739762  0.16164277]] entropy:[1.7887508]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6238 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22626108  0.2199937  -0.00627169  0.14533582  0.19667982  0.12347215]] probs:[[0.17915662 0.17803729 0.14198588 0.16522945 0.17393455 0.16165613]] entropy:[1.7887597]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6239 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22620635  0.21996486 -0.00625716  0.1454017   0.19662295  0.12346927]] probs:[[0.17914903 0.17803435 0.14198971 0.16524239 0.17392682 0.16165766]] entropy:[1.7887617]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6240 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22651622  0.22012705 -0.00635401  0.14507125  0.19698073  0.1234531 ]] probs:[[0.179191   0.17804976 0.14196521 0.1651753  0.1739759  0.16164283]] entropy:[1.7887506]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6241 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22626063  0.21999346 -0.00627159  0.14533629  0.19667931  0.12347218]] probs:[[0.17915657 0.17803727 0.14198592 0.16522956 0.1739345  0.16165617]] entropy:[1.78876]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.393857] v_loss:[[1.5164801e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.27939805769042114
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6242 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20630366 0.2085908  0.01434176 0.15339287 0.18593486 0.13209412]] probs:[[0.17591985 0.17632267 0.14519338 0.16685376 0.17237282 0.16333756]] entropy:[1.7896559]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6243 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20597309 0.20837079 0.01445139 0.1537007  0.18554203 0.13212298]] probs:[[0.17587799 0.1763002  0.14522275 0.16692057 0.17232108 0.16335739]] entropy:[1.7896655]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6244 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20592512 0.20833004 0.01443334 0.1538045  0.18553486 0.13207656]] probs:[[0.17587127 0.17629473 0.14522155 0.16693953 0.17232151 0.1633514 ]] entropy:[1.7896657]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6245 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20590197 0.20829345 0.01437042 0.15382822 0.1854893  0.13206929]] probs:[[0.17587155 0.17629266 0.14521602 0.16694763 0.17231794 0.16335426]] entropy:[1.7896652]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6246 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20578034 0.20806767 0.01393144 0.15389626 0.1851631  0.13206413]] probs:[[0.17588016 0.17628291 0.14517704 0.16698748 0.17229111 0.1633813 ]] entropy:[1.7896613]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6247 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20499425 0.2064733  0.01096159 0.15466222 0.18339205 0.13174246]] probs:[[0.17593178 0.1761922  0.14490287 0.16729593 0.17217202 0.16350515]] entropy:[1.7896309]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6248 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2006875   0.19881833 -0.00160336  0.15873599  0.17275324  0.12985322]] probs:[[0.17611913 0.17579025 0.1438642  0.16888352 0.17126745 0.16407545]] entropy:[1.789517]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6249 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1954105   0.18904474 -0.02270664  0.16797125  0.15164927  0.12548044]] probs:[[0.17667031 0.17554924 0.14204845 0.17188852 0.16910572 0.16473784]] entropy:[1.7892077]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4060493] v_loss:[[8.550871e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.11126020521742991
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6250 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18721889 0.18836138 0.01170726 0.1632355  0.16858388 0.13793872]] probs:[[0.17391531 0.17411412 0.14591976 0.16979384 0.17070441 0.1655525 ]] entropy:[1.7900102]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6251 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19142179 0.19583063 0.02442604 0.15905969 0.17939702 0.14001645]] probs:[[0.17371006 0.1744776  0.14699401 0.16817844 0.17163375 0.16500607]] entropy:[1.7901342]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6252 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1922627  0.19758339 0.02766418 0.15828533 0.1813725  0.14028858]] probs:[[0.17365076 0.17457716 0.1472965  0.16784967 0.17176993 0.16485594]] entropy:[1.790167]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6253 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1924084  0.19785674 0.02820085 0.15820406 0.18176852 0.14029585]] probs:[[0.17364    0.17458864 0.14734498 0.16780119 0.17180228 0.1648229 ]] entropy:[1.7901719]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6254 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19242987 0.19789805 0.02828386 0.15818301 0.18181919 0.14030448]] probs:[[0.17363855 0.17459063 0.14735281 0.16779265 0.17180587 0.16481942]] entropy:[1.7901727]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6255 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19243298 0.19791014 0.02832138 0.15811747 0.18177098 0.14035816]] probs:[[0.1736395  0.17459317 0.1473587  0.16778205 0.17179799 0.16482867]] entropy:[1.7901738]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6256 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.19243348 0.19791119 0.02832352 0.15811616 0.1817716  0.14035888]] probs:[[0.17363946 0.17459323 0.14735891 0.16778171 0.17179798 0.16482866]] entropy:[1.7901735]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-0.4291203] v_loss:[[0.00742596]]
DEBUG:chainerrl.agents.a3c:grad norm:12.724348219333852
DEBUG:chainerrl.agents.a3c:update


INFO: outdir:result global_step:12769 local_step:6256 R:1.2000000000000002
INFO: statistics:[('average_value', 0.21203951051049783), ('average_entropy', 1.781724948564879)]
DEBUG: Closing video encoder: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000008.mp4


DEBUG:chainerrl.agents.a3c:t:6257 r:0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13689157 0.12650496 0.06977978 0.1335847  0.03268572 0.0182908 ]] probs:[[0.17511116 0.17330176 0.1637448  0.17453304 0.15778211 0.15552713]] entropy:[1.7905881]
DEBUG:chainerrl.agents.a3c:t:6258 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11182597 0.1781155  0.02002218 0.09558401 0.02257672 0.03550913]] probs:[[0.17224008 0.18404473 0.15713188 0.16946515 0.1575338  0.15958433]] entropy:[1.7900822]
DEBUG:chainerrl.agents.a3c:t:6259 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10736188 0.18613568 0.01163561 0.09097627 0.01972144 0.03871952]] probs:[[0.17169826 0.18577059 0.1560244  0.1689078  0.1572911  0.1603079 ]] entropy:[1.7898749]
DEBUG:chainerrl.agents.a3c:t:6260 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10668092 0.18710859 0.01050281 0.09016863 0.01938448 0.03929237]] probs:[[0.17161743 0.18599048 0.15

DEBUG:chainerrl.agents.a3c:t:6286 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07690243 0.15581957 0.02327327 0.10900045 0.02588196 0.05588511]] probs:[[0.1668898  0.1805939  0.1581754  0.17233352 0.15858857 0.16341883]] entropy:[1.7906468]
DEBUG:chainerrl.agents.a3c:t:6287 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07690141 0.15581954 0.02327465 0.10899864 0.02588235 0.05588566]] probs:[[0.16688965 0.18059391 0.15817563 0.17233324 0.15858865 0.16341893]] entropy:[1.7906468]
DEBUG:chainerrl.agents.a3c:t:6288 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07686295 0.15581235 0.02325282 0.10897489 0.02585423 0.05587751]] probs:[[0.16688675 0.18059641 0.15817551 0.17233276 0.15858753 0.16342103]] entropy:[1.7906467]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.508718] v_loss:[[7.048008e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.15294576087465067
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6289 r:0.0 

DEBUG:chainerrl.agents.a3c:t:6315 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06412716 0.12064581 0.07257015 0.05593205 0.0433272  0.0828452 ]] probs:[[0.16510443 0.17470466 0.16650431 0.1637569  0.16170573 0.16822396]] entropy:[1.7914538]
DEBUG:chainerrl.agents.a3c:t:6316 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06862718 0.1283628  0.07406179 0.05494566 0.02791725 0.09092773]] probs:[[0.16567056 0.17586856 0.16657338 0.16341937 0.15906157 0.16940661]] entropy:[1.7912766]
DEBUG:chainerrl.agents.a3c:t:6317 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06700286 0.12385894 0.06982273 0.05602685 0.03109255 0.08876134]] probs:[[0.16564147 0.17533207 0.16610922 0.16383334 0.15979877 0.16928507]] entropy:[1.7913458]
DEBUG:chainerrl.agents.a3c:t:6318 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06635264 0.12274739 0.0693954  0.05624088 0.03186729 0.08776475]] probs:[[0.16559727 0.17520444 0.

DEBUG:chainerrl.agents.a3c:t:6344 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0109894   0.03420542  0.08847212  0.19471014  0.0203666   0.08703964]] probs:[[0.15351458 0.16061182 0.16956851 0.18857487 0.15840444 0.16932578]] entropy:[1.78948]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5167336] v_loss:[[9.138584e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.24264185727138277
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6345 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00723834  0.04526779  0.08825606  0.1776743   0.02429728  0.08726102]] probs:[[0.15412186 0.16243044 0.16956529 0.18542607 0.15905964 0.16939664]] entropy:[1.7899787]
DEBUG:chainerrl.agents.a3c:t:6346 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00806247  0.05039287  0.09074477  0.17875567  0.02874912  0.09340632]] probs:[[0.15352134 0.16276297 0.16946508 0.18505585 0.159278   0.1699167 ]] entropy:[1.7899806]
DEBUG:chainerrl.agents.

DEBUG:chainerrl.agents.a3c:t:6373 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02152885 0.08512057 0.07313047 0.15504621 0.03178895 0.09053957]] probs:[[0.15764876 0.16799954 0.16599724 0.18016748 0.15927458 0.16891241]] entropy:[1.7907903]
DEBUG:chainerrl.agents.a3c:t:6374 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02137554 0.0844651  0.07408174 0.15501277 0.03321574 0.08950698]] probs:[[0.15761343 0.16787758 0.16614346 0.1801487  0.1594907  0.16872613]] entropy:[1.7908031]
DEBUG:chainerrl.agents.a3c:t:6375 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0213742  0.08442567 0.07413241 0.15501386 0.03328752 0.08944497]] probs:[[0.1576128  0.16787049 0.16615142 0.18014841 0.15950172 0.16871521]] entropy:[1.7908039]
DEBUG:chainerrl.agents.a3c:t:6376 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02137069 0.08437604 0.07417537 0.15503433 0.03336943 0.08936074]] probs:[[0.1576121  0.16786201 0.

DEBUG:chainerrl.agents.a3c:t:6402 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02817396 0.03119195 0.08842948 0.12430409 0.10991624 0.08302101]] probs:[[0.1585392  0.1590184  0.16838574 0.17453618 0.17204295 0.16747749]] entropy:[1.7911003]
DEBUG:chainerrl.agents.a3c:t:6403 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02823187 0.03124649 0.08838915 0.12422176 0.10983825 0.08304375]] probs:[[0.15855044 0.15902914 0.16838114 0.17452408 0.17203178 0.16748348]] entropy:[1.7911022]
DEBUG:chainerrl.agents.a3c:t:6404 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02671378 0.03037613 0.08727618 0.12383854 0.10910246 0.08466256]] probs:[[0.15838726 0.1589684  0.168276   0.17454243 0.1719892  0.16783676]] entropy:[1.7910906]
DEBUG:chainerrl.agents.a3c:t:6405 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02806297 0.03113704 0.08826612 0.12417944 0.10976374 0.08322097]] probs:[[0.15853246 0.15902054 0.

DEBUG:chainerrl.agents.a3c:t:6431 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05482912 0.04416564 0.07570297 0.08016113 0.10171631 0.10941466]] probs:[[0.16285986 0.16113244 0.1662951  0.16703813 0.17067775 0.17199676]] entropy:[1.7914897]
DEBUG:chainerrl.agents.a3c:t:6432 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05947611 0.04741206 0.07173639 0.08780799 0.0909595  0.11997635]] probs:[[0.16330707 0.16134876 0.16532159 0.16800004 0.16853032 0.17349218]] entropy:[1.7914805]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4145926] v_loss:[[1.4366519e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.10410925627350828
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6433 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06547256 0.05383702 0.07127666 0.07803981 0.08257542 0.11783317]] probs:[[0.16453038 0.16262707 0.16548811 0.16661112 0.16736852 0.17337482]] entropy:[1.7915574]
DEBUG:chainerrl.agents.a3c:t:6434 r:0.

DEBUG:chainerrl.agents.a3c:t:6460 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0691323  0.07210714 0.06792508 0.09169079 0.06897754 0.1066199 ]] probs:[[0.16494486 0.16543628 0.16474585 0.16870806 0.16491933 0.1712456 ]] entropy:[1.791651]
DEBUG:chainerrl.agents.a3c:t:6461 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06922717 0.07212159 0.0679004  0.091693   0.06899252 0.10659193]] probs:[[0.16495854 0.1654367  0.16473983 0.1687064  0.16491982 0.17123877]] entropy:[1.7916512]
DEBUG:chainerrl.agents.a3c:t:6462 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06926396 0.07212471 0.06788785 0.0916901  0.06899539 0.1065812 ]] probs:[[0.16496415 0.16543674 0.1647373  0.16870546 0.16491985 0.17123647]] entropy:[1.7916512]
DEBUG:chainerrl.agents.a3c:t:6463 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06926926 0.07212276 0.06788427 0.09168744 0.06899451 0.10657945]] probs:[[0.16496518 0.16543658 0.1

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5463961] v_loss:[[0.00016702]]
DEBUG:chainerrl.agents.a3c:grad norm:0.5849508980947334
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6489 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00510466  0.10742668  0.01822026  0.14101839  0.15699321  0.0834417 ]] probs:[[0.15223932 0.17037214 0.15583204 0.17619245 0.17902969 0.16633439]] entropy:[1.7900002]
DEBUG:chainerrl.agents.a3c:t:6490 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00510714  0.10742299  0.01821846  0.14102232  0.15699647  0.08344154]] probs:[[0.15223895 0.17037152 0.15583175 0.17619313 0.17903027 0.16633436]] entropy:[1.79]
DEBUG:chainerrl.agents.a3c:t:6491 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0051135   0.1074333   0.01821483  0.14101677  0.15698306  0.08345847]] probs:[[0.15223803 0.17037334 0.15583123 0.17619221 0.17902793 0.16633722]] entropy:[1.79]
DEBUG:chainerrl.agents.a3c:t:6492 

DEBUG:chainerrl.agents.a3c:t:6518 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04876481  0.07140619  0.03383379  0.13813116  0.24430908  0.07940774]] probs:[[0.14499924 0.16351415 0.15748452 0.17479685 0.19437754 0.16482775]] entropy:[1.787607]
DEBUG:chainerrl.agents.a3c:t:6519 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04882134  0.0714096   0.03374688  0.1385145   0.24442641  0.07889437]] probs:[[0.14499336 0.16351733 0.15747336 0.17486668 0.19440347 0.16474581]] entropy:[1.7875973]
DEBUG:chainerrl.agents.a3c:t:6520 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04888374  0.07141945  0.03407793  0.13708189  0.24400826  0.08068197]] probs:[[0.14498319 0.16351764 0.15752424 0.17461497 0.19432066 0.16503927]] entropy:[1.7876266]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5943072] v_loss:[[0.00033449]]
DEBUG:chainerrl.agents.a3c:grad norm:0.7876031955642401
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3

DEBUG:chainerrl.agents.a3c:t:6546 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04250716  0.0988044  -0.00395835  0.04912517  0.19474487  0.13454652]] probs:[[0.1592358  0.16845748 0.15200609 0.16029312 0.18542005 0.1745874 ]] entropy:[1.7895906]
DEBUG:chainerrl.agents.a3c:t:6547 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04250692  0.09880367 -0.00396021  0.0491231   0.19474478  0.1345466 ]] probs:[[0.15923588 0.1684575  0.15200594 0.16029292 0.18542019 0.17458755]] entropy:[1.7895907]
DEBUG:chainerrl.agents.a3c:t:6548 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04250693  0.09880368 -0.00396017  0.04912317  0.19474472  0.13454662]] probs:[[0.1592359  0.16845751 0.15200594 0.16029292 0.18542019 0.17458756]] entropy:[1.7895908]
DEBUG:chainerrl.agents.a3c:t:6549 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04250696  0.09880374 -0.0039602   0.04912315  0.19474468  0.13454661]] probs:[[

DEBUG:chainerrl.agents.a3c:t:6575 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11081903 0.0959914  0.05411247 0.09976243 0.13176157 0.1830194 ]] probs:[[0.16624574 0.1637989  0.15708084 0.16441776 0.16976407 0.17869268]] entropy:[1.7909855]
DEBUG:chainerrl.agents.a3c:t:6576 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12592313 0.13496813 0.0649876  0.09770842 0.13313796 0.18852864]] probs:[[0.16683358 0.16834943 0.15697102 0.1621922  0.1680416  0.17761216]] entropy:[1.7910452]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4934138] v_loss:[[4.7550206e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.1332873276467043
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6577 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11466108 0.13214396 0.06262502 0.10519917 0.11642062 0.18016018]] probs:[[0.16592094 0.16884722 0.15750785 0.16435842 0.16621314 0.17715243]] entropy:[1.7911475]
DEBUG:chainerrl.agents.a3c:t:6578 r:0.0

DEBUG:chainerrl.agents.a3c:t:6604 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13128749 0.13110504 0.08741799 0.10358811 0.11388929 0.17033808]] probs:[[0.16800645 0.1679758  0.16079542 0.16341662 0.16510873 0.17469698]] entropy:[1.7914146]
DEBUG:chainerrl.agents.a3c:t:6605 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12562622 0.12309774 0.07923689 0.10153195 0.11428335 0.16390333]] probs:[[0.16789569 0.1674717  0.16028501 0.1638987  0.16600204 0.17444684]] entropy:[1.791426]
DEBUG:chainerrl.agents.a3c:t:6606 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14069946 0.12020338 0.09152723 0.08738307 0.12063107 0.1486047 ]] probs:[[0.17041937 0.16696198 0.16224214 0.16157119 0.1670334  0.1717719 ]] entropy:[1.7915018]
DEBUG:chainerrl.agents.a3c:t:6607 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15113717 0.11687945 0.09452856 0.07138521 0.13251363 0.12157083]] probs:[[0.17279972 0.16698024 0.1

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4840051] v_loss:[[6.4053005e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.23292525735894615
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6633 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14211997 0.02412036 0.14311635 0.01407097 0.19882429 0.14843844]] probs:[[0.17140354 0.15232572 0.17157441 0.1508026  0.18140373 0.17248999]] entropy:[1.789472]
DEBUG:chainerrl.agents.a3c:t:6634 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14247826 0.02450486 0.14333779 0.01386312 0.19826485 0.14782916]] probs:[[0.17147866 0.15239646 0.17162612 0.1507833  0.18131673 0.17239869]] entropy:[1.7894831]
DEBUG:chainerrl.agents.a3c:t:6635 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13228402 0.02916777 0.15235409 0.0200315  0.20847276 0.14847006]] probs:[[0.16915902 0.15258518 0.17258835 0.15119746 0.1825507  0.17191932]] entropy:[1.7894675]
DEBUG:chainerrl.agents.a3c:t:6636 r:0.0

DEBUG:chainerrl.agents.a3c:t:6662 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15649627 0.00346724 0.26566577 0.03406098 0.19604497 0.04971682]] probs:[[0.17249455 0.1480184  0.19239204 0.15261681 0.17945316 0.15502496]] entropy:[1.7871794]
DEBUG:chainerrl.agents.a3c:t:6663 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15727857 0.00404293 0.26442984 0.03301761 0.19526847 0.05050073]] probs:[[0.17266311 0.14813243 0.19219175 0.15248731 0.17934875 0.1551767 ]] entropy:[1.7872229]
DEBUG:chainerrl.agents.a3c:t:6664 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15653746 0.00318214 0.26500744 0.03387253 0.19576067 0.04967152]] probs:[[0.17254454 0.148013   0.19231322 0.152626   0.17944677 0.15505648]] entropy:[1.787192]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5813186] v_loss:[[0.0002804]]
DEBUG:chainerrl.agents.a3c:grad norm:0.6307071549542895
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6665 r:0.0 a:2 

DEBUG:chainerrl.agents.a3c:t:6691 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.35801575 -0.05226855  0.34413615  0.00090509  0.00723568 -0.04406638]] probs:[[0.21175505 0.14049135 0.20883629 0.14816396 0.14910491 0.14164843]] entropy:[1.7750332]
DEBUG:chainerrl.agents.a3c:t:6692 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.35802415 -0.05227233  0.3441312   0.000904    0.0072317  -0.04406626]] probs:[[0.21175696 0.1404909  0.20883538 0.1481639  0.1491044  0.14164853]] entropy:[1.7750329]
DEBUG:chainerrl.agents.a3c:t:6693 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.35802862 -0.05227442  0.34412855  0.00090345  0.00722943 -0.04406627]] probs:[[0.21175797 0.14049065 0.20883489 0.14816386 0.1491041  0.14164856]] entropy:[1.7750328]
DEBUG:chainerrl.agents.a3c:t:6694 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.35802272 -0.05227176  0.34413198  0.00090419  0.00723226 -0.04406629]] probs:[[

DEBUG:chainerrl.agents.a3c:t:6720 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07058277 -0.13809706  0.11880922 -0.05637849  0.01661959  0.5948236 ]] probs:[[0.15677729 0.12724909 0.16452341 0.13808441 0.14854132 0.26482445]] entropy:[1.7582641]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.570562] v_loss:[[0.00032552]]
DEBUG:chainerrl.agents.a3c:grad norm:2.422357241828488
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6721 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08060393 -0.10836063  0.11991712 -0.02155098  0.04121436  0.496834  ]] probs:[[0.16004065 0.1324842  0.16645768 0.14449905 0.15385927 0.2426592 ]] entropy:[1.770638]
DEBUG:chainerrl.agents.a3c:t:6722 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08060089 -0.10835987  0.11991815 -0.02155101  0.04121519  0.49683473]] probs:[[0.16004016 0.13248427 0.16645782 0.14449903 0.15385936 0.24265933]] entropy:[1.7706378]
DEBUG:chainerrl.agents.a3c:

DEBUG:chainerrl.agents.a3c:t:6748 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0839233  -0.02840784  0.11981605  0.04766477  0.06554475  0.32245347]] probs:[[0.16271128 0.14542292 0.16865751 0.15691727 0.1597482  0.20654276]] entropy:[1.7854187]
DEBUG:chainerrl.agents.a3c:t:6749 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08399276 -0.02839072  0.11981457  0.04765441  0.06554121  0.3224014 ]] probs:[[0.16272248 0.14542532 0.16865715 0.15691556 0.15974753 0.20653188]] entropy:[1.7854215]
DEBUG:chainerrl.agents.a3c:t:6750 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08400027 -0.02839848  0.11981218  0.0476452   0.06553327  0.322392  ]] probs:[[0.16272452 0.14542492 0.1686576  0.1569149  0.15974706 0.20653099]] entropy:[1.7854216]
DEBUG:chainerrl.agents.a3c:t:6751 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0839728  -0.02839303  0.1198102   0.04765712  0.06554551  0.32241693]] probs:[[

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6708729] v_loss:[[0.00072675]]
DEBUG:chainerrl.agents.a3c:grad norm:1.88537974192915
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6777 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00076221 -0.01058397 -0.0222994   0.1390654   0.12853965  0.36949986]] probs:[[0.14913926 0.14768162 0.14596157 0.17152141 0.16972548 0.21597067]] entropy:[1.7815738]
DEBUG:chainerrl.agents.a3c:t:6778 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0007622  -0.01058409 -0.0222995   0.13906546  0.12853962  0.36949977]] probs:[[0.14913929 0.14768161 0.14596157 0.17152144 0.1697255  0.21597067]] entropy:[1.7815739]
DEBUG:chainerrl.agents.a3c:t:6779 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00078135 -0.01059324 -0.02229385  0.13906567  0.12853819  0.36950305]] probs:[[0.14913686 0.1476807  0.1459628  0.17152198 0.16972575 0.21597199]] entropy:[1.7815734]
DEBUG:chainerrl.agents.a3c

DEBUG:chainerrl.agents.a3c:t:6805 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00394257 0.12068019 0.10866995 0.12557745 0.16261804 0.10162646]] probs:[[0.15064447 0.16929795 0.1672768  0.17012908 0.17654893 0.16610274]] entropy:[1.7906175]
DEBUG:chainerrl.agents.a3c:t:6806 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00394112 0.12067954 0.10866944 0.12558077 0.16261648 0.10162711]] probs:[[0.15064427 0.16929786 0.16727673 0.17012966 0.17654866 0.16610286]] entropy:[1.7906177]
DEBUG:chainerrl.agents.a3c:t:6807 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00394092 0.12067947 0.10866936 0.12558141 0.16261616 0.10162725]] probs:[[0.15064423 0.16929784 0.16727671 0.17012976 0.1765486  0.16610287]] entropy:[1.7906175]
DEBUG:chainerrl.agents.a3c:t:6808 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00394119 0.12067913 0.10867008 0.12558201 0.16261664 0.1016278 ]] probs:[[0.15064421 0.16929771 0.

DEBUG:chainerrl.agents.a3c:t:6834 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.31309947 -0.02778085 -0.00153237  0.03183977  0.15127727  0.16396657]] probs:[[0.20374562 0.14489248 0.14874603 0.15379377 0.17330448 0.1755176 ]] entropy:[1.7845503]
DEBUG:chainerrl.agents.a3c:t:6835 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.31310484 -0.02777568 -0.00154631  0.03182305  0.15127078  0.16396692]] probs:[[0.20374751 0.1448938  0.14874454 0.1537918  0.17330405 0.17551836]] entropy:[1.7845497]
DEBUG:chainerrl.agents.a3c:t:6836 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.313106   -0.02777495 -0.00154929  0.03181995  0.15126969  0.1639674 ]] probs:[[0.2037479  0.144894   0.14874421 0.15379143 0.17330396 0.17551857]] entropy:[1.7845496]
DEBUG:chainerrl.agents.a3c:t:6837 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.31310242 -0.02777709 -0.00154585  0.03182643  0.15127154  0.16396669]] probs:[[

DEBUG:chainerrl.agents.a3c:t:6863 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25565475 -0.00784435  0.0063496   0.03918089  0.18998815  0.15032367]] probs:[[0.19269593 0.14805952 0.15017605 0.15518835 0.18044876 0.17343144]] entropy:[1.7868063]
DEBUG:chainerrl.agents.a3c:t:6864 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25563854 -0.00785104  0.00641307  0.03922829  0.19000725  0.15031025]] probs:[[0.1926901  0.14805645 0.15018348 0.15519354 0.18044968 0.17342669]] entropy:[1.7868081]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7225237] v_loss:[[0.00102101]]
DEBUG:chainerrl.agents.a3c:grad norm:2.0147676946263613
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6865 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24188203 -0.0047603   0.02240528  0.01297558  0.2108734   0.1535978 ]] probs:[[0.18994327 0.14842549 0.15251282 0.15108143 0.18414377 0.17389318]] entropy:[1.7867565]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:6891 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2749705  -0.01630226  0.0114974  -0.01056936  0.09077339  0.27090684]] probs:[[0.19627027 0.1466751  0.15080982 0.14751838 0.16325209 0.19547431]] entropy:[1.7837067]
DEBUG:chainerrl.agents.a3c:t:6892 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2749705  -0.01630226  0.0114974  -0.01056936  0.09077339  0.27090684]] probs:[[0.19627027 0.1466751  0.15080982 0.14751838 0.16325209 0.19547431]] entropy:[1.7837067]
DEBUG:chainerrl.agents.a3c:t:6893 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2749715  -0.01629138  0.01149642 -0.0105684   0.09076995  0.27091146]] probs:[[0.19627005 0.14667638 0.15080935 0.14751822 0.16325118 0.1954748 ]] entropy:[1.7837067]
DEBUG:chainerrl.agents.a3c:t:6894 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.27497125 -0.01629242  0.01149651 -0.01056741  0.09077118  0.2709096 ]] probs:[[

DEBUG:chainerrl.agents.a3c:t:6920 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.20527464 0.04293998 0.03601311 0.04243921 0.06502881 0.22345188]] probs:[[0.18410566 0.15651865 0.15543821 0.15644029 0.16001444 0.18748279]] entropy:[1.7884711]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.613315] v_loss:[[0.00043652]]
DEBUG:chainerrl.agents.a3c:grad norm:0.520487154371033
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6921 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16259024  0.04036622  0.03912485 -0.02757672  0.059794    0.33915442]] probs:[[0.17571463 0.15549868 0.15530577 0.14528456 0.15854922 0.20964706]] entropy:[1.7839849]
DEBUG:chainerrl.agents.a3c:t:6922 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1610354   0.0400856   0.04055154 -0.02866439  0.06104014  0.3384509 ]] probs:[[0.1754772  0.15548657 0.15555903 0.14515606 0.1587791  0.2095421 ]] entropy:[1.7840319]
DEBUG:chainerrl.agents.a3c:t:692

DEBUG:chainerrl.agents.a3c:t:6948 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.347984    0.10965882  0.04748563 -0.05232889  0.04913669  0.17609413]] probs:[[0.20910658 0.16476479 0.15483278 0.14012448 0.15508862 0.17608277]] entropy:[1.7834141]
DEBUG:chainerrl.agents.a3c:t:6949 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.34798488  0.1096573   0.04748715 -0.05231846  0.04914309  0.17608443]] probs:[[0.20910655 0.16476437 0.15483285 0.1401258  0.15508947 0.1760809 ]] entropy:[1.7834145]
DEBUG:chainerrl.agents.a3c:t:6950 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.34797314  0.10967296  0.04746619 -0.05243282  0.04906708  0.17619275]] probs:[[0.20910658 0.1647689  0.15483145 0.14011145 0.15507951 0.17610206]] entropy:[1.7834101]
DEBUG:chainerrl.agents.a3c:t:6951 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.3479841   0.10965857  0.04748583 -0.05232764  0.0491374   0.17609297]] probs:[[

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7921175] v_loss:[[0.00177213]]
DEBUG:chainerrl.agents.a3c:grad norm:5.324816385003105
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:6977 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11721063 0.10409522 0.09303287 0.03431529 0.08448909 0.06600562]] probs:[[0.17237155 0.1701256  0.16825397 0.15865897 0.16682257 0.16376743]] entropy:[1.7913988]
DEBUG:chainerrl.agents.a3c:t:6978 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11746032 0.10421925 0.09255476 0.0346345  0.08407775 0.06617514]] probs:[[0.17241569 0.17014776 0.16817461 0.15871061 0.166755   0.16379623]] entropy:[1.7914007]
DEBUG:chainerrl.agents.a3c:t:6979 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11714046 0.104063   0.09314087 0.03424187 0.08457626 0.06596614]] probs:[[0.17235997 0.17012061 0.16827264 0.15864779 0.1668376  0.16376145]] entropy:[1.7913985]
DEBUG:chainerrl.agents.a3c:t:6980 r:0.0 a:2

INFO: outdir:result global_step:14251 local_step:6981 R:3.55
INFO: statistics:[('average_value', 0.38101056578588344), ('average_entropy', 1.7831998124900945)]


DEBUG:chainerrl.agents.a3c:t:6982 r:0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04388902 -0.44640374  0.10973659  0.56324786 -0.10675722  0.40959904]] probs:[[0.13922302 0.0930896  0.16234161 0.25549787 0.1307398  0.2191081 ]] entropy:[1.7379427]
DEBUG:chainerrl.agents.a3c:t:6983 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0887769  -0.5051635   0.04581875  0.58124375 -0.13506216  0.4618046 ]] probs:[[0.13428572 0.08855141 0.15363282 0.2624316  0.12821192 0.23288651]] entropy:[1.7258556]
DEBUG:chainerrl.agents.a3c:t:6984 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07300711 -0.49975944  0.01117279  0.55954504 -0.14601867  0.45472765]] probs:[[0.1379779  0.09004785 0.15009576 0.25973082 0.12826289 0.23388472]] entropy:[1.728097]
DEBUG:chainerrl.agents.a3c:t:6985 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06312842 -0.49417996 -0.00268587  0.54882556 -0.14911833  0.447519  ]] probs:[[0.1

DEBUG:chainerrl.agents.a3c:t:7011 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03858476 -0.34580466  0.08641876  0.34340096 -0.09359775  0.27525944]] probs:[[0.16052786 0.10929799 0.16839315 0.21773608 0.14065146 0.20339343]] entropy:[1.76733]
DEBUG:chainerrl.agents.a3c:t:7012 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0385789  -0.34581015  0.08640332  0.34340304 -0.09360576  0.27522743]] probs:[[0.16052872 0.10929863 0.16839246 0.21773899 0.14065193 0.20338921]] entropy:[1.7673304]
DEBUG:chainerrl.agents.a3c:t:7013 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04079591 -0.34470102  0.08990029  0.34379882 -0.09212103  0.28284988]] probs:[[0.16041678 0.10910147 0.16849054 0.21719122 0.14045095 0.204349  ]] entropy:[1.7671597]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3980857] v_loss:[[7.610214e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:1.2757141608681402
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:7039 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06926659 -0.20400266  0.09156346  0.22927073 -0.02767312  0.1223649 ]] probs:[[0.16894475 0.12854801 0.172754   0.19825932 0.15333606 0.17815787]] entropy:[1.7831421]
DEBUG:chainerrl.agents.a3c:t:7040 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06195831 -0.20569983  0.08766291  0.23330472 -0.03043112  0.12279751]] probs:[[0.16799419 0.128544   0.17236838 0.1993926  0.15316871 0.17853212]] entropy:[1.7829252]
DEBUG:chainerrl.agents.a3c:t:7041 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06395808 -0.20509581  0.08800433  0.2325686  -0.02969036  0.12078436]] probs:[[0.16831692 0.1286113  0.17241336 0.19922984 0.15326986 0.17815872]] entropy:[1.7830013]
DEBUG:chainerrl.agents.a3c:t:7042 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06496927 -0.20447333  0.08973682  0.23206915 -0.02865785  0.1232273 ]] probs:[[

DEBUG:chainerrl.agents.a3c:t:7068 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01672139 -0.05628882  0.10901611  0.13292994 -0.03937187  0.04014454]] probs:[[0.16343449 0.15192728 0.17923664 0.18357454 0.15451929 0.16730782]] entropy:[1.7893084]
DEBUG:chainerrl.agents.a3c:t:7069 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0161504  -0.05600288  0.10835361  0.13333844 -0.03943947  0.03972652]] probs:[[0.1633696  0.15199716 0.17914908 0.18368147 0.15453571 0.16726698]] entropy:[1.7893112]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4524015] v_loss:[[5.144482e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.09133413959553671
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:7070 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02091388 -0.04535713  0.0974623   0.11668388 -0.02699498  0.04146776]] probs:[[0.16420564 0.15367632 0.17726894 0.18070929 0.15652421 0.16761562]] entropy:[1.7899975]
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:7096 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02971153  0.06510776  0.14950475  0.13131446 -0.05331901 -0.03539433]] probs:[[0.15524438 0.170685   0.18571565 0.18236797 0.15162235 0.15436465]] entropy:[1.7883779]
DEBUG:chainerrl.agents.a3c:t:7097 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03007437  0.06519482  0.14922652  0.13161267 -0.05340358 -0.03553019]] probs:[[0.1551993  0.17071225 0.18567745 0.18243559 0.15162054 0.15435487]] entropy:[1.7883712]
DEBUG:chainerrl.agents.a3c:t:7098 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02469312  0.06318165  0.15379408  0.12727505 -0.05195349 -0.03342577]] probs:[[0.15586533 0.17018178 0.1863226  0.18144645 0.15167378 0.15451014]] entropy:[1.7884622]
DEBUG:chainerrl.agents.a3c:t:7099 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02556748  0.06321461  0.15298538  0.12790045 -0.05226091 -0.03367614]] probs:[[

DEBUG:chainerrl.agents.a3c:t:7125 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07499288  0.06000276  0.09659702  0.15487044 -0.07278437  0.0211835 ]] probs:[[0.1494028  0.17099625 0.17736962 0.18801266 0.14973311 0.16448548]] entropy:[1.7882231]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5401399] v_loss:[[0.00016526]]
DEBUG:chainerrl.agents.a3c:grad norm:0.33569764120115664
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:7126 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06418093  0.04631146  0.09629029  0.1365874  -0.04116916  0.01635636]] probs:[[0.1510487  0.1686954  0.17734085 0.1846331  0.1545649  0.16371705]] entropy:[1.7892447]
DEBUG:chainerrl.agents.a3c:t:7127 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06430826  0.04634751  0.09630987  0.13646105 -0.04130224  0.01627926]] probs:[[0.15103947 0.16871263 0.17735603 0.18462199 0.15455456 0.16371526]] entropy:[1.7892429]
DEBUG:chainerrl.agents.

DEBUG:chainerrl.agents.a3c:t:7153 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03175087  0.01008403  0.043861    0.14114515 -0.02119328 -0.01018562]] probs:[[0.16628654 0.1627224  0.16831253 0.18550962 0.15771165 0.15945728]] entropy:[1.7902734]
DEBUG:chainerrl.agents.a3c:t:7154 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03175139  0.01008447  0.04386138  0.1411453  -0.0211928  -0.01018621]] probs:[[0.1662866  0.16272242 0.16831256 0.1855096  0.15771168 0.15945715]] entropy:[1.7902735]
DEBUG:chainerrl.agents.a3c:t:7155 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03175139  0.01008447  0.04386138  0.1411453  -0.0211928  -0.01018621]] probs:[[0.1662866  0.16272242 0.16831256 0.1855096  0.15771168 0.15945715]] entropy:[1.7902735]
DEBUG:chainerrl.agents.a3c:t:7156 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03175087  0.01008403  0.043861    0.14114515 -0.02119328 -0.01018562]] probs:[[

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4886656] v_loss:[[3.8474467e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.12092022831490265
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:7182 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02182607 0.00846952 0.03159602 0.1015845  0.01360158 0.02120322]] probs:[[0.16472398 0.16253845 0.1663412  0.17840025 0.16337475 0.1646214 ]] entropy:[1.7912474]
DEBUG:chainerrl.agents.a3c:t:7183 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0218184  0.00846366 0.03159291 0.10158341 0.01359577 0.02121182]] probs:[[0.16472311 0.1625379  0.1663411  0.17840049 0.16337422 0.16462322]] entropy:[1.7912471]
DEBUG:chainerrl.agents.a3c:t:7184 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02115976 0.00796361 0.03132703 0.10149423 0.01310808 0.02195035]] probs:[[0.16464889 0.16249044 0.16633146 0.17842168 0.16332851 0.16477911]] entropy:[1.7912445]
DEBUG:chainerrl.agents.a3c:t:7185 r:0.

DEBUG:chainerrl.agents.a3c:t:7211 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03033173 0.06988183 0.01635389 0.09409018 0.00925515 0.01083056]] probs:[[0.16523111 0.17189698 0.16293761 0.17610909 0.16178505 0.16204013]] entropy:[1.7912304]
DEBUG:chainerrl.agents.a3c:t:7212 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03085534 0.06991813 0.01726912 0.09433059 0.00892534 0.0112422 ]] probs:[[0.16526845 0.17185205 0.16303825 0.176099   0.16168356 0.16205859]] entropy:[1.7912322]
DEBUG:chainerrl.agents.a3c:t:7213 r:0.05 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01486014 0.06144702 0.01204922 0.09634776 0.01087176 0.00766922]] probs:[[0.1634351  0.17122918 0.16297634 0.1773107  0.16278456 0.16226408]] entropy:[1.7911878]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4301071] v_loss:[[0.00220959]]
DEBUG:chainerrl.agents.a3c:grad norm:1.4548425095453505
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:7214 r:0.0 a

DEBUG:chainerrl.agents.a3c:t:7239 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03100621 -0.01953062  0.04956216  0.05758131 -0.01163436  0.00538189]] probs:[[0.16865294 0.16034153 0.17181167 0.17319499 0.16161266 0.16438621]] entropy:[1.7913282]
DEBUG:chainerrl.agents.a3c:t:7240 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03227757 -0.01797073  0.05090893  0.05851537 -0.01125186  0.0082355 ]] probs:[[0.16863313 0.16036896 0.17180444 0.17311625 0.16145009 0.1646272 ]] entropy:[1.7913309]
DEBUG:chainerrl.agents.a3c:t:7241 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0383232  -0.01111969  0.05598255  0.06375095 -0.00739394  0.02190241]] probs:[[0.16851032 0.1603813  0.17151253 0.1728501  0.16097994 0.16576584]] entropy:[1.7913457]
DEBUG:chainerrl.agents.a3c:t:7242 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04936671 0.00218724 0.0589851  0.0765506  0.00031164 0.03919609]] probs:[[0.1685

DEBUG:chainerrl.agents.a3c:t:7268 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1333906  -0.00665448  0.072267   -0.01411511  0.16023634  0.13828813]] probs:[[0.17528476 0.15237838 0.1648916  0.15124577 0.18005414 0.17614533]] entropy:[1.7893752]
DEBUG:chainerrl.agents.a3c:t:7269 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1334131  -0.00671103  0.07231507 -0.01432162  0.1600595   0.13826533]] probs:[[0.1752999  0.1523795  0.16491005 0.1512242  0.1800338  0.17615256]] entropy:[1.7893739]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6077242] v_loss:[[0.00036664]]
DEBUG:chainerrl.agents.a3c:grad norm:0.5585674638518994
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:7270 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14142208 -0.05592093  0.00693745  0.07959329  0.0705116   0.21040758]] probs:[[0.17736518 0.14560068 0.15504667 0.16673104 0.1652237  0.19003274]] entropy:[1.7880446]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:7296 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14668475 -0.01744959  0.04858232  0.07195345  0.05186488  0.15254067]] probs:[[0.1786165  0.15157896 0.16192587 0.16575482 0.16245827 0.17966554]] entropy:[1.7900181]
DEBUG:chainerrl.agents.a3c:t:7297 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1462435  -0.0165363   0.04930934  0.07163854  0.0520004   0.15256786]] probs:[[0.17851053 0.15169437 0.16201897 0.16567741 0.16245556 0.17964308]] entropy:[1.79004]
DEBUG:chainerrl.agents.a3c:t:7298 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14617696 -0.01639608  0.04941681  0.07159535  0.05202329  0.15258525]] probs:[[0.17849395 0.15171164 0.16203211 0.1656659  0.16245499 0.17964146]] entropy:[1.7900434]
DEBUG:chainerrl.agents.a3c:t:7299 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1466572  -0.01739624  0.04862747  0.07193454  0.05187085  0.15254138]] probs:[[0.

DEBUG:chainerrl.agents.a3c:t:7325 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12000608 0.03785714 0.06854696 0.04726132 0.04245843 0.14881524]] probs:[[0.17374907 0.16004632 0.16503425 0.16155852 0.16078442 0.17882745]] entropy:[1.7908525]
DEBUG:chainerrl.agents.a3c:pi_loss:[0.56474215] v_loss:[[0.05211798]]
DEBUG:chainerrl.agents.a3c:grad norm:117.36087597716279
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:7326 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.023116   -0.06851175  0.16309527  0.30536258 -0.0944896   0.10937668]] probs:[[0.1570367  0.14328733 0.18063149 0.20824729 0.13961297 0.17118423]] entropy:[1.7819959]
DEBUG:chainerrl.agents.a3c:t:7327 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02533286 -0.06987819  0.16230428  0.29728723 -0.09198323  0.10559349]] probs:[[0.15769446 0.14337282 0.18084328 0.20697825 0.14023831 0.1708729 ]] entropy:[1.7824254]
DEBUG:chainerrl.agents.a3c:t:7

DEBUG:chainerrl.agents.a3c:t:7353 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06393409  0.0052396   0.04246215  0.4083346  -0.10460969  0.0246516 ]] probs:[[0.16288586 0.15360051 0.15942566 0.22985536 0.13762133 0.15661134]] entropy:[1.7773294]
DEBUG:chainerrl.agents.a3c:t:7354 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06373045  0.00546168  0.0423011   0.4082124  -0.10459115  0.02481397]] probs:[[0.16285671 0.15363842 0.15940392 0.22983293 0.13762727 0.15664065]] entropy:[1.777341]
DEBUG:chainerrl.agents.a3c:t:7355 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05942796  0.00342299  0.04313355  0.40921706 -0.10730003  0.02884987]] probs:[[0.16222036 0.15338492 0.15959848 0.2301531  0.13730814 0.15733503]] entropy:[1.7771897]
DEBUG:chainerrl.agents.a3c:t:7356 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0554489   0.00101665  0.04371186  0.41016382 -0.10890239  0.03140996]] probs:[[0

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5861461] v_loss:[[0.00029488]]
DEBUG:chainerrl.agents.a3c:grad norm:0.4888589670058075
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:7382 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15194155  0.13344906  0.04844486  0.21191926 -0.00296231 -0.03334264]] probs:[[0.17753996 0.17428698 0.16008405 0.18851422 0.15206254 0.14751229]] entropy:[1.787931]
DEBUG:chainerrl.agents.a3c:t:7383 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15063332  0.13270764  0.04887844  0.21140139 -0.00299504 -0.03334691]] probs:[[0.17737791 0.17422663 0.16021676 0.18849106 0.15211765 0.14756995]] entropy:[1.787964]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.8249348] v_loss:[[0.0966602]]
DEBUG:chainerrl.agents.a3c:grad norm:153.9245257418183
DEBUG:chainerrl.agents.a3c:update


INFO: outdir:result global_step:15071 local_step:7383 R:1.05
INFO: statistics:[('average_value', 0.3473611268737998), ('average_entropy', 1.7838784408458388)]


DEBUG:chainerrl.agents.a3c:t:7384 r:0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.17855024  0.14475228  0.20240635 -0.08207275  0.09742285 -0.1066661 ]] probs:[[0.18397881 0.17786461 0.1884206  0.14176881 0.16964248 0.13832477]] entropy:[1.7846148]
DEBUG:chainerrl.agents.a3c:t:7385 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.17694184  0.16983286  0.18965891 -0.14590794  0.083056   -0.10437407]] probs:[[0.18535773 0.1840447  0.18772998 0.13421431 0.16874722 0.13990614]] entropy:[1.7829206]
DEBUG:chainerrl.agents.a3c:t:7386 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.17639881  0.17020619  0.18469289 -0.15268293  0.07885155 -0.10554463]] probs:[[0.18576606 0.18461923 0.18731321 0.13367431 0.16850086 0.14012636]] entropy:[1.7827911]
DEBUG:chainerrl.agents.a3c:t:7387 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.17619665  0.1693307   0.18322583 -0.1532298   0.07721326 -0.10631153]] probs:[[0.

DEBUG:chainerrl.agents.a3c:t:7413 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07777946  0.08017582  0.16186854 -0.09203551  0.03025804 -0.0019454 ]] probs:[[0.1720902  0.17250308 0.18718696 0.14521332 0.16410351 0.15890299]] entropy:[1.7887094]
DEBUG:chainerrl.agents.a3c:t:7414 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07805941  0.07945057  0.16217616 -0.09159882  0.03046613 -0.00190894]] probs:[[0.17212391 0.17236353 0.1872288  0.14526454 0.16412385 0.15889542]] entropy:[1.7887148]
DEBUG:chainerrl.agents.a3c:t:7415 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07731551  0.08143848  0.16134289 -0.09282973  0.02988211 -0.00198526]] probs:[[0.17203507 0.17274582 0.18711543 0.14511886 0.16406536 0.15891947]] entropy:[1.7886978]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5254819] v_loss:[[0.0001055]]
DEBUG:chainerrl.agents.a3c:grad norm:0.2030810019387542
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3

DEBUG:chainerrl.agents.a3c:t:7441 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02810245  0.08016388  0.12993054 -0.02803858  0.02854969  0.01665264]] probs:[[0.16406617 0.17283393 0.18165293 0.15510911 0.16413955 0.16219835]] entropy:[1.7904819]
DEBUG:chainerrl.agents.a3c:t:7442 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02889265  0.08026307  0.13066515 -0.02984352  0.02918037  0.01737309]] probs:[[0.16415957 0.17281286 0.18174623 0.15479517 0.1642068  0.16227937]] entropy:[1.7904564]
DEBUG:chainerrl.agents.a3c:t:7443 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03021019  0.07872185  0.13155137 -0.02977107  0.02947373  0.01744963]] probs:[[0.16434588 0.17251512 0.18187405 0.15477802 0.16422488 0.16226207]] entropy:[1.790457]
DEBUG:chainerrl.agents.a3c:t:7444 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02636536  0.07988435  0.12522349 -0.02892155  0.02724682  0.01051741]] probs:[[0

DEBUG:chainerrl.agents.a3c:t:7470 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00939613 0.08499961 0.09789107 0.00523135 0.02832148 0.02057224]] probs:[[0.16136186 0.17403437 0.17629246 0.16069122 0.16444476 0.16317536]] entropy:[1.7910808]
DEBUG:chainerrl.agents.a3c:t:7471 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01020097 0.08423819 0.09854849 0.00384032 0.02894679 0.0204811 ]] probs:[[0.16149533 0.17390573 0.17641227 0.16047136 0.16455123 0.16316408]] entropy:[1.7910768]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4781004] v_loss:[[2.8577877e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.06748811688520738
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:7472 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.01693152 0.08346371 0.09600139 0.00862192 0.01747528 0.02151097]] probs:[[0.16265626 0.17384626 0.17603962 0.16131026 0.16274475 0.16340286]] entropy:[1.791134]
DEBUG:chainerrl.agents.a3c:t:7473 r:0.0

DEBUG:chainerrl.agents.a3c:t:7498 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04098459 0.0687737  0.10115416 0.0069078  0.02382444 0.02821373]] probs:[[0.16592033 0.17059577 0.17621015 0.16036154 0.1630974  0.16381486]] entropy:[1.79126]
DEBUG:chainerrl.agents.a3c:t:7499 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04099694 0.06692656 0.09650409 0.00448307 0.02095592 0.0243343 ]] probs:[[0.16635826 0.17072827 0.17585342 0.16039343 0.16305745 0.16360925]] entropy:[1.7912743]
DEBUG:chainerrl.agents.a3c:t:7500 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0405256  0.06576135 0.09357854 0.00317659 0.0192585  0.02218478]] probs:[[0.16655102 0.17080754 0.17562564 0.16044524 0.16304636 0.16352418]] entropy:[1.7912849]
DEBUG:chainerrl.agents.a3c:t:7501 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04092982 0.06703395 0.09637655 0.00451548 0.02089687 0.02397379]] probs:[[0.16636017 0.17076002 0.17

DEBUG:chainerrl.agents.a3c:t:7527 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02506959  0.20550022 -0.0574831   0.19142452 -0.15320651  0.02171511]] probs:[[0.15632892 0.19686773 0.151343   0.19411609 0.1375277  0.16381651]] entropy:[1.7832401]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.1292226] v_loss:[[0.00151922]]
DEBUG:chainerrl.agents.a3c:grad norm:4.036623885601014
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:7528 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01094932  0.15583102 -0.04894345  0.16174708 -0.07377139  0.00954534]] probs:[[0.15891846 0.1877613  0.15299374 0.1888754  0.149242   0.16220903]] entropy:[1.7872951]
DEBUG:chainerrl.agents.a3c:t:7529 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01096797  0.15577985 -0.0489598   0.16174197 -0.07367241  0.00955306]] probs:[[0.15891549 0.18775171 0.15299125 0.18887445 0.14925678 0.1622103 ]] entropy:[1.7872978]
DEBUG:chainerrl.agents.a3

DEBUG:chainerrl.agents.a3c:t:7555 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02675331  0.06004411 -0.07406791  0.16062698 -0.06695328  0.13915409]] probs:[[0.1564551  0.17064178 0.14922488 0.18869829 0.15029036 0.18468958]] entropy:[1.7872773]
DEBUG:chainerrl.agents.a3c:t:7556 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02675251  0.06004477 -0.07406722  0.16062605 -0.06695331  0.13915439]] probs:[[0.15645519 0.17064185 0.14922495 0.18869807 0.15029031 0.1846896 ]] entropy:[1.7872775]
DEBUG:chainerrl.agents.a3c:t:7557 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02674236  0.06005203 -0.07406062  0.16061611 -0.06695472  0.1391578 ]] probs:[[0.15645641 0.1706427  0.1492256  0.18869576 0.15028976 0.18468979]] entropy:[1.7872779]
DEBUG:chainerrl.agents.a3c:t:7558 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02674993  0.06004513 -0.07406764  0.16062397 -0.06694863  0.13915075]] probs:[[

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.667505] v_loss:[[0.00067068]]
DEBUG:chainerrl.agents.a3c:grad norm:1.0461586151640656
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:7584 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11874786  0.14560308  0.07636079  0.00703401 -0.10484213 -0.02500102]] probs:[[0.18031773 0.18522581 0.1728343  0.16125818 0.14418979 0.15617412]] entropy:[1.7880905]
DEBUG:chainerrl.agents.a3c:t:7585 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11882947  0.14559472  0.07639549  0.00701375 -0.104701   -0.02501062]] probs:[[0.18032618 0.18521783 0.1728343  0.1612493  0.14420514 0.1561672 ]] entropy:[1.788092]
DEBUG:chainerrl.agents.a3c:t:7586 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11932331  0.14571579  0.07674198  0.00630851 -0.10404091 -0.02493652]] probs:[[0.18038559 0.1852098  0.17286576 0.16110913 0.14427663 0.1561531 ]] entropy:[1.788092]
DEBUG:chainerrl.agents.a3c:

DEBUG:chainerrl.agents.a3c:t:7612 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15881841  0.18357341  0.1107452   0.02510094 -0.01453119  0.11435656]] probs:[[0.1769811  0.18141694 0.16867432 0.15482965 0.14881343 0.16928457]] entropy:[1.7893568]
DEBUG:chainerrl.agents.a3c:t:7613 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16121867  0.18400784  0.11087869  0.02488144 -0.01381589  0.11823229]] probs:[[0.17718378 0.181268   0.16848513 0.15460141 0.14873302 0.16972867]] entropy:[1.7893257]
DEBUG:chainerrl.agents.a3c:t:7614 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1615712   0.18406533  0.1108953   0.02482657 -0.01373677  0.11878926]] probs:[[0.17721547 0.18124695 0.16845867 0.1545661  0.14871897 0.16979375]] entropy:[1.7893203]
DEBUG:chainerrl.agents.a3c:t:7615 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16161713  0.18407433  0.11089595  0.02482438 -0.01372218  0.11886296]] probs:[[

DEBUG:chainerrl.agents.a3c:t:7641 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14077516  0.1246942   0.10070987 -0.03671866  0.1967246   0.0773261 ]] probs:[[0.17306408 0.1703033  0.16626729 0.14491801 0.18302292 0.16242443]] entropy:[1.7892814]
DEBUG:chainerrl.agents.a3c:t:7642 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14052503  0.12484963  0.10060027 -0.0361866   0.1971093   0.07716444]] probs:[[0.17300585 0.17031507 0.16623472 0.14498262 0.18307753 0.16238415]] entropy:[1.7892861]
DEBUG:chainerrl.agents.a3c:t:7643 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14050217  0.12485851  0.10059211 -0.03613894  0.19714543  0.07714503]] probs:[[0.17300078 0.17031547 0.16623227 0.1449886  0.18308297 0.16237997]] entropy:[1.7892865]
DEBUG:chainerrl.agents.a3c:t:7644 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14042695  0.12476978  0.10060239 -0.03599616  0.1972903   0.07697848]] probs:[[

DEBUG:chainerrl.agents.a3c:t:7670 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15478158  0.11789476  0.14107051 -0.04401336  0.1731805   0.06222202]] probs:[[0.17543706 0.16908364 0.17304803 0.1438089  0.17869478 0.15992753]] entropy:[1.789188]
DEBUG:chainerrl.agents.a3c:t:7671 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1548014   0.11787665  0.14107026 -0.04401504  0.1731662   0.06221741]] probs:[[0.17544109 0.1690811  0.17304854 0.14380912 0.17869279 0.1599273 ]] entropy:[1.7891879]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7772802] v_loss:[[0.01330488]]
DEBUG:chainerrl.agents.a3c:grad norm:18.177985290890057
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:7672 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.17478034 0.12637481 0.16006318 0.04485453 0.01130613 0.06753935]] probs:[[0.17973341 0.17124052 0.1771076  0.1578348  0.15262751 0.16145617]] entropy:[1.7899483]
DEBUG:chainerrl.agents.a3c:t:76

DEBUG:chainerrl.agents.a3c:t:7699 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15368563 0.12785126 0.13403468 0.05732988 0.04526901 0.07278286]] probs:[[0.17597222 0.17148432 0.17254797 0.15980758 0.15789174 0.16229625]] entropy:[1.7908969]
DEBUG:chainerrl.agents.a3c:t:7700 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15368827 0.12784936 0.13403252 0.05732815 0.04527043 0.07278519]] probs:[[0.17597266 0.17148396 0.17254755 0.15980726 0.15789193 0.1622966 ]] entropy:[1.7908969]
DEBUG:chainerrl.agents.a3c:t:7701 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15368827 0.12784936 0.13403252 0.05732815 0.04527043 0.07278519]] probs:[[0.17597266 0.17148396 0.17254755 0.15980726 0.15789193 0.1622966 ]] entropy:[1.7908969]
DEBUG:chainerrl.agents.a3c:t:7702 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.15368563 0.12785126 0.13403468 0.05732988 0.04526901 0.07278286]] probs:[[0.17597222 0.17148432 0.

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.483887] v_loss:[[3.1746174e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.10096789520693364
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:7728 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12949818 0.09179401 0.23624921 0.03090928 0.04718936 0.05914697]] probs:[[0.17138477 0.16504316 0.1906925  0.15529433 0.15784323 0.15974198]] entropy:[1.7892504]
DEBUG:chainerrl.agents.a3c:t:7729 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1294979  0.09178737 0.23625079 0.03090923 0.04719167 0.05914078]] probs:[[0.17138499 0.16504233 0.19069311 0.15529457 0.15784384 0.15974125]] entropy:[1.7892505]
DEBUG:chainerrl.agents.a3c:t:7730 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.12949689 0.0917642  0.23625708 0.03090928 0.0471996  0.05911927]] probs:[[0.17138568 0.16503932 0.19069526 0.15529536 0.15784588 0.15973862]] entropy:[1.7892501]
DEBUG:chainerrl.agents.a3c:t:7731 r:0.0

DEBUG:chainerrl.agents.a3c:t:7757 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06283177  0.14185414  0.24651805  0.09730369  0.03532051  0.12543616]] probs:[[0.14137001 0.17348073 0.19262214 0.16592172 0.15594961 0.17065579]] entropy:[1.7872722]
DEBUG:chainerrl.agents.a3c:t:7758 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06283406  0.14183883  0.24652018  0.09730324  0.03532194  0.12541851]] probs:[[0.14137045 0.17347902 0.1926236  0.16592254 0.15595067 0.1706537 ]] entropy:[1.7872721]
DEBUG:chainerrl.agents.a3c:t:7759 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06284438  0.14178795  0.24653162  0.09730318  0.03533521  0.12537412]] probs:[[0.14137094 0.17347257 0.19262844 0.1659248  0.15595488 0.17064846]] entropy:[1.7872725]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4476823] v_loss:[[7.5137254e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.17108949732064221
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agen

DEBUG:chainerrl.agents.a3c:t:7785 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01056624  0.33447385 -0.02778449  0.05915752 -0.11992789  0.35939866]] probs:[[0.14681533 0.20731008 0.14430906 0.15741716 0.13160616 0.21254218]] entropy:[1.7743181]
DEBUG:chainerrl.agents.a3c:t:7786 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01050646  0.33452633 -0.02774075  0.05922743 -0.11997587  0.35945752]] probs:[[0.14681777 0.207312   0.14430913 0.15742135 0.13159417 0.2125455 ]] entropy:[1.7743145]
DEBUG:chainerrl.agents.a3c:t:7787 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01052165  0.3344945  -0.02774674  0.0592241  -0.11998291  0.35940877]] probs:[[0.1468187  0.20730987 0.14431138 0.15742423 0.13159609 0.21253973]] entropy:[1.7743173]
DEBUG:chainerrl.agents.a3c:t:7788 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01052162  0.3344945  -0.02774714  0.05922418 -0.11998378  0.3594094 ]] probs:[[

DEBUG:chainerrl.agents.a3c:t:7814 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06829437  0.26521313  0.01699838  0.06278391 -0.05394496  0.24174963]] probs:[[0.16034658 0.19524522 0.15232883 0.15946542 0.14189655 0.19071741]] entropy:[1.7849271]
DEBUG:chainerrl.agents.a3c:pi_loss:[-5.695124] v_loss:[[0.21535829]]
DEBUG:chainerrl.agents.a3c:grad norm:282.7612067355501
DEBUG:chainerrl.agents.a3c:update


INFO: outdir:result global_step:15946 local_step:7814 R:0.75
INFO: statistics:[('average_value', 0.34346647346843895), ('average_entropy', 1.7854507767909467)]


DEBUG:chainerrl.agents.a3c:t:7815 r:0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03102444  0.20049675 -0.08787121  0.12742428 -0.05306451  0.10316782]] probs:[[0.16212872 0.19207057 0.14395417 0.17853603 0.14905296 0.17425749]] entropy:[1.7866807]
DEBUG:chainerrl.agents.a3c:t:7816 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00296397  0.23892868 -0.15118977  0.10671506 -0.07079606  0.12719062]] probs:[[0.15888752 0.20117219 0.13618888 0.17625777 0.14758974 0.17990395]] entropy:[1.7833337]
DEBUG:chainerrl.agents.a3c:t:7817 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00163641  0.2466763  -0.15989311  0.10313819 -0.07267207  0.13025266]] probs:[[0.15826887 0.20287864 0.13510315 0.17575128 0.14741616 0.18058188]] entropy:[1.7827045]
DEBUG:chainerrl.agents.a3c:t:7818 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00229361  0.24765836 -0.16107969  0.10265455 -0.0728932   0.1306529 ]] probs:[[0.

DEBUG:chainerrl.agents.a3c:t:7844 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01534088  0.17842838 -0.08604351  0.08605627 -0.03490331  0.09509764]] probs:[[0.16160254 0.19022876 0.14602172 0.17344408 0.15368356 0.17501935]] entropy:[1.7878935]
DEBUG:chainerrl.agents.a3c:t:7845 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01543508  0.1784131  -0.08604681  0.08588781 -0.03496014  0.09515897]] probs:[[0.16162024 0.19022878 0.1460235  0.17341752 0.1536772  0.1750328 ]] entropy:[1.787894]
DEBUG:chainerrl.agents.a3c:t:7846 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01543505  0.1784131  -0.08604687  0.08588775 -0.03496011  0.09515897]] probs:[[0.16162024 0.19022878 0.14602348 0.17341751 0.1536772  0.1750328 ]] entropy:[1.787894]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3931212] v_loss:[[1.7331486e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.17537605109471183
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:7872 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00191104  0.10925017 -0.05078473  0.10034751  0.03468153  0.06552574]] probs:[[0.1596317  0.17771985 0.15143758 0.17614469 0.16494958 0.17011659]] entropy:[1.7902232]
DEBUG:chainerrl.agents.a3c:t:7873 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00428387  0.10946987 -0.04981665  0.10026424  0.03506297  0.06486012]] probs:[[0.15993096 0.17767006 0.1515085  0.176042   0.16493003 0.16991843]] entropy:[1.7902553]
DEBUG:chainerrl.agents.a3c:t:7874 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00964037  0.11025764 -0.04756995  0.0999467   0.03608902  0.06333322]] probs:[[0.16059819 0.17759804 0.1516682  0.17577626 0.16490248 0.16945687]] entropy:[1.7903234]
DEBUG:chainerrl.agents.a3c:t:7875 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00191601  0.10925321 -0.05078279  0.1003453   0.03468213  0.06552439]] probs:[[

DEBUG:chainerrl.agents.a3c:t:7900 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 5.7458509e-02  2.1271096e-01 -7.8038901e-02  3.5120953e-02
  -1.2862950e-04  3.3703011e-02]] probs:[[0.16835845 0.19663471 0.1470243  0.16463941 0.15893704 0.16440614]] entropy:[1.7877896]
DEBUG:chainerrl.agents.a3c:t:7901 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 5.7458509e-02  2.1271093e-01 -7.8038827e-02  3.5120953e-02
  -1.2865930e-04  3.3703115e-02]] probs:[[0.16835845 0.19663471 0.14702432 0.16463941 0.15893704 0.16440615]] entropy:[1.7877896]
DEBUG:chainerrl.agents.a3c:t:7902 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 5.7458643e-02  2.1271126e-01 -7.8039125e-02  3.5120610e-02
  -1.2834638e-04  3.3703867e-02]] probs:[[0.16835843 0.19663472 0.14702423 0.16463932 0.15893705 0.16440624]] entropy:[1.7877893]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3251916] v_loss:[[0.00012019]]
DEBUG:chainerrl.agents.a3c:grad norm:0.470796383352885

DEBUG:chainerrl.agents.a3c:t:7928 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02933505  0.29524553 -0.00227094 -0.01811939 -0.03779371  0.06670946]] probs:[[0.1535026  0.2123632  0.15771373 0.15523392 0.15220964 0.16897689]] entropy:[1.7841575]
DEBUG:chainerrl.agents.a3c:t:7929 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02933459  0.2952459  -0.00227092 -0.01811976 -0.03779335  0.0667099 ]] probs:[[0.15350264 0.21236324 0.15771371 0.15523383 0.15220967 0.16897695]] entropy:[1.7841578]
DEBUG:chainerrl.agents.a3c:t:7930 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02934098  0.29524142 -0.00227139 -0.01811526 -0.03780005  0.06671554]] probs:[[0.15350187 0.21236257 0.15771385 0.15523474 0.15220885 0.16897812]] entropy:[1.7841578]
DEBUG:chainerrl.agents.a3c:t:7931 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02934283  0.29524013 -0.0022715  -0.01811386 -0.03780207  0.06671719]] probs:[[

DEBUG:chainerrl.agents.a3c:t:7957 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00046055 0.18173733 0.03483814 0.00963567 0.00032877 0.05219019]] probs:[[0.15883355 0.19040123 0.1643888  0.16029756 0.15881261 0.16726618]] entropy:[1.7896293]
DEBUG:chainerrl.agents.a3c:t:7958 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00022509  0.1814401   0.03469323  0.01030094 -0.00035012  0.05271911]] probs:[[0.15874086 0.19036405 0.16438174 0.1604206  0.15872101 0.16737173]] entropy:[1.7896297]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5186833] v_loss:[[9.3532784e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.1346801673128406
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:7959 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00601612 0.16354635 0.04559979 0.00239558 0.00743247 0.05561196]] probs:[[0.15975352 0.18701005 0.16620398 0.15917617 0.15997995 0.1678764 ]] entropy:[1.7901082]
DEBUG:chainerrl.agents.a3c:t:7960

DEBUG:chainerrl.agents.a3c:t:7986 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03909135  0.13578905 -0.02381097 -0.00178037 -0.0080909   0.1261428 ]] probs:[[0.16541554 0.18220973 0.155331   0.15879102 0.15779212 0.18046054]] entropy:[1.7896717]
DEBUG:chainerrl.agents.a3c:t:7987 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04280309  0.13803838 -0.02361114 -0.00481098 -0.00376085  0.12248108]] probs:[[0.16593093 0.18251035 0.15526873 0.15821542 0.15838166 0.17969295]] entropy:[1.789705]
DEBUG:chainerrl.agents.a3c:t:7988 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0397355   0.13609387 -0.02375288 -0.00232909 -0.00745079  0.12549984]] probs:[[0.16551068 0.18225269 0.1553293  0.15869294 0.15788224 0.1803321 ]] entropy:[1.7896786]
DEBUG:chainerrl.agents.a3c:t:7989 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04353116  0.13871545 -0.02360944 -0.00547784 -0.00274202  0.12151785]] probs:[[0

DEBUG:chainerrl.agents.a3c:pi_loss:[-0.48434097] v_loss:[[0.0258913]]
DEBUG:chainerrl.agents.a3c:grad norm:37.06403800709743
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:8015 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.20126697  0.00189565  0.06021016  0.01996618 -0.06738036  0.05922969]] probs:[[0.1940289  0.15895733 0.16850245 0.16185588 0.14831816 0.16833733]] entropy:[1.7883084]
DEBUG:chainerrl.agents.a3c:t:8016 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2012667   0.0018953   0.06021023  0.01996685 -0.06737977  0.05922938]] probs:[[0.19402882 0.15895726 0.16850245 0.16185597 0.14831823 0.16833726]] entropy:[1.7883084]
DEBUG:chainerrl.agents.a3c:t:8017 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.20126678  0.00189525  0.06021019  0.01996678 -0.06737978  0.05922952]] probs:[[0.19402884 0.15895724 0.16850244 0.16185597 0.14831823 0.16833727]] entropy:[1.7883083]
DEBUG:chainerrl.agents.a3

DEBUG:chainerrl.agents.a3c:t:8043 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.35436597 -0.09721415  0.07093236 -0.05896362  0.06566563 -0.00815983]] probs:[[0.22239956 0.14158432 0.16750982 0.1471049  0.16662991 0.15477149]] entropy:[1.7797118]
DEBUG:chainerrl.agents.a3c:t:8044 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.35865948 -0.0939684   0.06960887 -0.06416788  0.07016942 -0.01262758]] probs:[[0.22324613 0.14197443 0.16720562 0.14626901 0.16729936 0.15400542]] entropy:[1.7793638]
DEBUG:chainerrl.agents.a3c:t:8045 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.3587009  -0.0937591   0.06957796 -0.06415901  0.07017425 -0.01251328]] probs:[[0.22324345 0.14199656 0.1671915  0.1462625  0.16729122 0.1540148 ]] entropy:[1.7793682]
DEBUG:chainerrl.agents.a3c:t:8046 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.3587084  -0.0937494   0.06958891 -0.06416101  0.0701526  -0.0124968 ]] probs:[[

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:8071 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18756929 -0.0182723   0.0312741   0.00423995  0.06562117  0.04045226]] probs:[[0.19047074 0.15503594 0.1629109  0.15856574 0.16860361 0.164413  ]] entropy:[1.7894502]
DEBUG:chainerrl.agents.a3c:t:8072 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18396564 -0.01867111  0.03163045  0.00666989  0.06369755  0.04478395]] probs:[[0.18976916 0.15496072 0.16295487 0.15893777 0.16826504 0.16511245]] entropy:[1.789568]
DEBUG:chainerrl.agents.a3c:t:8073 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18379237 -0.01957969  0.03221005  0.00674666  0.06127823  0.04636712]] probs:[[0.18977651 0.15485281 0.1630839  0.15898365 0.16789402 0.1654091 ]] entropy:[1.7895696]
DEBUG:chainerrl.agents.a3c:t:8074 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18382117 -0.01984103  0.03240693  0.00666874  

DEBUG:chainerrl.agents.a3c:t:8100 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12658484  0.16287497  0.05226688 -0.02686878 -0.02698748  0.02151795]] probs:[[0.1791801  0.185802   0.16634658 0.15369004 0.1536718  0.16130945]] entropy:[1.7891159]
DEBUG:chainerrl.agents.a3c:t:8101 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14231822  0.15657389  0.06575834 -0.03490014 -0.0286974   0.01739186]] probs:[[0.18169852 0.18430729 0.16830687 0.1521901  0.15313703 0.16036019]] entropy:[1.7888577]
DEBUG:chainerrl.agents.a3c:t:8102 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14510854  0.15623777  0.06842062 -0.03546482 -0.02917424  0.01644534]] probs:[[0.1820999  0.18413785 0.16865708 0.15201543 0.15297471 0.16011499]] entropy:[1.7887962]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5405471] v_loss:[[0.00028927]]
DEBUG:chainerrl.agents.a3c:grad norm:1.370366743810246
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3

DEBUG:chainerrl.agents.a3c:t:8128 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10456301 0.13789453 0.09052607 0.01180135 0.06606402 0.05265521]] probs:[[0.17114455 0.17694518 0.16875899 0.15598297 0.16468087 0.16248743]] entropy:[1.7909613]
DEBUG:chainerrl.agents.a3c:t:8129 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10189091 0.13776791 0.09640173 0.02185177 0.07597059 0.05536801]] probs:[[0.16997871 0.17618775 0.16904822 0.15690398 0.16562942 0.16225193]] entropy:[1.791089]
DEBUG:chainerrl.agents.a3c:t:8130 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09994531 0.13580841 0.0948389  0.02154516 0.07561594 0.04942981]] probs:[[0.16998945 0.17619644 0.16912363 0.1571713  0.16590361 0.16161563]] entropy:[1.7910864]
DEBUG:chainerrl.agents.a3c:t:8131 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09989142 0.13581176 0.0949622  0.02175719 0.07582705 0.04947576]] probs:[[0.16996531 0.1761815  0.1

DEBUG:chainerrl.agents.a3c:t:8157 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10494772 0.10849238 0.07996884 0.04111069 0.09425223 0.04127349]] probs:[[0.17109485 0.1717024  0.16687404 0.16051398 0.16927466 0.16054012]] entropy:[1.7913764]
DEBUG:chainerrl.agents.a3c:t:8158 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10954726 0.10866001 0.08155342 0.03976907 0.09401183 0.04106644]] probs:[[0.17174742 0.1715951  0.16700621 0.16017175 0.16909987 0.16037968]] entropy:[1.7913443]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4990247] v_loss:[[6.3893196e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.09982654553232437
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:8159 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10580898 0.10303356 0.07817151 0.04816284 0.09165763 0.04302368]] probs:[[0.17126083 0.17078619 0.16659243 0.16166748 0.16885434 0.16083878]] entropy:[1.7914532]
DEBUG:chainerrl.agents.a3c:t:8160 r:0.

DEBUG:chainerrl.agents.a3c:t:8186 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09666811 0.11069401 0.07008404 0.0624938  0.08015737 0.05545597]] probs:[[0.1695621  0.17195712 0.16511384 0.16386533 0.16678548 0.16271612]] entropy:[1.7915733]
DEBUG:chainerrl.agents.a3c:t:8187 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09667236 0.11076771 0.06995118 0.06239937 0.08017592 0.05541754]] probs:[[0.16956744 0.17197448 0.16509639 0.16385432 0.16679312 0.16271429]] entropy:[1.7915722]
DEBUG:chainerrl.agents.a3c:t:8188 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09667093 0.11076774 0.06995194 0.06239866 0.08017385 0.05541487]] probs:[[0.16956736 0.17197464 0.16509669 0.16385436 0.16679294 0.16271402]] entropy:[1.7915723]
DEBUG:chainerrl.agents.a3c:t:8189 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10298426 0.122134   0.05051357 0.04744893 0.08121178 0.04891822]] probs:[[0.17123206 0.17454271 0.

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4738824] v_loss:[[3.0769745e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.25655249695966786
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:8215 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13397224 0.05106326 0.08108716 0.10888025 0.06641566 0.0727422 ]] probs:[[0.17484272 0.16093133 0.1658364  0.17051014 0.1634211  0.16445826]] entropy:[1.7913697]
DEBUG:chainerrl.agents.a3c:t:8216 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1168179  0.09167765 0.09048775 0.09538204 0.0591954  0.10513055]] probs:[[0.17063782 0.1664014  0.16620353 0.16701896 0.16108316 0.16865511]] entropy:[1.791605]
DEBUG:chainerrl.agents.a3c:t:8217 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10889962 0.08819394 0.08275012 0.09064238 0.0516494  0.09105475]] probs:[[0.1705822  0.1670865  0.16617939 0.16749611 0.16109063 0.16756518]] entropy:[1.7916138]
DEBUG:chainerrl.agents.a3c:t:8218 r:0.0

DEBUG:chainerrl.agents.a3c:t:8244 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09627501 0.09251282 0.10392463 0.07087191 0.06995355 0.14936276]] probs:[[0.1664618  0.1658367  0.16774005 0.1622864  0.16213743 0.17553765]] entropy:[1.7914016]
DEBUG:chainerrl.agents.a3c:t:8245 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0968538  0.09240853 0.10412952 0.07053133 0.06996666 0.14953506]] probs:[[0.16654308 0.1658044  0.16775921 0.16221645 0.16212487 0.175552  ]] entropy:[1.7913985]
DEBUG:chainerrl.agents.a3c:t:8246 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09631755 0.09250161 0.10394118 0.07084306 0.06995188 0.14937292]] probs:[[0.16646807 0.16583405 0.16774201 0.16228095 0.16213638 0.17553858]] entropy:[1.7914014]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4384904] v_loss:[[3.4511754e-07]]
DEBUG:chainerrl.agents.a3c:grad norm:0.01572940756217946
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:8247 r:0.

DEBUG:chainerrl.agents.a3c:t:8273 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13846748 0.11874304 0.09973995 0.10841143 0.02764822 0.07585143]] probs:[[0.17399544 0.17059709 0.16738585 0.16884363 0.1557434  0.16343461]] entropy:[1.7911414]
DEBUG:chainerrl.agents.a3c:t:8274 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14066768 0.11954952 0.10065506 0.1098934  0.03011326 0.07814841]] probs:[[0.1740854  0.17044757 0.1672573  0.16880964 0.1558652  0.16353494]] entropy:[1.791152]
DEBUG:chainerrl.agents.a3c:t:8275 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14320005 0.12041081 0.10157274 0.11112824 0.03240616 0.08035626]] probs:[[0.17423588 0.17031008 0.1671318  0.16873647 0.15596259 0.1636232 ]] entropy:[1.7911578]
DEBUG:chainerrl.agents.a3c:t:8276 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1434511  0.12048268 0.10166033 0.11126    0.03264777 0.08057123]] probs:[[0.17425075 0.1702941  0.1

DEBUG:chainerrl.agents.a3c:t:8302 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05971937 0.17125708 0.01083144 0.01403282 0.09950004 0.04602534]] probs:[[0.16521807 0.18471313 0.15733518 0.15783966 0.17192304 0.16297099]] entropy:[1.790182]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4160656] v_loss:[[9.838053e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.04843280794718326
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:8303 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05937217 0.16611898 0.01810297 0.02499708 0.07794488 0.05231073]] probs:[[0.1652854  0.18390521 0.15860304 0.15970024 0.16838387 0.16412234]] entropy:[1.7905172]
DEBUG:chainerrl.agents.a3c:t:8304 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05938971 0.16608906 0.01809183 0.02498296 0.07793746 0.05228605]] probs:[[0.16529025 0.18390189 0.15860315 0.15969987 0.16838463 0.16412026]] entropy:[1.7905177]
DEBUG:chainerrl.agents.a3c:t:8305 r:0.0 

DEBUG:chainerrl.agents.a3c:t:8331 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0627147  0.11007468 0.00672262 0.02413388 0.04378227 0.03395639]] probs:[[0.16923058 0.17743817 0.16001542 0.16282588 0.16605678 0.1644331 ]] entropy:[1.7912036]
DEBUG:chainerrl.agents.a3c:t:8332 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06270213 0.11005589 0.00670255 0.02412937 0.04377127 0.03390043]] probs:[[0.16923194 0.17743847 0.16001548 0.16282848 0.16605836 0.1644273 ]] entropy:[1.791204]
DEBUG:chainerrl.agents.a3c:t:8333 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06270301 0.11005573 0.00669525 0.02412325 0.04376318 0.0339144 ]] probs:[[0.16923226 0.17743863 0.16001448 0.16282766 0.1660572  0.16442975]] entropy:[1.7912037]
DEBUG:chainerrl.agents.a3c:t:8334 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06270248 0.11005647 0.00670324 0.02412955 0.04377161 0.0339022 ]] probs:[[0.16923188 0.17743845 0.1

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:8359 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07568868  0.11335771 -0.10018121  0.07368755  0.08844307  0.02275537]] probs:[[0.17133865 0.1779159  0.14370635 0.17099611 0.17353795 0.16250497]] entropy:[1.7894156]
DEBUG:chainerrl.agents.a3c:t:8360 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07568801  0.1133577  -0.10018142  0.07368831  0.08844225  0.02275678]] probs:[[0.17133853 0.1779159  0.14370632 0.17099623 0.17353782 0.16250521]] entropy:[1.7894156]
DEBUG:chainerrl.agents.a3c:t:8361 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07561514  0.11337564 -0.10020985  0.07371017  0.08843301  0.02279149]] probs:[[0.17132701 0.1779201  0.14370306 0.17100094 0.1735372  0.16251177]] entropy:[1.7894156]
DEBUG:chainerrl.agents.a3c:t:8362 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07556228  0.11339784 -0.10022865  0.07371369 

DEBUG:chainerrl.agents.a3c:t:8388 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.064364    0.08409006 -0.0578988   0.08158578  0.06522867  0.03573349]] probs:[[0.16963963 0.17301919 0.15011679 0.17258644 0.16978638 0.16485164]] entropy:[1.7906204]
DEBUG:chainerrl.agents.a3c:t:8389 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06500827  0.08460255 -0.05750821  0.08134709  0.0654569   0.03678449]] probs:[[0.1696764  0.17303388 0.15011124 0.17247148 0.16975254 0.16495444]] entropy:[1.7906243]
DEBUG:chainerrl.agents.a3c:t:8390 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06674489  0.08688639 -0.05593025  0.08205034  0.06685723  0.04267916]] probs:[[0.16958772 0.1730381  0.15000898 0.17220329 0.16960676 0.16555518]] entropy:[1.7906318]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3999847] v_loss:[[2.0163983e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.0864441536458953
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:8416 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06029051  0.05705658 -0.01586249  0.06768525  0.04915845  0.05425521]] probs:[[0.16909657 0.16855061 0.15669748 0.17035164 0.16722463 0.16807911]] entropy:[1.7913797]
DEBUG:chainerrl.agents.a3c:t:8417 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0601412   0.0569843  -0.01591981  0.06783766  0.04915654  0.05425797]] probs:[[0.16907477 0.16854185 0.15669169 0.17038107 0.1672277  0.16808298]] entropy:[1.791379]
DEBUG:chainerrl.agents.a3c:t:8418 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06028026  0.0570516  -0.01586646  0.06769578  0.04915834  0.05425541]] probs:[[0.16909507 0.16855    0.15669706 0.17035365 0.16722482 0.16807936]] entropy:[1.7913795]
DEBUG:chainerrl.agents.a3c:t:8419 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06014045  0.05698397 -0.01592006  0.06783831  0.04915658  0.05425796]] probs:[[0

DEBUG:chainerrl.agents.a3c:t:8445 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05828311 0.05666663 0.00574166 0.04841491 0.05279144 0.06047727]] probs:[[0.16851754 0.16824538 0.15989198 0.16686277 0.16759464 0.1688877 ]] entropy:[1.7915847]
DEBUG:chainerrl.agents.a3c:t:8446 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05834676 0.05669464 0.00576124 0.04835823 0.05279911 0.06047069]] probs:[[0.16852671 0.16824852 0.15989362 0.16685176 0.16759437 0.16888504]] entropy:[1.7915847]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4236764] v_loss:[[1.1504928e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.00823543745546502
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:8447 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.062474   0.05232964 0.01162005 0.04963127 0.05460671 0.05218403]] probs:[[0.16921933 0.16751139 0.16082901 0.16705999 0.16789326 0.167487  ]] entropy:[1.7916275]
DEBUG:chainerrl.agents.a3c:t:8448 r:0.

DEBUG:chainerrl.agents.a3c:t:8474 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05314853 0.04508034 0.02348386 0.05340889 0.05688353 0.0506551 ]] probs:[[0.16766575 0.16631843 0.16276506 0.16770941 0.16829315 0.1672482 ]] entropy:[1.7916979]
DEBUG:chainerrl.agents.a3c:t:8475 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05315023 0.04507993 0.02348293 0.05340973 0.05688326 0.05065554]] probs:[[0.167666   0.16631833 0.16276486 0.16770951 0.16829307 0.16724825]] entropy:[1.7916977]
DEBUG:chainerrl.agents.a3c:t:8476 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05315321 0.04507974 0.0234821  0.05341089 0.05688286 0.05065746]] probs:[[0.16766635 0.16631816 0.16276458 0.16770956 0.16829287 0.16724843]] entropy:[1.791698]
DEBUG:chainerrl.agents.a3c:t:8477 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0532021  0.04507044 0.02345409 0.05343771 0.05687697 0.05069025]] probs:[[0.1676727  0.16631477 0.1

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3977432] v_loss:[[1.32306795e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.021159290700258293
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:8503 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06097572 0.03023982 0.03880687 0.04257036 0.06332695 0.05286516]] probs:[[0.16880925 0.16369967 0.16510811 0.16573067 0.16920662 0.16744564]] entropy:[1.7916882]
DEBUG:chainerrl.agents.a3c:t:8504 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06124311 0.03008226 0.03911679 0.04235507 0.06327073 0.05296906]] probs:[[0.16884717 0.16366687 0.16515224 0.16568792 0.16918987 0.16745588]] entropy:[1.7916874]
DEBUG:chainerrl.agents.a3c:t:8505 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06125475 0.02997854 0.03916664 0.04233694 0.06325515 0.05292344]] probs:[[0.16885254 0.1636532  0.16516379 0.16568823 0.16919065 0.1674516 ]] entropy:[1.7916874]
DEBUG:chainerrl.agents.a3c:t:8506 r:

DEBUG:chainerrl.agents.a3c:t:8531 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04052115  0.05041049  0.14082174 -0.02220324  0.10132641  0.0633916 ]] probs:[[0.15210485 0.16658434 0.18234728 0.15491678 0.17528579 0.1687609 ]] entropy:[1.7897317]
DEBUG:chainerrl.agents.a3c:t:8532 r:0.2 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04394897  0.05204372  0.1356964  -0.01802601  0.10372693  0.06158126]] probs:[[0.15164742 0.16692606 0.18149056 0.15562996 0.17578016 0.16852574]] entropy:[1.7897918]
DEBUG:chainerrl.agents.a3c:t:8533 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04401555  0.05206692  0.13560031 -0.01797783  0.10376333  0.06154685]] probs:[[0.15163969 0.16693252 0.18147595 0.1556399  0.1757893  0.16852257]] entropy:[1.7897925]
DEBUG:chainerrl.agents.a3c:t:8534 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.055136    0.04542735  0.12201323 -0.03602321  0.10373922  0.05814795]] probs:[[

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:8559 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03624968  0.00484667  0.17105108 -0.02515578  0.05641526  0.0291796 ]] probs:[[0.1550827  0.1615888  0.1908064  0.15681276 0.17014031 0.16556896]] entropy:[1.7892629]
DEBUG:chainerrl.agents.a3c:t:8560 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03623705  0.00484532  0.17105561 -0.02516395  0.05639465  0.02918182]] probs:[[0.15508497 0.16158889 0.19080763 0.15681177 0.17013714 0.16556965]] entropy:[1.7892629]
DEBUG:chainerrl.agents.a3c:t:8561 r:0.25 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03623667  0.00484556  0.17105418 -0.02516845  0.05638411  0.02917944]] probs:[[0.15508547 0.16158941 0.19080792 0.15681154 0.17013586 0.16556975]] entropy:[1.7892628]
DEBUG:chainerrl.agents.a3c:t:8562 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03650998  0.00493792  0.17119245 -0.02522619

DEBUG:chainerrl.agents.a3c:t:8588 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03187473 -0.00504552  0.11548994 -0.07467742 -0.03092901  0.12290801]] probs:[[0.15841785 0.16272561 0.18357089 0.15178022 0.15856774 0.18493769]] entropy:[1.7888228]
DEBUG:chainerrl.agents.a3c:t:8589 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03186543 -0.00504109  0.11546507 -0.0746909  -0.03096297  0.12290844]] probs:[[0.15842088 0.1627279  0.1835681  0.15177965 0.15856391 0.18493956]] entropy:[1.7888229]
DEBUG:chainerrl.agents.a3c:t:8590 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03188994 -0.00502905  0.11545819 -0.07468367 -0.03091559  0.12291001]] probs:[[0.15841608 0.16272895 0.1835658  0.15177988 0.15857051 0.1849388 ]] entropy:[1.7888234]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5468585] v_loss:[[0.0001597]]
DEBUG:chainerrl.agents.a3c:grad norm:0.4037810746718518
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3

INFO: outdir:result global_step:17542 local_step:8591 R:1.8
INFO: statistics:[('average_value', 0.25209307306440815), ('average_entropy', 1.7875999815961712)]


DEBUG:chainerrl.agents.a3c:t:8592 r:0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10936019 -0.12604976  0.23616186  0.00813972  0.01075853  0.08526995]] probs:[[0.17508617 0.13836129 0.19875638 0.15823127 0.1586462  0.1709187 ]] entropy:[1.7856225]
DEBUG:chainerrl.agents.a3c:t:8593 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11466965 -0.14317185  0.23523934 -0.05218098  0.01272124  0.11097433]] probs:[[0.17709865 0.13684726 0.19979195 0.1498832  0.15993357 0.17644541]] entropy:[1.7842095]
DEBUG:chainerrl.agents.a3c:t:8594 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11438985 -0.14393166  0.23561473 -0.06018586  0.01272143  0.11473321]] probs:[[0.17715698 0.13682663 0.19998875 0.14877878 0.16003104 0.17721781]] entropy:[1.7840031]
DEBUG:chainerrl.agents.a3c:t:8595 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11443748 -0.1440849   0.23569238 -0.06127712  0.01280507  0.11526219]] probs:[[0.

DEBUG:chainerrl.agents.a3c:t:8621 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0647831  -0.18436019  0.19239521 -0.0195431   0.16570176  0.06358817]] probs:[[0.16835855 0.13123015 0.19127421 0.15474363 0.18623598 0.16815749]] entropy:[1.7844007]
DEBUG:chainerrl.agents.a3c:t:8622 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06481694 -0.18439378  0.19242063 -0.01957119  0.16568483  0.0636068 ]] probs:[[0.16836394 0.1312255  0.19127873 0.154739   0.18623249 0.16816032]] entropy:[1.784399]
DEBUG:chainerrl.agents.a3c:t:8623 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06482128 -0.18439807  0.19242388 -0.01957479  0.16568273  0.06360914]] probs:[[0.16836464 0.13122492 0.19127932 0.15473843 0.18623208 0.16816069]] entropy:[1.7843988]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5294446] v_loss:[[0.00011861]]
DEBUG:chainerrl.agents.a3c:grad norm:0.27887888741285294
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:8649 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04398303 -0.05520194  0.07789607  0.01138581  0.13408966  0.06802661]] probs:[[0.16593038 0.15026243 0.17165409 0.16060872 0.18157612 0.16996828]] entropy:[1.7900571]
DEBUG:chainerrl.agents.a3c:t:8650 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0440481  -0.05516783  0.07792275  0.01140238  0.13411759  0.06799939]] probs:[[0.16593726 0.15026401 0.17165461 0.16060759 0.1815769  0.16995965]] entropy:[1.7900574]
DEBUG:chainerrl.agents.a3c:t:8651 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04400042 -0.05514515  0.07789665  0.01136076  0.13410196  0.06801782]] probs:[[0.1659319  0.15026973 0.17165278 0.16060337 0.18157685 0.16996539]] entropy:[1.7900577]
DEBUG:chainerrl.agents.a3c:t:8652 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04397774 -0.05519293  0.07789489  0.0113891   0.13409153  0.0680317 ]] probs:[[

DEBUG:chainerrl.agents.a3c:t:8678 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03329615 -0.00223893  0.06891987  0.02363881  0.09045926  0.07239906]] probs:[[0.1641915  0.15845938 0.17014603 0.16261347 0.17385063 0.17073904]] entropy:[1.7912478]
DEBUG:chainerrl.agents.a3c:t:8679 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03371485 -0.00219185  0.06910451  0.02361861  0.0905472   0.07234459]] probs:[[0.16424212 0.15844935 0.17015867 0.16259223 0.17384672 0.17071089]] entropy:[1.791248]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5195147] v_loss:[[9.115557e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.16389202416620996
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:8680 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02871107 0.01254401 0.05994347 0.02640911 0.08770873 0.07023945]] probs:[[0.16349047 0.16086857 0.16867726 0.16311456 0.17342624 0.17042291]] entropy:[1.7913995]
DEBUG:chainerrl.agents.a3c:t

DEBUG:chainerrl.agents.a3c:t:8707 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03577424 0.02442473 0.04941701 0.03307997 0.06991281 0.06988917]] probs:[[0.1647666  0.16290714 0.16702986 0.16432326 0.1704886  0.17048457]] entropy:[1.791602]
DEBUG:chainerrl.agents.a3c:t:8708 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03569868 0.02433494 0.04942517 0.03306217 0.07005212 0.06991179]] probs:[[0.1647543  0.16289267 0.1670314  0.1643205  0.17051253 0.17048861]] entropy:[1.791601]
DEBUG:chainerrl.agents.a3c:t:8709 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03569981 0.02433547 0.0494254  0.03306168 0.07005323 0.06991093]] probs:[[0.16475445 0.16289271 0.16703139 0.16432038 0.17051266 0.1704884 ]] entropy:[1.791601]
DEBUG:chainerrl.agents.a3c:t:8710 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0356996  0.02433549 0.04942532 0.0330615  0.07005327 0.06991093]] probs:[[0.16475444 0.16289274 0.167

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5885844] v_loss:[[0.00034097]]
DEBUG:chainerrl.agents.a3c:grad norm:0.531997370352446
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:8736 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01849093  0.02617581  0.09248969 -0.01495395  0.13675763  0.10707884]] probs:[[0.15459922 0.16166121 0.17274503 0.155147   0.18056388 0.17528372]] entropy:[1.7899358]
DEBUG:chainerrl.agents.a3c:t:8737 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01177413  0.01898412  0.08753254 -0.01964907  0.13390009  0.09638404]] probs:[[0.15627831 0.16115986 0.17259455 0.15505247 0.18078578 0.17412904]] entropy:[1.7900717]
DEBUG:chainerrl.agents.a3c:t:8738 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01289912  0.02033651  0.08849746 -0.01885213  0.1344692   0.09817866]] probs:[[0.15598588 0.16125728 0.17263198 0.15506005 0.18075342 0.17431138]] entropy:[1.7900497]
DEBUG:chainerrl.agents.a3

DEBUG:chainerrl.agents.a3c:t:8764 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01038519  0.05868577  0.06049802 -0.00161187  0.08108595  0.06874578]] probs:[[0.16071208 0.1686651  0.16897103 0.15879554 0.17248586 0.17037043]] entropy:[1.7912939]
DEBUG:chainerrl.agents.a3c:t:8765 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01106438  0.05768807  0.05986693 -0.00196453  0.08071825  0.06783141]] probs:[[0.16089217 0.16857117 0.16893888 0.15880951 0.17249845 0.17028975]] entropy:[1.7913038]
DEBUG:chainerrl.agents.a3c:t:8766 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0110644   0.05768807  0.05986691 -0.00196456  0.08071823  0.06783138]] probs:[[0.16089217 0.16857117 0.16893888 0.15880951 0.17249845 0.17028975]] entropy:[1.7913038]
DEBUG:chainerrl.agents.a3c:t:8767 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01106444  0.05768809  0.05986689 -0.00196456  0.08071831  0.06783134]] probs:[[

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:8792 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04647083 -0.02574748  0.0696277  -0.1212802   0.2330994   0.23454213]] probs:[[0.1488317  0.15194817 0.16715387 0.13810398 0.19683905 0.19712324]] entropy:[1.7822909]
DEBUG:chainerrl.agents.a3c:t:8793 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04647068 -0.02574766  0.0696278  -0.12128032  0.23309934  0.23454209]] probs:[[0.14883174 0.15194815 0.16715388 0.13810396 0.19683905 0.19712324]] entropy:[1.7822909]
DEBUG:chainerrl.agents.a3c:t:8794 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04647083 -0.02574739  0.06962778 -0.12128023  0.23309937  0.23454224]] probs:[[0.14883171 0.15194818 0.16715388 0.13810396 0.19683905 0.19712326]] entropy:[1.7822909]
DEBUG:chainerrl.agents.a3c:t:8795 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04647066 -0.02574756  0.06962792 -0.12128033 

DEBUG:chainerrl.agents.a3c:t:8821 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03258472  0.00162323  0.08008135 -0.05437753  0.11590042  0.16719638]] probs:[[0.16218378 0.15723927 0.17007284 0.14867578 0.1762751  0.18555324]] entropy:[1.7890807]
DEBUG:chainerrl.agents.a3c:t:8822 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03035889  0.00099632  0.0801369  -0.05499187  0.11612086  0.16729753]] probs:[[0.16190143 0.15721671 0.17016453 0.14865631 0.17639922 0.18566173]] entropy:[1.7890484]
DEBUG:chainerrl.agents.a3c:t:8823 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 2.6805958e-02 -1.4865515e-04  8.0356963e-02 -5.5945143e-02
   1.1635889e-01  1.6752498e-01]] probs:[[0.16145222 0.15715846 0.17033383 0.14862972 0.17657791 0.18584783]] entropy:[1.7889946]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6383742] v_loss:[[0.00052595]]
DEBUG:chainerrl.agents.a3c:grad norm:1.397801875719105
DEBUG:chainerrl.agents.a3c:update
DEBUG

DEBUG:chainerrl.agents.a3c:t:8849 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01226155  0.04095111  0.0908484  -0.02031239  0.07199611  0.13173978]] probs:[[0.15955892 0.1642029  0.17260404 0.1544452  0.16938053 0.17980835]] entropy:[1.7904963]
DEBUG:chainerrl.agents.a3c:t:8850 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01226155  0.04095111  0.0908484  -0.02031239  0.07199611  0.13173978]] probs:[[0.15955892 0.1642029  0.17260404 0.1544452  0.16938053 0.17980835]] entropy:[1.7904963]
DEBUG:chainerrl.agents.a3c:t:8851 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01226155  0.04095111  0.09084839 -0.02031238  0.07199611  0.13173981]] probs:[[0.15955894 0.1642029  0.17260404 0.15444522 0.16938055 0.17980836]] entropy:[1.7904965]
DEBUG:chainerrl.agents.a3c:t:8852 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01226155  0.04095111  0.09084839 -0.02031238  0.07199611  0.13173981]] probs:[[

DEBUG:chainerrl.agents.a3c:t:8878 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03933087 0.04770961 0.06720737 0.01043241 0.0735385  0.08919594]] probs:[[0.16409242 0.16547309 0.1687311  0.15941827 0.16980276 0.17248236]] entropy:[1.7914329]
DEBUG:chainerrl.agents.a3c:t:8879 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03933087 0.04770961 0.06720737 0.01043241 0.0735385  0.08919594]] probs:[[0.16409242 0.16547309 0.1687311  0.15941827 0.16980276 0.17248236]] entropy:[1.7914329]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.512973] v_loss:[[7.547928e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.13759669455252305
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:8880 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0317589  0.04771028 0.06908389 0.02032888 0.07828395 0.07928699]] probs:[[0.1628918  0.16551098 0.16908664 0.16104054 0.17064941 0.17082067]] entropy:[1.7915003]
DEBUG:chainerrl.agents.a3c:t:8881 r:0.0 

DEBUG:chainerrl.agents.a3c:t:8907 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19930561  0.06977755 -0.01585288 -0.05341025  0.19765884 -0.03845619]] probs:[[0.19053581 0.1673876  0.15365067 0.14798698 0.1902223  0.15021661]] entropy:[1.7860833]
DEBUG:chainerrl.agents.a3c:t:8908 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19930506  0.06977753 -0.01585296 -0.05341028  0.19765887 -0.03845604]] probs:[[0.19053572 0.16738762 0.15365067 0.147987   0.19022232 0.15021665]] entropy:[1.7860835]
DEBUG:chainerrl.agents.a3c:t:8909 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19930515  0.06977749 -0.01585293 -0.05341025  0.19765882 -0.03845608]] probs:[[0.19053574 0.1673876  0.15365067 0.147987   0.19022231 0.15021665]] entropy:[1.7860835]
DEBUG:chainerrl.agents.a3c:t:8910 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19930506  0.06977751 -0.01585301 -0.05341026  0.19765888 -0.03845606]] probs:[[

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5853324] v_loss:[[0.0002884]]
DEBUG:chainerrl.agents.a3c:grad norm:0.6748002504107501
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:8936 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02123979  0.12134084 -0.00170838  0.03164601  0.09440226  0.11058327]] probs:[[0.1540647  0.17767452 0.15710337 0.16243184 0.17295212 0.17577341]] entropy:[1.7902193]
DEBUG:chainerrl.agents.a3c:t:8937 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02172083  0.12095702 -0.00183221  0.03200575  0.09373199  0.11120596]] probs:[[0.15400751 0.17762582 0.15710117 0.16250812 0.17285518 0.17590219]] entropy:[1.7902164]
DEBUG:chainerrl.agents.a3c:t:8938 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0217872   0.12090405 -0.00184917  0.03205537  0.09363952  0.11129191]] probs:[[0.1539996  0.17761908 0.15710086 0.16251862 0.1728418  0.17591996]] entropy:[1.7902157]
DEBUG:chainerrl.agents.a3

DEBUG:chainerrl.agents.a3c:t:8965 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03426261 0.14506659 0.00134072 0.09198284 0.00205545 0.08350135]] probs:[[0.16225901 0.18127187 0.15700413 0.17190023 0.15711638 0.17044841]] entropy:[1.79039]
DEBUG:chainerrl.agents.a3c:t:8966 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03426264 0.14506662 0.00134071 0.09198281 0.00205543 0.08350132]] probs:[[0.16225901 0.18127187 0.15700412 0.17190021 0.15711637 0.17044841]] entropy:[1.79039]
DEBUG:chainerrl.agents.a3c:t:8967 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03426264 0.14506665 0.00134074 0.09198279 0.00205547 0.08350128]] probs:[[0.16225901 0.18127187 0.15700412 0.1719002  0.15711638 0.1704484 ]] entropy:[1.7903899]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5373935] v_loss:[[0.00022836]]
DEBUG:chainerrl.agents.a3c:grad norm:0.29441461595707563
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:8968 r:0.0 a:3 p

DEBUG:chainerrl.agents.a3c:t:8994 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.047608   0.08724175 0.01724527 0.06313113 0.01731878 0.11089414]] probs:[[0.16497122 0.17164093 0.1600375  0.16755205 0.16004927 0.17574905]] entropy:[1.7911651]
DEBUG:chainerrl.agents.a3c:t:8995 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04760486 0.08722889 0.01721553 0.06312048 0.0173038  0.11086167]] probs:[[0.16497354 0.17164169 0.16003552 0.16755316 0.16004965 0.17574638]] entropy:[1.7911651]
DEBUG:chainerrl.agents.a3c:t:8996 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0367252  0.09918154 0.02573711 0.0673741  0.02102239 0.12605979]] probs:[[0.16227154 0.17272961 0.16049825 0.16732198 0.15974332 0.17743523]] entropy:[1.7909886]
DEBUG:chainerrl.agents.a3c:t:8997 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.03786023 0.09968127 0.02752727 0.06752104 0.02167741 0.12760456]] probs:[[0.16229974 0.1726499  0.

DEBUG:chainerrl.agents.a3c:t:9023 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13605615 -0.05446839  0.04597629  0.06473981  0.05815965  0.10618901]] probs:[[0.17962398 0.14846379 0.16415085 0.16725996 0.16616298 0.17433846]] entropy:[1.7900413]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4968717] v_loss:[[5.3241198e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.26901661594364434
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:9024 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10359241 -0.03246707  0.05201497  0.06920056  0.05928775  0.10354668]] probs:[[0.1740544  0.15191306 0.1653047  0.16817011 0.16651131 0.17404644]] entropy:[1.7907515]
DEBUG:chainerrl.agents.a3c:t:9025 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1034911  -0.03250941  0.05232102  0.06921057  0.05921425  0.10377636]] probs:[[0.17402703 0.15189813 0.16534604 0.16816239 0.16648975 0.17407668]] entropy:[1.7907504]
DEBUG:chainerrl.agen

DEBUG:chainerrl.agents.a3c:t:9051 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07027456  0.02119656 -0.10448097  0.09846695  0.50209194 -0.10618497]] probs:[[0.14320175 0.15691836 0.13838616 0.16952425 0.2538189  0.13815056]] entropy:[1.7649589]
DEBUG:chainerrl.agents.a3c:t:9052 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07027511  0.02119717 -0.10448098  0.0984669   0.5020919  -0.10618499]] probs:[[0.14320168 0.15691845 0.13838618 0.16952425 0.2538189  0.13815057]] entropy:[1.7649589]
DEBUG:chainerrl.agents.a3c:t:9053 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07026593  0.02119785 -0.10447229  0.09846836  0.5020963  -0.10617825]] probs:[[0.14320229 0.15691781 0.1383867  0.16952367 0.25381878 0.13815081]] entropy:[1.7649591]
DEBUG:chainerrl.agents.a3c:t:9054 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07026682  0.02119745 -0.10447274  0.09846843  0.5020958  -0.10617886]] probs:[[

DEBUG:chainerrl.agents.a3c:pi_loss:[-0.88319844] v_loss:[[0.01158437]]
DEBUG:chainerrl.agents.a3c:grad norm:11.27035669755205
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:9080 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01750204  0.02038042 -0.00054226  0.06895043  0.3957127  -0.01799174]] probs:[[0.15021345 0.15601307 0.15278277 0.16377766 0.22707312 0.15013993]] entropy:[1.7792811]
DEBUG:chainerrl.agents.a3c:t:9081 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01748018  0.02037451 -0.00051989  0.06891114  0.39570525 -0.01799732]] probs:[[0.15021724 0.15601265 0.15278669 0.16377176 0.22707216 0.15013957]] entropy:[1.7792821]
DEBUG:chainerrl.agents.a3c:t:9082 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01755397  0.02032419 -0.00051041  0.06891608  0.3956843  -0.01795637]] probs:[[0.15020843 0.15600717 0.15279047 0.16377506 0.22707087 0.15014799]] entropy:[1.7792823]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:9108 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00538974 0.01000474 0.0606317  0.04452046 0.24438436 0.03107018]] probs:[[0.15631562 0.15703869 0.16519377 0.16255361 0.19851641 0.16038187]] entropy:[1.7881038]
DEBUG:chainerrl.agents.a3c:t:9109 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00551888 0.00997357 0.06027016 0.04361128 0.24399017 0.03114462]] probs:[[0.15637621 0.15707438 0.16517673 0.16244787 0.19848947 0.16043526]] entropy:[1.7881136]
DEBUG:chainerrl.agents.a3c:t:9110 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00554701 0.00997139 0.06006447 0.04347554 0.24392    0.03114727]] probs:[[0.15639085 0.15708432 0.16515356 0.16243646 0.19848853 0.16044618]] entropy:[1.7881153]
DEBUG:chainerrl.agents.a3c:t:9111 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00555929 0.01006965 0.06053434 0.04399243 0.24428304 0.03105195]] probs:[[0.15635592 0.15706274 0.

DEBUG:chainerrl.agents.a3c:t:9137 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00802453 0.03287987 0.05629349 0.0527238  0.18543848 0.07696349]] probs:[[0.15659674 0.16053778 0.1643409  0.1637553  0.18699612 0.16777317]] entropy:[1.7900971]
DEBUG:chainerrl.agents.a3c:t:9138 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00826205 0.03295729 0.056235   0.05230004 0.18575339 0.07689003]] probs:[[0.15663123 0.16054744 0.16432846 0.1636831  0.1870518  0.16775796]] entropy:[1.7900918]
DEBUG:chainerrl.agents.a3c:t:9139 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0099976  0.03360188 0.05585898 0.04926214 0.18836738 0.07646462]] probs:[[0.15686637 0.16061313 0.16422799 0.16314816 0.18749723 0.16764712]] entropy:[1.7900454]
DEBUG:chainerrl.agents.a3c:t:9140 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0102127  0.03396238 0.05577157 0.04884251 0.18903351 0.07656369]] probs:[[0.15687652 0.16064689 0.

DEBUG:chainerrl.agents.a3c:t:9166 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02838917 0.05396749 0.07441992 0.06066342 0.165847   0.04671545]] probs:[[0.15944731 0.16357832 0.16695833 0.16467729 0.1829424  0.16239633]] entropy:[1.7907345]
DEBUG:chainerrl.agents.a3c:t:9167 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02841529 0.05380513 0.0744     0.06055403 0.16564474 0.0466197 ]] probs:[[0.15946685 0.16356753 0.1669711  0.16467515 0.18292303 0.16239643]] entropy:[1.7907373]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5272557] v_loss:[[0.00010768]]
DEBUG:chainerrl.agents.a3c:grad norm:0.2080821560994126
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:9168 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02014592 0.05762652 0.07538134 0.06377497 0.15204094 0.05742506]] probs:[[0.1582642  0.16430862 0.16725194 0.16532198 0.18057767 0.16427553]] entropy:[1.7909384]
DEBUG:chainerrl.agents.a3c:t:9169 r:0.0 a:

DEBUG:chainerrl.agents.a3c:t:9195 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00363405 0.07719686 0.06716131 0.04548491 0.12018782 0.10827319]] probs:[[0.15579744 0.16769041 0.16601595 0.16245605 0.17505679 0.17298344]] entropy:[1.7910131]
DEBUG:chainerrl.agents.a3c:t:9196 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00269469 0.07682029 0.0655265  0.04350785 0.11847119 0.1083144 ]] probs:[[0.15582171 0.16781095 0.16592638 0.16231285 0.17494802 0.1731801 ]] entropy:[1.7910078]
DEBUG:chainerrl.agents.a3c:t:9197 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00283038 0.07686966 0.0657519  0.04376145 0.11869437 0.10831943]] probs:[[0.15581979 0.1677944  0.16593924 0.16232999 0.1749612  0.17315535]] entropy:[1.7910087]
DEBUG:chainerrl.agents.a3c:t:9198 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00381794 0.07726566 0.06738514 0.04569925 0.12038659 0.10826438]] probs:[[0.15580343 0.16767757 0.

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6183573] v_loss:[[0.00044261]]
DEBUG:chainerrl.agents.a3c:grad norm:1.1606065563757422
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:9224 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00119603 -0.03268282 -0.03915112  0.07554323  0.18969604  0.21735342]] probs:[[0.15465225 0.14985861 0.14889242 0.1669874  0.18718007 0.19242924]] entropy:[1.786345]
DEBUG:chainerrl.agents.a3c:t:9225 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00168464 -0.03110753 -0.0346527   0.08034419  0.19429706  0.21631213]] probs:[[0.15466222 0.14967276 0.14914308 0.16731915 0.18751444 0.19168836]] entropy:[1.7864189]
DEBUG:chainerrl.agents.a3c:t:9226 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00152943 -0.03121788 -0.03494465  0.08004462  0.19397679  0.21642508]] probs:[[0.15466489 0.14968207 0.14912528 0.16729788 0.18748675 0.1917431 ]] entropy:[1.7864138]
DEBUG:chainerrl.agents.a3

DEBUG:chainerrl.agents.a3c:t:9252 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01910943  0.0335731  -0.00109562  0.0843861   0.1310239   0.13788216]] probs:[[0.15856527 0.16087537 0.15539359 0.16926117 0.17734212 0.17856255]] entropy:[1.7902975]
DEBUG:chainerrl.agents.a3c:t:9253 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01687363  0.03175924 -0.00207666  0.08496989  0.12954956  0.13778466]] probs:[[0.158366   0.160741   0.15539317 0.16952577 0.17725416 0.1787199 ]] entropy:[1.7902731]
DEBUG:chainerrl.agents.a3c:t:9254 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00690425  0.02039385 -0.00376637  0.087199    0.12295111  0.13859463]] probs:[[0.15747096 0.15960957 0.15579958 0.17063653 0.17684752 0.1796358 ]] entropy:[1.7901434]
DEBUG:chainerrl.agents.a3c:t:9255 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01969341  0.03421913 -0.00086611  0.08429227  0.13158092  0.13798767]] probs:[[

DEBUG:chainerrl.agents.a3c:t:9280 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09864986 -0.0653896   0.12266175  0.13392587 -0.03664335  0.13694674]] probs:[[0.17177953 0.14579074 0.17595418 0.17794737 0.1500425  0.17848574]] entropy:[1.7884209]
DEBUG:chainerrl.agents.a3c:t:9281 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09969328 -0.06820154  0.11775912  0.13315867 -0.04531018  0.140236  ]] probs:[[0.17229202 0.14566304 0.1754329  0.1781554  0.14903592 0.17942074]] entropy:[1.7882258]
DEBUG:chainerrl.agents.a3c:t:9282 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10729541 -0.07268872  0.09994837  0.12569599 -0.07886145  0.1483214 ]] probs:[[0.1748693  0.14606543 0.17358923 0.17811678 0.14516658 0.18219268]] entropy:[1.787553]
DEBUG:chainerrl.agents.a3c:t:9283 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11096859 -0.07353622  0.08494426  0.12239709 -0.10901459  0.14965847]] probs:[[0

DEBUG:chainerrl.agents.a3c:t:9309 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02059239 -0.04841289  0.1626066   0.12641738 -0.00389678  0.12213751]] probs:[[0.15922983 0.14861266 0.18352719 0.17700423 0.15537779 0.17624828]] entropy:[1.7887745]
DEBUG:chainerrl.agents.a3c:t:9310 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02059239 -0.04841292  0.16260675  0.12641744 -0.00389679  0.12213755]] probs:[[0.15922983 0.14861266 0.18352722 0.17700425 0.15537779 0.17624831]] entropy:[1.7887746]
DEBUG:chainerrl.agents.a3c:t:9311 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02015181 -0.0483942   0.16215436  0.12626514 -0.00381606  0.12201844]] probs:[[0.15918925 0.14864305 0.18347828 0.17701015 0.15541919 0.17626004]] entropy:[1.7887827]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.548268] v_loss:[[0.0001683]]
DEBUG:chainerrl.agents.a3c:grad norm:0.3617421398619221
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c

INFO: outdir:result global_step:19046 local_step:9334 R:1.1500000000000001
INFO: statistics:[('average_value', 0.34445195915711563), ('average_entropy', 1.7872330264493461)]


DEBUG:chainerrl.agents.a3c:t:9335 r:0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.085632   -0.11221209  0.26790872  0.26501432 -0.23534359  0.11502533]] probs:[[0.1674427  0.13738637 0.20092222 0.20034151 0.12146978 0.17243744]] entropy:[1.7756544]
DEBUG:chainerrl.agents.a3c:t:9336 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07174037 -0.12224966  0.27446204  0.25453362 -0.26585594  0.13648212]] probs:[[0.16585588 0.13660988 0.20312895 0.19912098 0.1183354  0.1769489 ]] entropy:[1.7740539]
DEBUG:chainerrl.agents.a3c:t:9337 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06992703 -0.1223869   0.27477875  0.2525436  -0.27138183  0.14046918]] probs:[[0.16565412 0.13667257 0.20331444 0.1988436  0.11775346 0.17776175]] entropy:[1.7738265]
DEBUG:chainerrl.agents.a3c:t:9338 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06964489 -0.12246653  0.2748821   0.25225696 -0.27216128  0.14106295]] probs:[[0.

DEBUG:chainerrl.agents.a3c:t:9364 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05282665 -0.06490964  0.19497538  0.2115963  -0.1533463   0.11218701]] probs:[[0.16424145 0.14599922 0.18932903 0.19250214 0.13364202 0.17428607]] entropy:[1.7833382]
DEBUG:chainerrl.agents.a3c:t:9365 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0528268  -0.06490939  0.19497535  0.21159638 -0.15334618  0.11218745]] probs:[[0.16424145 0.14599924 0.18932898 0.19250213 0.133642   0.17428611]] entropy:[1.7833381]
DEBUG:chainerrl.agents.a3c:t:9366 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05281681 -0.06493441  0.19497836  0.21159129 -0.1533513   0.11215281]] probs:[[0.16424185 0.1459974  0.1893319  0.19250354 0.13364299 0.17428224]] entropy:[1.7833377]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4203703] v_loss:[[5.125572e-07]]
DEBUG:chainerrl.agents.a3c:grad norm:0.303003546030996
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.

DEBUG:chainerrl.agents.a3c:t:9392 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24807647  0.17013709  0.04439138  0.05086709 -0.10857511 -0.0316943 ]] probs:[[0.19929324 0.18435034 0.16256732 0.16362348 0.13950853 0.15065712]] entropy:[1.7846322]
DEBUG:chainerrl.agents.a3c:t:9393 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24809678  0.17021231  0.04437516  0.05086507 -0.10855034 -0.03162084]] probs:[[0.19929142 0.18435878 0.1625599  0.16361834 0.13950787 0.15066375]] entropy:[1.7846318]
DEBUG:chainerrl.agents.a3c:t:9394 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24810776  0.17026253  0.04436341  0.05086428 -0.10853404 -0.03157127]] probs:[[0.1992898  0.1843645  0.16255488 0.16361508 0.13950747 0.15066834]] entropy:[1.784632]
DEBUG:chainerrl.agents.a3c:t:9395 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2481189   0.17029549  0.04435698  0.05086312 -0.10852298 -0.03153967]] probs:[[0

DEBUG:chainerrl.agents.a3c:t:9421 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14809136  0.10435607  0.05842155  0.07607007 -0.03183001  0.02456443]] probs:[[0.18112382 0.17337306 0.16558938 0.16853772 0.15129924 0.16007684]] entropy:[1.7901394]
DEBUG:chainerrl.agents.a3c:t:9422 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14698325  0.10217752  0.06348019  0.07735135 -0.03103334  0.02896547]] probs:[[0.18068749 0.17277034 0.16621229 0.16853392 0.15122251 0.16057341]] entropy:[1.7902126]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5052465] v_loss:[[6.53447e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.2856799232847164
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:9423 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12342103  0.09368943  0.06411783  0.0802052  -0.00727041  0.02939131]] probs:[[0.17672291 0.171546   0.16654737 0.16924836 0.15507232 0.16086304]] entropy:[1.7908564]
DEBUG:chainerrl.agents.

DEBUG:chainerrl.agents.a3c:t:9450 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09423015 0.07698472 0.07461885 0.08079544 0.04090124 0.04181498]] probs:[[0.17102377 0.16809967 0.16770245 0.16874148 0.16214219 0.16229041]] entropy:[1.7915609]
DEBUG:chainerrl.agents.a3c:t:9451 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.095274   0.07748042 0.07606269 0.08240283 0.04231295 0.04169667]] probs:[[0.17103383 0.16801743 0.1677794  0.16884653 0.16221139 0.16211145]] entropy:[1.7915566]
DEBUG:chainerrl.agents.a3c:t:9452 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09527208 0.0774794  0.07605988 0.08239972 0.0423103  0.04169694]] probs:[[0.17103381 0.16801758 0.16777924 0.16884632 0.16221125 0.16211179]] entropy:[1.7915566]
DEBUG:chainerrl.agents.a3c:t:9453 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09440057 0.07706416 0.07484814 0.08105023 0.04114841 0.04180265]] probs:[[0.17102522 0.1680858  0.

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4381808] v_loss:[[7.69187e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.0680471481081936
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:9479 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08928885 0.06410147 0.08841822 0.08865814 0.06623311 0.03322914]] probs:[[0.16959739 0.16537902 0.16944979 0.16949046 0.16573192 0.16035138]] entropy:[1.7915581]
DEBUG:chainerrl.agents.a3c:t:9480 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07947537 0.06196487 0.09016405 0.0865467  0.06623319 0.03549132]] probs:[[0.16822843 0.1653083  0.17003621 0.16942225 0.1660154  0.16098942]] entropy:[1.791591]
DEBUG:chainerrl.agents.a3c:t:9481 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07796348 0.06159512 0.09040692 0.08624566 0.06618764 0.03578394]] probs:[[0.16802223 0.16529436 0.17012607 0.1694196  0.16605523 0.16108252]] entropy:[1.7915944]
DEBUG:chainerrl.agents.a3c:t:9482 r:0.0 a:

DEBUG:chainerrl.agents.a3c:t:9508 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06090973 0.04363022 0.07029411 0.09872437 0.07492147 0.06662704]] probs:[[0.16527067 0.1624394  0.16682893 0.17164    0.1676027  0.16621828]] entropy:[1.7916226]
DEBUG:chainerrl.agents.a3c:t:9509 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06090973 0.04363022 0.07029411 0.09872437 0.07492147 0.06662704]] probs:[[0.16527067 0.1624394  0.16682893 0.17164    0.1676027  0.16621828]] entropy:[1.7916226]
DEBUG:chainerrl.agents.a3c:t:9510 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06084767 0.04365359 0.07029647 0.09863155 0.07478069 0.06664225]] probs:[[0.16526753 0.16245021 0.16683652 0.17163146 0.16758633 0.16622797]] entropy:[1.7916235]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4763142] v_loss:[[2.2455959e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.030155811317505405
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:9511 r:0

DEBUG:chainerrl.agents.a3c:t:9537 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05659114 0.0424946  0.07884807 0.0897486  0.07105107 0.07607114]] probs:[[0.16456957 0.16226597 0.16827345 0.17011775 0.16696651 0.1678068 ]] entropy:[1.7916402]
DEBUG:chainerrl.agents.a3c:t:9538 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05659144 0.04249439 0.07884805 0.08974931 0.07105213 0.07607097]] probs:[[0.16456954 0.16226587 0.16827337 0.1701178  0.16696663 0.16780671]] entropy:[1.79164]
DEBUG:chainerrl.agents.a3c:t:9539 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05653251 0.04250639 0.07885648 0.08965647 0.07092383 0.07609355]] probs:[[0.1645664  0.16227429 0.16828148 0.17010878 0.16695185 0.16781718]] entropy:[1.7916405]
DEBUG:chainerrl.agents.a3c:t:9540 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05653137 0.04250683 0.07885645 0.08965471 0.07092106 0.07609369]] probs:[[0.16456637 0.16227451 0.16

DEBUG:chainerrl.agents.a3c:t:9566 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19039337  0.08444981  0.096851    0.07969095 -0.02093973 -0.03648232]] probs:[[0.18825641 0.16933201 0.17144501 0.1685281  0.1523944  0.1500441 ]] entropy:[1.7888312]
DEBUG:chainerrl.agents.a3c:pi_loss:[-0.14758536] v_loss:[[0.01662257]]
DEBUG:chainerrl.agents.a3c:grad norm:28.503821062374755
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:9567 r:0.1 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06494182  0.01255532  0.11459871  0.16943567  0.17555684 -0.12827416]] probs:[[0.16525403 0.1568198  0.17366719 0.18345653 0.18458293 0.13621956]] entropy:[1.786595]
DEBUG:chainerrl.agents.a3c:t:9568 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06494175  0.0125552   0.11459871  0.1694357   0.17555696 -0.12827422]] probs:[[0.16525401 0.15681978 0.17366719 0.18345653 0.18458296 0.13621953]] entropy:[1.7865949]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:9594 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12413936  0.00702264 -0.02165865  0.1803488   0.20847085 -0.09066249]] probs:[[0.17524415 0.15587641 0.15146919 0.18537664 0.1906638  0.14136969]] entropy:[1.7857821]
DEBUG:chainerrl.agents.a3c:t:9595 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12443223  0.00699443 -0.02194361  0.18123408  0.20985469 -0.09071328]] probs:[[0.17522107 0.15580586 0.15136175 0.18546206 0.19084679 0.1413025 ]] entropy:[1.7857234]
DEBUG:chainerrl.agents.a3c:t:9596 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12449501  0.00697366 -0.02199752  0.18140692  0.21010324 -0.0907186 ]] probs:[[0.17521834 0.1557904  0.15134172 0.18547958 0.19087927 0.14129068]] entropy:[1.7857125]
DEBUG:chainerrl.agents.a3c:t:9597 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12454919  0.00694516 -0.02204661  0.1815662   0.21032278 -0.09070986]] probs:[[

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6663713] v_loss:[[0.00066393]]
DEBUG:chainerrl.agents.a3c:grad norm:1.3892275751351275
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:9623 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07320742  0.02573523  0.03029748  0.16978078  0.11212067 -0.01291151]] probs:[[0.16749886 0.1597331  0.1604635  0.18448162 0.17414525 0.1536777 ]] entropy:[1.7898891]
DEBUG:chainerrl.agents.a3c:t:9624 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07320742  0.02573523  0.03029748  0.16978078  0.11212067 -0.01291151]] probs:[[0.16749886 0.1597331  0.1604635  0.18448162 0.17414525 0.1536777 ]] entropy:[1.7898891]
DEBUG:chainerrl.agents.a3c:t:9625 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07320744  0.02573513  0.03029745  0.16978076  0.11212069 -0.01291168]] probs:[[0.16749886 0.15973309 0.1604635  0.18448162 0.17414527 0.15367767]] entropy:[1.7898892]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:9652 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06469262 0.06039521 0.05411484 0.1132913  0.08366582 0.02189583]] probs:[[0.1663271  0.16561386 0.16457699 0.17461002 0.16951299 0.15935901]] entropy:[1.7913696]
DEBUG:chainerrl.agents.a3c:t:9653 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06469262 0.06039521 0.05411484 0.1132913  0.08366582 0.02189583]] probs:[[0.1663271  0.16561386 0.16457699 0.17461002 0.16951299 0.15935901]] entropy:[1.7913696]
DEBUG:chainerrl.agents.a3c:t:9654 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06469256 0.06039513 0.05411492 0.11329129 0.0836657  0.02189574]] probs:[[0.1663271  0.16561386 0.16457702 0.17461003 0.16951297 0.15935901]] entropy:[1.7913694]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5909019] v_loss:[[0.00030714]]
DEBUG:chainerrl.agents.a3c:grad norm:0.7134765122994174
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:9655 r:0.0 a:

DEBUG:chainerrl.agents.a3c:t:9681 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14677162 -0.09668948  0.1461895  -0.08768962  0.24997392  0.06358577]] probs:[[0.1784688  0.13990347 0.17836493 0.14116827 0.19787115 0.16422343]] entropy:[1.7838452]
DEBUG:chainerrl.agents.a3c:t:9682 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1467716  -0.09668949  0.14618947 -0.08768962  0.2499739   0.06358576]] probs:[[0.1784688  0.13990347 0.17836493 0.14116827 0.19787115 0.16422343]] entropy:[1.7838452]
DEBUG:chainerrl.agents.a3c:t:9683 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14677161 -0.09668948  0.14618947 -0.0876896   0.24997391  0.06358577]] probs:[[0.1784688  0.13990347 0.17836493 0.14116827 0.19787115 0.16422343]] entropy:[1.7838452]
DEBUG:chainerrl.agents.a3c:t:9684 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1467716  -0.09668951  0.14618947 -0.08768962  0.2499739   0.06358577]] probs:[[

DEBUG:chainerrl.agents.a3c:t:9710 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1566596  -0.04257006  0.11407731 -0.026315    0.17131418  0.04534834]] probs:[[0.18116318 0.14843817 0.17361078 0.15087076 0.1838376  0.16207948]] entropy:[1.7882757]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5942011] v_loss:[[0.00032817]]
DEBUG:chainerrl.agents.a3c:grad norm:0.7080448250947906
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:9711 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15566507 -0.02744487  0.08936762 -0.01318697  0.15719815  0.04911872]] probs:[[0.1813687  0.15102148 0.16973434 0.15319015 0.18164697 0.16303837]] entropy:[1.7890891]
DEBUG:chainerrl.agents.a3c:t:9712 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15523581 -0.02768931  0.08993442 -0.01434111  0.15549853  0.04949747]] probs:[[0.18137102 0.15105131 0.16990566 0.1530811  0.18141867 0.16317226]] entropy:[1.7891016]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:9738 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09719844 0.01862969 0.07340346 0.03525356 0.09606673 0.07218822]] probs:[[0.1719682  0.15897404 0.16792454 0.16163889 0.1717737  0.16772059]] entropy:[1.7913334]
DEBUG:chainerrl.agents.a3c:t:9739 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09719846 0.01862969 0.07340355 0.03525348 0.0960667  0.07218838]] probs:[[0.1719682  0.15897404 0.16792455 0.16163887 0.1717737  0.16772062]] entropy:[1.7913334]
DEBUG:chainerrl.agents.a3c:t:9740 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09719843 0.01862968 0.07340348 0.03525356 0.09606673 0.07218822]] probs:[[0.1719682  0.15897404 0.16792454 0.1616389  0.1717737  0.16772059]] entropy:[1.7913334]
DEBUG:chainerrl.agents.a3c:t:9741 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0971984  0.01862967 0.07340345 0.03525356 0.09606676 0.07218822]] probs:[[0.1719682  0.15897404 0.

INFO: outdir:result global_step:19929 local_step:9761 R:0.30000000000000004
INFO: statistics:[('average_value', 0.35512751742241944), ('average_entropy', 1.7877020871269438)]


DEBUG:chainerrl.agents.a3c:t:9762 r:0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13347083  0.09306419  0.05390305  0.12490758 -0.12916622  0.12018397]] probs:[[0.17758173 0.17054929 0.16399947 0.17606755 0.13656408 0.17523783]] entropy:[1.7879676]
DEBUG:chainerrl.agents.a3c:t:9763 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13370232  0.12640212  0.01928796  0.09594847 -0.14316389  0.13305293]] probs:[[0.17840986 0.17711218 0.15912166 0.17179976 0.13526249 0.17829403]] entropy:[1.7872219]
DEBUG:chainerrl.agents.a3c:t:9764 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13398813  0.13127315  0.01417311  0.09138533 -0.14554024  0.13568598]] probs:[[0.17855534 0.17807122 0.15839367 0.17110814 0.13501288 0.17885876]] entropy:[1.7870443]
DEBUG:chainerrl.agents.a3c:t:9765 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13417676  0.13204932  0.01354911  0.09066214 -0.14592966  0.13658401]] probs:[[0.

DEBUG:chainerrl.agents.a3c:t:9791 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13522984  0.06806692  0.04718323  0.06109381 -0.03934027  0.09670268]] probs:[[0.17916878 0.16753048 0.1640681  0.16636632 0.15046915 0.1723972 ]] entropy:[1.7903677]
DEBUG:chainerrl.agents.a3c:t:9792 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13528422  0.0680799   0.04715014  0.06108651 -0.0393583   0.09667123]] probs:[[0.17917903 0.16753314 0.16406314 0.1663656  0.15046687 0.17239226]] entropy:[1.7903669]
DEBUG:chainerrl.agents.a3c:t:9793 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13528422  0.0680799   0.04715014  0.06108651 -0.0393583   0.09667125]] probs:[[0.17917903 0.16753314 0.16406314 0.1663656  0.15046687 0.17239226]] entropy:[1.7903669]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5079027] v_loss:[[7.159448e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.18609049338625294
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:9819 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08715318 0.06639646 0.05149778 0.10824395 0.00915425 0.09639477]] probs:[[0.16949168 0.16600986 0.16355486 0.17310436 0.15677394 0.17106532]] entropy:[1.7912252]
DEBUG:chainerrl.agents.a3c:t:9820 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08715796 0.06639507 0.05150604 0.10824094 0.00915859 0.09639244]] probs:[[0.1694922  0.16600932 0.16355592 0.17310353 0.15677434 0.17106462]] entropy:[1.7912253]
DEBUG:chainerrl.agents.a3c:t:9821 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08716036 0.06639442 0.0515103  0.10823942 0.00916073 0.09639107]] probs:[[0.16949247 0.16600908 0.16355649 0.17310312 0.15677455 0.17106424]] entropy:[1.7912252]
DEBUG:chainerrl.agents.a3c:t:9822 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08715513 0.06639595 0.05150118 0.10824273 0.00915601 0.09639385]] probs:[[0.16949189 0.16600963 0.

DEBUG:chainerrl.agents.a3c:t:9848 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06038763 0.06952016 0.05372494 0.10888469 0.02319912 0.10296066]] probs:[[0.16503794 0.16655207 0.163942   0.17323905 0.15901315 0.17221582]] entropy:[1.7913312]
DEBUG:chainerrl.agents.a3c:t:9849 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06038432 0.06952961 0.05370843 0.10886472 0.0231972  0.10291144]] probs:[[0.16503969 0.16655594 0.16394156 0.173238   0.15901504 0.17220972]] entropy:[1.7913316]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4065397] v_loss:[[1.5749749e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.04859246383620315
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:9850 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00212339  0.2659316  -0.04043731  0.04574459  0.04793709  0.04648018]] probs:[[0.15576322 0.20364782 0.14990821 0.16340065 0.16375929 0.16352087]] entropy:[1.786606]
DEBUG:chainerrl.agents.a3c:t:9851

DEBUG:chainerrl.agents.a3c:t:9876 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00644591 0.11842883 0.00192668 0.0414496  0.04412528 0.01029292]] probs:[[0.16150233 0.18063934 0.16077411 0.16725561 0.16770373 0.16212483]] entropy:[1.7909365]
DEBUG:chainerrl.agents.a3c:t:9877 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00657998 0.12023664 0.00125423 0.04051271 0.04420099 0.01214613]] probs:[[0.16145985 0.18089435 0.16060224 0.16703264 0.16764984 0.16236107]] entropy:[1.7909155]
DEBUG:chainerrl.agents.a3c:t:9878 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.00499999 0.11940408 0.00269174 0.04183147 0.04249655 0.01049349]] probs:[[0.16128683 0.1808356  0.16091496 0.16733801 0.16744934 0.16217528]] entropy:[1.7909211]
DEBUG:chainerrl.agents.a3c:t:9879 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00875638  0.12473392 -0.00046865  0.04005641  0.04207729  0.01739771]] probs:[[0.16159935 0.18147

DEBUG:chainerrl.agents.a3c:t:9905 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02821567 0.04936428 0.04963589 0.11833055 0.01509033 0.0280245 ]] probs:[[0.16328946 0.16677958 0.16682488 0.17868765 0.16116023 0.16325824]] entropy:[1.7911744]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6020079] v_loss:[[0.00039376]]
DEBUG:chainerrl.agents.a3c:grad norm:1.6916049395189843
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:9906 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05052116  0.18180041  0.05413936  0.07787749  0.02117488 -0.01727958]] probs:[[0.15112601 0.1906493  0.16780029 0.17183122 0.16235901 0.15623412]] entropy:[1.7888902]
DEBUG:chainerrl.agents.a3c:t:9907 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05539376  0.16726826  0.0543016   0.07362075  0.02125697 -0.00024766]] probs:[[0.15061595 0.1881793  0.16807806 0.17135677 0.16261476 0.15915512]] entropy:[1.7893355]
DEBUG:chainerrl.agents.a3c:t:9

DEBUG:chainerrl.agents.a3c:t:9933 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09338785  0.04589534  0.01218377  0.1428033   0.12230343  0.04596347]] probs:[[0.14456175 0.16616645 0.16065809 0.18307537 0.17936057 0.16617776]] entropy:[1.7888589]
DEBUG:chainerrl.agents.a3c:t:9934 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0913545   0.04495676  0.01640081  0.14178973  0.12281183  0.04760274]] probs:[[0.14471193 0.16584544 0.16117655 0.182708   0.17927328 0.16628484]] entropy:[1.7889378]
DEBUG:chainerrl.agents.a3c:t:9935 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09058478  0.04283898  0.01118805  0.15110525  0.12145907  0.05889038]] probs:[[0.14449422 0.16511849 0.15997416 0.18399885 0.17862406 0.16779025]] entropy:[1.7887769]
DEBUG:chainerrl.agents.a3c:t:9936 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08772633  0.03974083  0.01516282  0.15203255  0.12333439  0.06435845]] probs:[[

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.8216516] v_loss:[[0.00213066]]
DEBUG:chainerrl.agents.a3c:grad norm:14.102095798441987
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:9962 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12438665  0.02745476  0.00663005  0.28761485  0.06968288 -0.1446771 ]] probs:[[0.17591092 0.15965988 0.1563694  0.20710087 0.1665464  0.13441257]] entropy:[1.7831388]
DEBUG:chainerrl.agents.a3c:t:9963 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13000023  0.01224137  0.0064766   0.2864208   0.05457747 -0.14702275]] probs:[[0.1777014  0.15796062 0.15705264 0.20778942 0.16479164 0.13470425]] entropy:[1.7828941]
DEBUG:chainerrl.agents.a3c:t:9964 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13148777  0.00741404  0.00638189  0.28618595  0.04936673 -0.14807256]] probs:[[0.17824358 0.15744518 0.15728275 0.20806472 0.16419093 0.13477284]] entropy:[1.7827895]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:9990 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10726803  0.07321275  0.00911254  0.18502347  0.1335935  -0.07964034]] probs:[[0.1721147  0.16635196 0.15602334 0.1860316  0.17670587 0.14277254]] entropy:[1.7881424]
DEBUG:chainerrl.agents.a3c:t:9991 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10726499  0.07320929  0.00911292  0.18502182  0.13359268 -0.07963679]] probs:[[0.17211434 0.16635156 0.15602356 0.18603148 0.1767059  0.1427732 ]] entropy:[1.7881427]
DEBUG:chainerrl.agents.a3c:t:9992 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10737572  0.07334069  0.00929355  0.185026    0.1336248  -0.07963289]] probs:[[0.1721203  0.16636075 0.15603986 0.1860181  0.17669812 0.14276287]] entropy:[1.7881439]
DEBUG:chainerrl.agents.a3c:t:9993 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10737581  0.07334072  0.00929345  0.18502603  0.1336247  -0.0796328 ]] probs:[[

DEBUG:chainerrl.agents.a3c:t:10019 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00088868  0.08489842  0.06826487  0.17461613  0.10523307  0.14576456]] probs:[[0.15099053 0.1645154  0.16180156 0.17995772 0.167895   0.17483984]] entropy:[1.7901942]
DEBUG:chainerrl.agents.a3c:t:10020 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00081117  0.08495343  0.06821441  0.17454793  0.1052532   0.14575231]] probs:[[0.15100199 0.1645242  0.16179314 0.17994516 0.16789813 0.17483743]] entropy:[1.7901962]
DEBUG:chainerrl.agents.a3c:t:10021 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00051085  0.08511138  0.06835458  0.17444086  0.10516825  0.14595336]] probs:[[0.15103291 0.16453445 0.16180035 0.17990868 0.16786781 0.17485584]] entropy:[1.7902018]
DEBUG:chainerrl.agents.a3c:t:10022 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00052934  0.08504236  0.06831849  0.17454396  0.10520697  0.14594173]] prob

DEBUG:chainerrl.agents.a3c:t:10048 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07687147 0.15920566 0.05793882 0.13339978 0.09279516 0.0913896 ]] probs:[[0.16244587 0.17638676 0.15939929 0.17189318 0.16505332 0.16482149]] entropy:[1.7911674]
DEBUG:chainerrl.agents.a3c:t:10049 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07379295 0.15626302 0.06348652 0.13517548 0.09386905 0.09533562]] probs:[[0.16178419 0.17569216 0.16012533 0.17202602 0.165065   0.16530727]] entropy:[1.7912186]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5664208] v_loss:[[0.00024823]]
DEBUG:chainerrl.agents.a3c:grad norm:1.201284441721611
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:10050 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0731371  0.15543298 0.06771433 0.14796141 0.09703146 0.1021735 ]] probs:[[0.16098632 0.17479526 0.16011569 0.17349413 0.1648793  0.16572931]] entropy:[1.7911856]
DEBUG:chainerrl.agents.a3c:t:10051 r:0.0

DEBUG:chainerrl.agents.a3c:t:10077 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18736137 0.13210987 0.09531245 0.01627302 0.12137764 0.12060716]] probs:[[0.1794486  0.16980273 0.16366799 0.1512298  0.16799012 0.16786073]] entropy:[1.7904773]
DEBUG:chainerrl.agents.a3c:t:10078 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18736136 0.13210987 0.09531245 0.01627305 0.12137764 0.12060717]] probs:[[0.1794486  0.16980274 0.16366799 0.1512298  0.16799012 0.16786073]] entropy:[1.7904773]
DEBUG:chainerrl.agents.a3c:t:10079 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18736412 0.13210742 0.09532292 0.01629072 0.12138125 0.12062193]] probs:[[0.17944777 0.16980106 0.16366848 0.15123135 0.16798948 0.16786195]] entropy:[1.7904778]
DEBUG:chainerrl.agents.a3c:t:10080 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.18736137 0.13210987 0.09531245 0.01627302 0.12137764 0.12060716]] probs:[[0.1794486  0.1698027

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4083341] v_loss:[[7.1224486e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.0636898326281721
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:10106 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.14487605 0.15127614 0.08602007 0.04006824 0.12615085 0.12116033]] probs:[[0.17218304 0.17328855 0.16234149 0.1550504  0.16898887 0.16814764]] entropy:[1.7910455]
DEBUG:chainerrl.agents.a3c:t:10107 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13510321 0.16700384 0.09664132 0.02817069 0.08367846 0.09026797]] probs:[[0.1724346  0.17802405 0.16592836 0.15494737 0.16379134 0.16487421]] entropy:[1.7908239]
DEBUG:chainerrl.agents.a3c:t:10108 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.13645329 0.16620462 0.09512275 0.03062579 0.08518239 0.08939447]] probs:[[0.17261194 0.17782453 0.1656232  0.1552782  0.16398501 0.16467717]] entropy:[1.7908542]
DEBUG:chainerrl.agents.a3c:t:10109 r

DEBUG:chainerrl.agents.a3c:t:10135 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08930693 0.14602742 0.05265063 0.08070038 0.13140026 0.1045918 ]] probs:[[0.16468517 0.17429619 0.15875773 0.16327389 0.17176528 0.1672217 ]] entropy:[1.7912719]
DEBUG:chainerrl.agents.a3c:t:10136 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.09038501 0.14321986 0.04301807 0.07527384 0.14799425 0.11292546]] probs:[[0.16460654 0.17353736 0.1569914  0.16213784 0.17436787 0.16835897]] entropy:[1.7910746]
DEBUG:chainerrl.agents.a3c:t:10137 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08371893 0.15298922 0.04335146 0.07079278 0.12141939 0.08900177]] probs:[[0.16493388 0.17676388 0.1584085  0.16281563 0.17127064 0.16580749]] entropy:[1.7911325]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3818815] v_loss:[[3.3365708e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.3283293459316239
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:10138 r

DEBUG:chainerrl.agents.a3c:t:10164 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10014801 0.13823187 0.06150427 0.07228616 0.08904167 0.15984233]] probs:[[0.16600537 0.17244941 0.15971267 0.161444   0.16417186 0.17621669]] entropy:[1.791142]
DEBUG:chainerrl.agents.a3c:t:10165 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10019152 0.13818    0.06165559 0.07247797 0.08907362 0.16002695]] probs:[[0.16599745 0.17242475 0.15972227 0.16146024 0.16416213 0.17623316]] entropy:[1.7911423]
DEBUG:chainerrl.agents.a3c:t:10166 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10018857 0.13819401 0.06162886 0.07244675 0.08906705 0.15999995]] probs:[[0.16599914 0.17242944 0.15972012 0.16145733 0.16416322 0.17623073]] entropy:[1.7911422]
DEBUG:chainerrl.agents.a3c:t:10167 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10013564 0.13826749 0.06144595 0.07221648 0.0890257  0.15977377]] probs:[[0.16600849 0.17246094

INFO: outdir:result global_step:20782 local_step:10185 R:0.30000000000000004
INFO: statistics:[('average_value', 0.3273797321938242), ('average_entropy', 1.7884454350074706)]


DEBUG:chainerrl.agents.a3c:t:10186 r:0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.31930417  0.00854896  0.15118407  0.02476169 -0.0306764   0.03416158]] probs:[[0.20922138 0.15333694 0.17684485 0.1558432  0.14743868 0.15731503]] entropy:[1.7841042]
DEBUG:chainerrl.agents.a3c:t:10187 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.32228708  0.02917865  0.11894915 -0.02780829 -0.02697744  0.04843253]] probs:[[0.2113176  0.1576305  0.17243567 0.1488988  0.14902256 0.16069493]] entropy:[1.7838428]
DEBUG:chainerrl.agents.a3c:t:10188 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.32435873  0.03325949  0.11433569 -0.03317903 -0.02438792  0.05133743]] probs:[[0.21168268 0.15822041 0.1715827  0.14805008 0.14935733 0.16110674]] entropy:[1.7837678]
DEBUG:chainerrl.agents.a3c:t:10189 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.32369995  0.03427695  0.11395027 -0.03316349 -0.02411134  0.0512476 ]] probs:

DEBUG:chainerrl.agents.a3c:t:10215 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22210546 0.05724907 0.07820208 0.03102718 0.00605401 0.03861454]] probs:[[0.19312072 0.16376936 0.16723703 0.15953083 0.15559618 0.16074586]] entropy:[1.7890992]
DEBUG:chainerrl.agents.a3c:t:10216 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22209263 0.05720647 0.07817324 0.03100982 0.0060631  0.03862208]] probs:[[0.19312105 0.16376476 0.16723463 0.15953037 0.15559985 0.1607494 ]] entropy:[1.7890996]
DEBUG:chainerrl.agents.a3c:t:10217 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.22209282 0.05720653 0.07817328 0.03100957 0.00606316 0.038622  ]] probs:[[0.19312108 0.16376476 0.16723463 0.15953033 0.15559985 0.16074939]] entropy:[1.7890995]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4682081] v_loss:[[1.6540444e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.1335698762703976
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:10218 r

DEBUG:chainerrl.agents.a3c:t:10244 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07233731 0.00406423 0.04959257 0.21175556 0.03742568 0.04197611]] probs:[[0.16675259 0.15574782 0.16300264 0.19169958 0.16103144 0.16176586]] entropy:[1.7893915]
DEBUG:chainerrl.agents.a3c:t:10245 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07164996 0.00378289 0.04954296 0.21160501 0.0375058  0.04236104]] probs:[[0.16665804 0.15572272 0.16301416 0.19169377 0.1610637  0.16184759]] entropy:[1.7893946]
DEBUG:chainerrl.agents.a3c:t:10246 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07335355 0.00478699 0.04963193 0.21229556 0.03754291 0.04096389]] probs:[[0.16688088 0.1558219  0.16296878 0.19175573 0.16101052 0.16156226]] entropy:[1.789381]
DEBUG:chainerrl.agents.a3c:t:10247 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07427081 0.00524631 0.04962235 0.21244073 0.03748265 0.04035641]] probs:[[0.1670101  0.15587117

DEBUG:chainerrl.agents.a3c:t:10273 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05923742 0.00874258 0.01687797 0.16473246 0.09518771 0.04205255]] probs:[[0.16555987 0.15740752 0.15869331 0.18398017 0.17162009 0.16273905]] entropy:[1.7903106]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4783002] v_loss:[[4.604736e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.15346830910018705
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:10274 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07487423 0.01480207 0.03279975 0.13313432 0.09241999 0.03991522]] probs:[[0.16824165 0.15843259 0.16130982 0.17833458 0.17121962 0.1624617 ]] entropy:[1.7909439]
DEBUG:chainerrl.agents.a3c:t:10275 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07485547 0.01494469 0.03277778 0.13312975 0.09237612 0.03994631]] probs:[[0.16823637 0.15845318 0.16130425 0.17833151 0.17120996 0.16246471]] entropy:[1.7909453]
DEBUG:chainerrl.agents.a3c:t:10276 r

DEBUG:chainerrl.agents.a3c:t:10302 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06710443 0.05690844 0.06113195 0.0996827  0.0495447  0.05605556]] probs:[[0.16698335 0.16528943 0.16598901 0.17251295 0.16407675 0.16514853]] entropy:[1.7916236]
DEBUG:chainerrl.agents.a3c:t:10303 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06713097 0.05695384 0.06105199 0.09973136 0.04936165 0.05608422]] probs:[[0.16699083 0.16529995 0.16597877 0.17252451 0.16404973 0.16515628]] entropy:[1.7916229]
DEBUG:chainerrl.agents.a3c:t:10304 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06712581 0.05695016 0.06104339 0.09973052 0.04935189 0.05608565]] probs:[[0.1669907  0.16530007 0.16597807 0.17252512 0.16404884 0.16515723]] entropy:[1.7916228]
DEBUG:chainerrl.agents.a3c:t:10305 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06711559 0.05693221 0.06106974 0.09971242 0.04941679 0.05607702]] probs:[[0.16698802 0.1652961

DEBUG:chainerrl.agents.a3c:t:10331 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11569779 0.03756576 0.02511414 0.04518394 0.14015687 0.07039826]] probs:[[0.17389409 0.16082461 0.15883449 0.16205448 0.17819981 0.16619252]] entropy:[1.7908592]
DEBUG:chainerrl.agents.a3c:t:10332 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11516269 0.03749494 0.02420839 0.04479259 0.13974251 0.07099729]] probs:[[0.17385076 0.1608592  0.15873607 0.16203739 0.17817692 0.16633965]] entropy:[1.7908587]
DEBUG:chainerrl.agents.a3c:t:10333 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10917693 0.03715105 0.01679041 0.04163679 0.13560575 0.07816148]] probs:[[0.1732144  0.16117719 0.15792869 0.1619018  0.17785329 0.16792454]] entropy:[1.7908672]
DEBUG:chainerrl.agents.a3c:t:10334 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10374925 0.03613397 0.01238072 0.03952749 0.13195983 0.08404303]] probs:[[0.17258646 0.1613027

DEBUG:chainerrl.agents.a3c:t:10360 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01489342  0.05650275  0.1224205   0.04808826  0.10957695  0.03809741]] probs:[[0.15448381 0.16591664 0.177222   0.16452639 0.1749604  0.16289082]] entropy:[1.790714]
DEBUG:chainerrl.agents.a3c:t:10361 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01499497  0.05660023  0.12226612  0.04818944  0.1095443   0.03798999]] probs:[[0.15447327 0.16593835 0.17720056 0.16454853 0.17496052 0.16287875]] entropy:[1.7907145]
DEBUG:chainerrl.agents.a3c:pi_loss:[-0.51526445] v_loss:[[0.01375939]]
DEBUG:chainerrl.agents.a3c:grad norm:66.91521488652108
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:10362 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00750351  0.04360184  0.20436472  0.05380275  0.02183692  0.03933278]] probs:[[0.15553626 0.16369161 0.19224048 0.16536996 0.16016737 0.16299428]] entropy:[1.7893094]
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:10388 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02374008 -0.15437926  0.22246946 -0.12158769  0.43991077 -0.02375448]] probs:[[0.15646529 0.13093683 0.19086449 0.13530162 0.23722403 0.14920777]] entropy:[1.7683369]
DEBUG:chainerrl.agents.a3c:t:10389 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0189025  -0.16104473  0.22909747 -0.1154219   0.4475431  -0.02427983]] probs:[[0.15536527 0.12977882 0.19170809 0.13583684 0.23851196 0.14879902]] entropy:[1.7674695]
DEBUG:chainerrl.agents.a3c:t:10390 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01652162 -0.16139355  0.23054376 -0.11603065  0.44865814 -0.0236873 ]] probs:[[0.15497497 0.12971613 0.19195975 0.13573593 0.23874597 0.1488672 ]] entropy:[1.7672935]
DEBUG:chainerrl.agents.a3c:t:10391 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01826634 -0.16102518  0.22953147 -0.11549816  0.4477397  -0.02410892]] prob

DEBUG:chainerrl.agents.a3c:t:10417 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14553659 -0.082506    0.11297546 -0.14133245  0.37920073  0.00881497]] probs:[[0.17698811 0.14089853 0.17131798 0.13284905 0.22357495 0.15437132]] entropy:[1.7763566]
DEBUG:chainerrl.agents.a3c:pi_loss:[-0.17827031] v_loss:[[0.041697]]
DEBUG:chainerrl.agents.a3c:grad norm:43.04218660812328
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:10418 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18201019 -0.09185161 -0.00123106 -0.08577775  0.5625929  -0.10714763]] probs:[[0.17952982 0.13652116 0.14947066 0.13735288 0.26267672 0.13444881]] entropy:[1.7578791]
DEBUG:chainerrl.agents.a3c:t:10419 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18230867 -0.0916037  -0.0012113  -0.08684748  0.5632436  -0.10669668]] probs:[[0.17955197 0.13653107 0.14944743 0.137182   0.26280165 0.1344859 ]] entropy:[1.7577951]
DEBUG:chainerrl.agents.

DEBUG:chainerrl.agents.a3c:t:10445 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13777572 -0.03279883 -0.01298457 -0.17464885  0.55116266 -0.08369373]] probs:[[0.17396824 0.14668658 0.14962207 0.12728749 0.26302782 0.13940778]] entropy:[1.7583705]
DEBUG:chainerrl.agents.a3c:t:10446 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13798027 -0.03340096 -0.01224156 -0.17397122  0.5515649  -0.08189083]] probs:[[0.1739165  0.14652471 0.14965811 0.12730984 0.26300156 0.13958926]] entropy:[1.7584062]
DEBUG:chainerrl.agents.a3c:t:10447 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13844527 -0.03477907 -0.0105762  -0.1723685   0.55258507 -0.07790501]] probs:[[0.17379601 0.14615358 0.14973406 0.12736648 0.2629653  0.13998455]] entropy:[1.7584726]
DEBUG:chainerrl.agents.a3c:t:10448 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.13853793 -0.03521799 -0.01017395 -0.17195378  0.5528336  -0.07700764]] prob

DEBUG:chainerrl.agents.a3c:pi_loss:[-0.7235781] v_loss:[[0.03517228]]
DEBUG:chainerrl.agents.a3c:grad norm:33.98498337278977
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:10474 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05498679  0.06148513  0.14194728 -0.16927251  0.2061187  -0.04482621]] probs:[[0.16765478 0.1687478  0.18288681 0.13397425 0.19500767 0.15172867]] entropy:[1.7845652]
DEBUG:chainerrl.agents.a3c:t:10475 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05662194  0.06117978  0.14204615 -0.16926184  0.20666339 -0.04587705]] probs:[[0.16789736 0.16866437 0.18287028 0.13395034 0.19507699 0.15154064]] entropy:[1.7845325]
DEBUG:chainerrl.agents.a3c:t:10476 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07775354  0.0508177   0.13123076 -0.16708535  0.2161612  -0.05510339]] probs:[[0.1713731  0.16681863 0.18078713 0.13415611 0.19681235 0.15005264]] entropy:[1.7842793]
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:10502 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02923421  0.13037053  0.17449637 -0.09851258  0.02120481  0.06066387]] probs:[[0.15437154 0.1810851  0.18925455 0.14403899 0.16235761 0.16889216]] entropy:[1.7875404]
DEBUG:chainerrl.agents.a3c:t:10503 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.029234    0.13037337  0.17449796 -0.09851288  0.02120341  0.0606622 ]] probs:[[0.15437153 0.18108556 0.18925479 0.1440389  0.16235733 0.16889182]] entropy:[1.7875402]
DEBUG:chainerrl.agents.a3c:t:10504 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02923421  0.13037051  0.17449637 -0.09851258  0.02120482  0.06066387]] probs:[[0.15437154 0.1810851  0.18925455 0.14403899 0.16235761 0.16889216]] entropy:[1.7875404]
DEBUG:chainerrl.agents.a3c:t:10505 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.029234    0.13037337  0.17449796 -0.09851288  0.02120341  0.0606622 ]] prob

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:10530 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0274012  0.08426613 0.06884541 0.01619585 0.05535956 0.12977129]] probs:[[0.16062112 0.1700195  0.16741781 0.15883134 0.16517518 0.177935  ]] entropy:[1.7910467]
DEBUG:chainerrl.agents.a3c:t:10531 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02772791 0.08500921 0.06863771 0.01522113 0.05516629 0.12979813]] probs:[[0.16067968 0.17015234 0.16738936 0.15868261 0.16514951 0.17794651]] entropy:[1.7910383]
DEBUG:chainerrl.agents.a3c:t:10532 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02770685 0.08496972 0.06864797 0.01526159 0.05517021 0.12979285]] probs:[[0.16067667 0.170146   0.16739146 0.1586894  0.16515054 0.17794599]] entropy:[1.7910385]
DEBUG:chainerrl.agents.a3c:t:10533 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02770668 0.08496954 0.06864805 0.01526179 0.05517031 0.12979

DEBUG:chainerrl.agents.a3c:t:10559 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0307828   0.08290122  0.07061996  0.0376162   0.10818017  0.11330001]] probs:[[0.15146986 0.16970652 0.16763505 0.16219278 0.17405121 0.17494461]] entropy:[1.7905902]
DEBUG:chainerrl.agents.a3c:t:10560 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03091092  0.08288962  0.07063241  0.03761378  0.10807621  0.11330842]] probs:[[0.15145594 0.16971071 0.16764323 0.16219826 0.17403944 0.17495243]] entropy:[1.790589]
DEBUG:chainerrl.agents.a3c:t:10561 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03087213  0.08285652  0.07056338  0.03760531  0.10820617  0.11333527]] probs:[[0.15145959 0.16970262 0.16762921 0.16219452 0.1740595  0.17495456]] entropy:[1.7905884]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5875831] v_loss:[[0.00031372]]
DEBUG:chainerrl.agents.a3c:grad norm:0.7700170438493767
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:10587 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0256604   0.0145103   0.02597258  0.08847927  0.08058741  0.13214274]] probs:[[0.15389542 0.16020335 0.16205022 0.17250271 0.17114669 0.18020163]] entropy:[1.7903697]
DEBUG:chainerrl.agents.a3c:t:10588 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02408522  0.01560582  0.02698758  0.08864065  0.08141609  0.13247028]] probs:[[0.15401302 0.16024889 0.16208324 0.17239064 0.17114969 0.18011448]] entropy:[1.7903922]
DEBUG:chainerrl.agents.a3c:t:10589 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02576767  0.01370347  0.02615237  0.08863991  0.08132886  0.133092  ]] probs:[[0.15384673 0.16004066 0.16204543 0.17249432 0.17123781 0.18033503]] entropy:[1.7903465]
DEBUG:chainerrl.agents.a3c:t:10590 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02620795  0.01321314  0.02591943  0.08863329  0.08127341  0.13322397]] prob

DEBUG:chainerrl.agents.a3c:t:10616 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02779461 0.03482503 0.05452704 0.0774739  0.07363433 0.09376107]] probs:[[0.16128564 0.16242355 0.16565536 0.16950057 0.16885102 0.17228386]] entropy:[1.7914822]
DEBUG:chainerrl.agents.a3c:t:10617 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.02388372 0.03664056 0.04637014 0.07345588 0.07186283 0.09860942]] probs:[[0.16094926 0.1630156  0.16460943 0.16912892 0.16885972 0.17343707]] entropy:[1.791439]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5315917] v_loss:[[0.00011776]]
DEBUG:chainerrl.agents.a3c:grad norm:0.4276242140283676
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:10618 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06092599 0.03943783 0.07534808 0.08723465 0.07309764 0.06428853]] probs:[[0.16568527 0.16216297 0.1680921  0.17010207 0.16771425 0.16624333]] entropy:[1.7916502]
DEBUG:chainerrl.agents.a3c:t:10619 r:0.0

DEBUG:chainerrl.agents.a3c:t:10645 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.05280348 0.02694726 0.02340636 0.06322151 0.09564545 0.073916  ]] probs:[[0.16608293 0.1618437  0.16127165 0.16782224 0.17335287 0.16962665]] entropy:[1.7914373]
DEBUG:chainerrl.agents.a3c:t:10646 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04775182 0.0297149  0.0275909  0.05869984 0.07500128 0.05927827]] probs:[[0.16632317 0.1633501  0.16300352 0.16815408 0.1709177  0.16825138]] entropy:[1.7916174]
DEBUG:chainerrl.agents.a3c:t:10647 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04814433 0.03060348 0.0270053  0.05807713 0.07418105 0.05953326]] probs:[[0.16640294 0.16350953 0.16292225 0.16806401 0.17079242 0.16830891]] entropy:[1.7916223]
DEBUG:chainerrl.agents.a3c:t:10648 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04822509 0.02941791 0.02723536 0.05844694 0.07244121 0.05834413]] probs:[[0.16651253 0.1634101

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5392822] v_loss:[[0.00014298]]
DEBUG:chainerrl.agents.a3c:grad norm:2.013718056994595
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:10674 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01484941 -0.07989477 -0.03819774  0.19223385  0.18019886 -0.04928356]] probs:[[0.162082   0.14743069 0.15370807 0.1935405  0.1912252  0.15201351]] entropy:[1.785574]
DEBUG:chainerrl.agents.a3c:t:10675 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01362912 -0.08547562 -0.02943649  0.19478354  0.19191062 -0.03321333]] probs:[[0.1609918  0.14580187 0.15420575 0.1929648  0.19241124 0.15362445]] entropy:[1.7854286]
DEBUG:chainerrl.agents.a3c:t:10676 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01410156 -0.08452109 -0.0320527   0.19449796  0.18873914 -0.0368899 ]] probs:[[0.16129623 0.14614803 0.1540209  0.19318321 0.19207391 0.1532777 ]] entropy:[1.7854576]
DEBUG:chainerrl.agents.

DEBUG:chainerrl.agents.a3c:t:10702 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01632739 -0.02106337 -0.03248999  0.11654951  0.11673583 -0.01491909]] probs:[[0.16404387 0.1580234  0.15622799 0.18133678 0.18137057 0.15899733]] entropy:[1.7897395]
DEBUG:chainerrl.agents.a3c:t:10703 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01400172 -0.02446609 -0.03622137  0.11688191  0.11606982 -0.01476354]] probs:[[0.16391447 0.15772879 0.1558855  0.18167603 0.18152855 0.1592666 ]] entropy:[1.789669]
DEBUG:chainerrl.agents.a3c:t:10704 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01330801 -0.02404723 -0.03695869  0.1174067   0.11451048 -0.01599946]] probs:[[0.1638904  0.15788117 0.1558558  0.18187082 0.18134484 0.1591569 ]] entropy:[1.7896687]
DEBUG:chainerrl.agents.a3c:t:10705 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00842992 -0.02881518 -0.04460201  0.11866284  0.10964983 -0.0175804 ]] probs

DEBUG:chainerrl.agents.a3c:grad norm:0.38883845346437573
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:10730 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11262363  0.05610268 -0.25616008  0.13540784  0.11546856  0.02997657]] probs:[[0.179109   0.16926637 0.1238673  0.1832367  0.1796193  0.16490136]] entropy:[1.7839508]
DEBUG:chainerrl.agents.a3c:t:10731 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11343192  0.05560653 -0.25587755  0.13520305  0.11609302  0.03082386]] probs:[[0.17919822 0.16912991 0.12386386 0.18314235 0.17967571 0.16498993]] entropy:[1.7839509]
DEBUG:chainerrl.agents.a3c:t:10732 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12131163  0.05220142 -0.25413504  0.13324715  0.12209952  0.03835698]] probs:[[0.18007006 0.16804567 0.12370495 0.18223217 0.18021199 0.1657352 ]] entropy:[1.7838991]
DEBUG:chainerrl.agents.a3c:t:10733 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0

DEBUG:chainerrl.agents.a3c:t:10758 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0844329   0.04920822 -0.16957003  0.10369112  0.06474748  0.05680606]] probs:[[0.17501545 0.1689579  0.13575765 0.17841859 0.17160389 0.1702465 ]] entropy:[1.7879608]
DEBUG:chainerrl.agents.a3c:t:10759 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09095883  0.04611672 -0.16780885  0.1019673   0.07003468  0.06325251]] probs:[[0.17571026 0.1680051  0.13564874 0.17765525 0.17207187 0.1709088 ]] entropy:[1.7879342]
DEBUG:chainerrl.agents.a3c:t:10760 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09090191  0.04613978 -0.16781504  0.1019818   0.06998924  0.06319894]] probs:[[0.17570402 0.16801257 0.1356508  0.17766163 0.17206773 0.1709033 ]] entropy:[1.7879348]
DEBUG:chainerrl.agents.a3c:t:10761 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.090904    0.04613981 -0.16781335  0.101981    0.06998861  0.06319909]] prob

DEBUG:chainerrl.agents.a3c:grad norm:0.0721626712690264
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:10786 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06715686  0.05714061 -0.04224793  0.01814766  0.04170059  0.06341071]] probs:[[0.17212595 0.1704105  0.15428811 0.16389358 0.16779958 0.17148235]] entropy:[1.7910619]
DEBUG:chainerrl.agents.a3c:t:10787 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06868339  0.05724543 -0.04218445  0.01838057  0.04186451  0.06414783]] probs:[[0.17230572 0.17034611 0.15422344 0.16385263 0.16774608 0.17152599]] entropy:[1.7910507]
DEBUG:chainerrl.agents.a3c:t:10788 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06838038  0.05759422 -0.04202421  0.01841436  0.04201411  0.06385278]] probs:[[0.17225146 0.17040351 0.15424632 0.16385622 0.16776918 0.17147332]] entropy:[1.7910545]
DEBUG:chainerrl.agents.a3c:t:10789 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.

DEBUG:chainerrl.agents.a3c:t:10814 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06897675 0.05069611 0.02313101 0.04222968 0.04567995 0.08776781]] probs:[[0.16930147 0.16623464 0.16171494 0.16483317 0.16540287 0.1725129 ]] entropy:[1.7915475]
DEBUG:chainerrl.agents.a3c:t:10815 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06776991 0.05087613 0.02256747 0.04349574 0.04555786 0.08898701]] probs:[[0.16907467 0.16624235 0.16160224 0.16501993 0.16536057 0.17270027]] entropy:[1.7915426]
DEBUG:chainerrl.agents.a3c:t:10816 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.0664508  0.05162604 0.02116926 0.0434036  0.04387069 0.08764425]] probs:[[0.16899537 0.16650854 0.16151367 0.16514504 0.1652222  0.17261519]] entropy:[1.7915446]
DEBUG:chainerrl.agents.a3c:t:10817 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06155883 0.03061595 0.0187186  0.04054266 0.03374431 0.08611847]] probs:[[0.16937006 0.1642095

DEBUG:chainerrl.agents.a3c:t:10842 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16565716  0.07436963 -0.05728231  0.05455852 -0.02440289  0.03139016]] probs:[[0.18835814 0.17192487 0.15071726 0.16855237 0.15575513 0.16469218]] entropy:[1.789147]
DEBUG:chainerrl.agents.a3c:t:10843 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.165692    0.0744172  -0.05728683  0.05453401 -0.024404    0.03144796]] probs:[[0.18836106 0.17192975 0.15071368 0.168545   0.15575194 0.16469851]] entropy:[1.7891461]
DEBUG:chainerrl.agents.a3c:t:10844 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1656569   0.07436905 -0.05728214  0.05455871 -0.02440321  0.03138964]] probs:[[0.18835814 0.17192483 0.15071733 0.16855244 0.15575512 0.16469213]] entropy:[1.7891469]
DEBUG:chainerrl.agents.a3c:t:10845 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16565661  0.07436858 -0.0572821   0.05455883 -0.02440343  0.03138919]] probs

INFO: outdir:result global_step:22144 local_step:10853 R:1.55
INFO: statistics:[('average_value', 0.38845946177803503), ('average_entropy', 1.7872445187117025)]


DEBUG:chainerrl.agents.a3c:t:10854 r:0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10413547 -0.0486372   0.12738672  0.11583203 -0.01157547 -0.02198983]] probs:[[0.176496   0.1514909  0.18064782 0.1785725  0.15721075 0.155582  ]] entropy:[1.7891254]
DEBUG:chainerrl.agents.a3c:t:10855 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09791096 -0.04163731  0.09659609  0.10497449 -0.00138752 -0.00141142]] probs:[[0.1758567  0.15295154 0.17562562 0.17710327 0.15923339 0.15922959]] entropy:[1.7900369]
DEBUG:chainerrl.agents.a3c:t:10856 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09792838 -0.04117223  0.09422907  0.10255184 -0.00091123  0.00044796]] probs:[[0.17592964 0.1530835  0.17528002 0.17674492 0.15937254 0.1595893 ]] entropy:[1.7901053]
DEBUG:chainerrl.agents.a3c:t:10857 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09829268 -0.04125628  0.09394684  0.10242828 -0.00068776  0.00105148]] probs:

DEBUG:chainerrl.agents.a3c:t:10883 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07527286 -0.02690873  0.0910331   0.0769261   0.04568317 -0.00666266]] probs:[[0.17204064 0.15532957 0.17477351 0.1723253  0.16702458 0.15850644]] entropy:[1.7907865]
DEBUG:chainerrl.agents.a3c:t:10884 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07528117 -0.02690667  0.09102977  0.07692415  0.04568217 -0.00666336]] probs:[[0.17204197 0.1553298  0.17477283 0.17232485 0.16702431 0.15850623]] entropy:[1.7907865]
DEBUG:chainerrl.agents.a3c:t:10885 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07528214 -0.02690635  0.09102882  0.07692176  0.04568258 -0.00666364]] probs:[[0.17204219 0.1553299  0.17477272 0.17232451 0.16702445 0.15850624]] entropy:[1.7907867]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4817924] v_loss:[[2.9867835e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.10119546349738677
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.a

DEBUG:chainerrl.agents.a3c:t:10911 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1099641   0.16709785 -0.08318316 -0.02731057  0.07451724  0.04922481]] probs:[[0.17664315 0.18702932 0.14561765 0.15398526 0.17049138 0.16623332]] entropy:[1.7883424]
DEBUG:chainerrl.agents.a3c:t:10912 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10996418  0.1670978  -0.08318295 -0.02731055  0.0745177   0.04922442]] probs:[[0.17664313 0.18702927 0.14561766 0.15398525 0.17049144 0.16623323]] entropy:[1.7883422]
DEBUG:chainerrl.agents.a3c:t:10913 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10996388  0.167096   -0.08318439 -0.02731081  0.07451721  0.04922876]] probs:[[0.1766431  0.18702896 0.14561747 0.15398523 0.17049137 0.16623397]] entropy:[1.7883424]
DEBUG:chainerrl.agents.a3c:t:10914 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10996287  0.16709654 -0.08318722 -0.02731272  0.0745125   0.04923107]] prob

DEBUG:chainerrl.agents.a3c:t:10940 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10199361  0.08354506 -0.03717937 -0.03820643  0.04247211  0.07353099]] probs:[[0.177458   0.17421417 0.15440221 0.15424371 0.16720363 0.17247829]] entropy:[1.7902102]
DEBUG:chainerrl.agents.a3c:t:10941 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10100473  0.08357924 -0.03918713 -0.03861888  0.04297528  0.07463125]] probs:[[0.17733024 0.17426695 0.15413393 0.15422155 0.16733275 0.17271456]] entropy:[1.7901847]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4620855] v_loss:[[2.4384908e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.23163476204932798
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:10942 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10183926  0.07385267  0.00239009 -0.01440098  0.03567047  0.05279631]] probs:[[0.17679991 0.17192048 0.16006331 0.15739812 0.16547991 0.1683383 ]] entropy:[1.7909678]
DEBUG:chainerrl.a

DEBUG:chainerrl.agents.a3c:t:10968 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16166945 -0.05304928  0.08102011 -0.08244211  0.23389143 -0.07692804]] probs:[[0.18603574 0.15008774 0.17162116 0.14574045 0.19996868 0.14654627]] entropy:[1.7839847]
DEBUG:chainerrl.agents.a3c:t:10969 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16176383 -0.05301766  0.08098862 -0.0824459   0.23381741 -0.07695223]] probs:[[0.18605365 0.15009278 0.17161609 0.14574017 0.19995427 0.14654303]] entropy:[1.7839856]
DEBUG:chainerrl.agents.a3c:t:10970 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16758041 -0.050827    0.07922838 -0.08293832  0.22899841 -0.07818323]] probs:[[0.18715784 0.15043707 0.17133151 0.14568308 0.19901304 0.14637746]] entropy:[1.7840447]
DEBUG:chainerrl.agents.a3c:t:10971 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.15975423 -0.04074358  0.08467505 -0.0911501   0.2300123  -0.07250293]] prob

DEBUG:chainerrl.agents.a3c:t:10997 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11421784  0.00952312  0.07902464 -0.03521932  0.13447444 -0.05183805]] probs:[[0.17874147 0.16097446 0.17256038 0.15393081 0.18239908 0.15139382]] entropy:[1.789189]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4934577] v_loss:[[7.452028e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.1577099539984408
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:10998 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1033601   0.02077651  0.07818969 -0.01324527  0.11845708 -0.06330444]] probs:[[0.17706919 0.1630337  0.1726679  0.1575803  0.17976268 0.14988615]] entropy:[1.7896659]
DEBUG:chainerrl.agents.a3c:t:10999 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10327841  0.02078282  0.07806878 -0.01333134  0.1183972  -0.06329233]] probs:[[0.1770648  0.16304402 0.17265686 0.15757571 0.17976215 0.1498965 ]] entropy:[1.7896681]
DEBUG:chainerrl.agen

DEBUG:chainerrl.agents.a3c:t:11025 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08912528  0.01345375  0.03686509 -0.01509283  0.06425339 -0.01226853]] probs:[[0.1767958  0.16391103 0.16779368 0.15929809 0.17245278 0.15974863]] entropy:[1.7910208]
DEBUG:chainerrl.agents.a3c:t:11026 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09498179  0.00756837  0.03016627 -0.01704704  0.05428895 -0.0125834 ]] probs:[[0.17838989 0.16345835 0.16719422 0.15948386 0.17127642 0.16019735]] entropy:[1.7909772]
DEBUG:chainerrl.agents.a3c:t:11027 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11208828  0.00015872  0.01772287 -0.01641198  0.03223326 -0.01486277]] probs:[[0.18223242 0.16293532 0.16582243 0.16025762 0.16824612 0.16050608]] entropy:[1.7907624]
DEBUG:chainerrl.agents.a3c:t:11028 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11460792  0.00317647  0.01925526 -0.01541225  0.0327465  -0.01432879]] prob

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5539008] v_loss:[[0.00017455]]
DEBUG:chainerrl.agents.a3c:grad norm:0.30381136216177584
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:11054 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04658452 -0.0388463   0.06737264  0.02564708 -0.00375136  0.03607303]] probs:[[0.17068177 0.15670578 0.17426707 0.16714528 0.162303   0.16889706]] entropy:[1.7911639]
DEBUG:chainerrl.agents.a3c:t:11055 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04743327 -0.03735675  0.06853389  0.02630937 -0.00304977  0.03618159]] probs:[[0.17068607 0.15681018 0.17432593 0.16711834 0.16228321 0.16877633]] entropy:[1.7911687]
DEBUG:chainerrl.agents.a3c:t:11056 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04703838 -0.03804852  0.0679308   0.02597832 -0.00337775  0.03615094]] probs:[[0.17068604 0.15676361 0.1742896  0.16712898 0.16229403 0.1688378 ]] entropy:[1.791167]
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:11082 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04612495 0.00300676 0.05720308 0.03407933 0.02970316 0.00086904]] probs:[[0.16959415 0.16243696 0.17148338 0.16756353 0.16683185 0.1620901 ]] entropy:[1.7915449]
DEBUG:chainerrl.agents.a3c:t:11083 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04806056 0.00438702 0.05410956 0.03467598 0.01274851 0.00981038]] probs:[[0.17013125 0.16286093 0.1711635  0.16786928 0.16422841 0.1637466 ]] entropy:[1.7915719]
DEBUG:chainerrl.agents.a3c:t:11084 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04613801 0.00359066 0.05639825 0.03567241 0.02727063 0.00030012]] probs:[[0.16964239 0.16257595 0.17139192 0.16787623 0.16647168 0.16204187]] entropy:[1.7915468]
DEBUG:chainerrl.agents.a3c:t:11085 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.04758818 0.00417369 0.05404336 0.03387771 0.01577339 0.00855447]] probs:[[0.17004554 0.1628210

DEBUG:chainerrl.agents.a3c:t:11110 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.17246589  0.04156401  0.0335121  -0.04655231 -0.00310756  0.0220395 ]] probs:[[0.19046909 0.16709925 0.16575919 0.15300517 0.15979895 0.16386837]] entropy:[1.7893894]
DEBUG:chainerrl.agents.a3c:t:11111 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.17248735  0.04178039  0.03309673 -0.04689311 -0.00603072  0.02335062]] probs:[[0.19053647 0.16719095 0.16574542 0.15300386 0.15938546 0.16413788]] entropy:[1.7893664]
DEBUG:chainerrl.agents.a3c:t:11112 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1717177   0.04164609  0.03285919 -0.04778297 -0.00779299  0.02377296]] probs:[[0.19049574 0.16726147 0.16579819 0.15295278 0.15919329 0.16429853]] entropy:[1.7893611]
DEBUG:chainerrl.agents.a3c:t:11113 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.17003515  0.04269974  0.03236153 -0.04912529 -0.0119033   0.02364343]] prob

DEBUG:chainerrl.agents.a3c:t:11139 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19231416  0.05350959  0.04258452 -0.11614266  0.15279062  0.01267688]] probs:[[0.19001512 0.1653888  0.16359174 0.13958125 0.18265152 0.15877154]] entropy:[1.786898]
DEBUG:chainerrl.agents.a3c:t:11140 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1925924   0.05234031  0.04240621 -0.1162869   0.1521012   0.01252919]] probs:[[0.19013245 0.16525155 0.16361804 0.13960844 0.18258753 0.15880193]] entropy:[1.786894]
DEBUG:chainerrl.agents.a3c:t:11141 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19256392  0.05230138  0.04236636 -0.11627823  0.1519468   0.01248036]] probs:[[0.19013716 0.1652539  0.16362023 0.13961709 0.18256907 0.15880263]] entropy:[1.7868968]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.67312] v_loss:[[0.00074538]]
DEBUG:chainerrl.agents.a3c:grad norm:2.1514242650909723
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3

DEBUG:chainerrl.agents.a3c:t:11167 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11571777  0.08854335 -0.00933118  0.00816739  0.04375033 -0.0010922 ]] probs:[[0.17940465 0.17459507 0.15831628 0.16111098 0.16694699 0.15962604]] entropy:[1.7906415]
DEBUG:chainerrl.agents.a3c:t:11168 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11688757  0.09556114 -0.01276683  0.01037363  0.04492882  0.00058786]] probs:[[0.17930664 0.17552316 0.15750277 0.16118994 0.16685726 0.15962027]] entropy:[1.7905617]
DEBUG:chainerrl.agents.a3c:t:11169 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12341577  0.09438749 -0.01504492  0.00880829  0.04326685  0.00452257]] probs:[[0.18035291 0.17519283 0.15703283 0.1608236  0.16646193 0.16013582]] entropy:[1.7904806]
DEBUG:chainerrl.agents.a3c:t:11170 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12325096  0.09514125 -0.01704561  0.00879178  0.04215136  0.00615142]] prob

DEBUG:chainerrl.agents.a3c:t:11196 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1254813   0.25737286 -0.21970223 -0.00155304  0.06222216  0.18829478]] probs:[[0.14117236 0.2070242  0.12847838 0.15979788 0.17032102 0.19320612]] entropy:[1.7782221]
DEBUG:chainerrl.agents.a3c:t:11197 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15527357  0.2484886  -0.24260284  0.02127148  0.07637094  0.1823243 ]] probs:[[0.13756749 0.20600013 0.12606347 0.16413014 0.1734274  0.1928114 ]] entropy:[1.7772319]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.749113] v_loss:[[0.00121514]]
DEBUG:chainerrl.agents.a3c:grad norm:2.618347719760332
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:11198 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.14472912  0.21071403 -0.22473164  0.02425551  0.10278884  0.15442403]] probs:[[0.13959487 0.19917576 0.128862   0.16529456 0.17879903 0.18827386]] entropy:[1.7800134]
DEBUG:chainerrl.agents.

DEBUG:chainerrl.agents.a3c:t:11224 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0219874   0.19816533 -0.03652148  0.02450123 -0.05847983  0.11206542]] probs:[[0.15656698 0.19512427 0.15430789 0.16401741 0.15095647 0.17902702]] entropy:[1.7874417]
DEBUG:chainerrl.agents.a3c:t:11225 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02287846  0.19759744 -0.03763681  0.02365367 -0.05928973  0.11258198]] probs:[[0.15652002 0.1951288  0.154227   0.16397534 0.15092345 0.17922543]] entropy:[1.7874136]
DEBUG:chainerrl.agents.a3c:t:11226 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02567779  0.1962562  -0.04057175  0.02170561 -0.06046753  0.11303735]] probs:[[0.1563273  0.19517292 0.15401624 0.16391294 0.15098225 0.17958832]] entropy:[1.7873558]
DEBUG:chainerrl.agents.a3c:t:11227 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0234339   0.19730566 -0.03841512  0.02318747 -0.05998844  0.11329641]] prob

DEBUG:chainerrl.agents.a3c:t:11253 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00358043  0.12639673 -0.00165219  0.06344619 -0.01408112  0.05931005]] probs:[[0.1605921  0.18157773 0.15975396 0.17049967 0.15778069 0.1697959 ]] entropy:[1.79054]
DEBUG:chainerrl.agents.a3c:pi_loss:[-0.5034301] v_loss:[[0.01149013]]
DEBUG:chainerrl.agents.a3c:grad norm:17.897840213609125
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:11254 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06766237  0.15616779  0.00892084  0.00854315 -0.00087819  0.00303584]] probs:[[0.17096186 0.18678269 0.16120856 0.16114768 0.15963659 0.16026263]] entropy:[1.7900791]
DEBUG:chainerrl.agents.a3c:t:11255 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06997019  0.15713538  0.0108975   0.00949578 -0.00084227  0.00389129]] probs:[[0.17115298 0.18674107 0.16133535 0.16110936 0.15945238 0.16020894]] entropy:[1.7900714]
DEBUG:chainerrl.agents.

DEBUG:chainerrl.agents.a3c:t:11281 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01185605  0.17692894  0.02002542 -0.05759284  0.0402038   0.02804638]] probs:[[0.16218849 0.19129793 0.16351889 0.15130691 0.16685197 0.16483575]] entropy:[1.7891915]
DEBUG:chainerrl.agents.a3c:t:11282 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01123348  0.1899714   0.01140009 -0.06157045  0.05456451  0.02601736]] probs:[[0.16168581 0.19332892 0.16171275 0.15033273 0.16884583 0.16409391]] entropy:[1.7887206]
DEBUG:chainerrl.agents.a3c:t:11283 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0109562   0.19042102  0.01096169 -0.06189704  0.055183    0.02582745]] probs:[[0.16164172 0.19341673 0.16164261 0.15028432 0.16895105 0.1640635 ]] entropy:[1.7886972]
DEBUG:chainerrl.agents.a3c:t:11284 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00985842  0.20579638 -0.0018839  -0.10035169  0.0615006   0.03622007]] prob

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.8749528] v_loss:[[0.00241293]]
DEBUG:chainerrl.agents.a3c:grad norm:15.552634119110055
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:11310 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24752814  0.10389312 -0.03884415  0.14108482  0.16915905  0.08557268]] probs:[[0.1243736  0.17674556 0.1532352  0.18344282 0.18866579 0.173537  ]] entropy:[1.7826672]
DEBUG:chainerrl.agents.a3c:t:11311 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24756938  0.10374649 -0.03875267  0.1409451   0.16915692  0.0856095 ]] probs:[[0.12437304 0.17672613 0.15325484 0.1834239  0.18867232 0.17354976]] entropy:[1.7826704]
DEBUG:chainerrl.agents.a3c:t:11312 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24769336  0.1036918  -0.03894299  0.14108635  0.16910112  0.08565016]] probs:[[0.12436158 0.17672211 0.15323056 0.18345566 0.1886678  0.17356233]] entropy:[1.7826622]
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:11338 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11301968  0.05638098 -0.0412121   0.07641705 -0.01677023 -0.01268021]] probs:[[0.14983088 0.17748885 0.16098557 0.1810809  0.16496883 0.16564494]] entropy:[1.7898191]
DEBUG:chainerrl.agents.a3c:t:11339 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10885692  0.07068002 -0.04500214  0.09423419 -0.02843286 -0.02070339]] probs:[[0.15006708 0.17957972 0.15996213 0.1838598  0.16263467 0.16389662]] entropy:[1.7893493]
DEBUG:chainerrl.agents.a3c:t:11340 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10693343  0.07218204 -0.04245508  0.09451006 -0.02855563 -0.02212711]] probs:[[0.15024117 0.1797123  0.1602476  0.18377005 0.1624905  0.16353844]] entropy:[1.789368]
DEBUG:chainerrl.agents.a3c:t:11341 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10546024  0.0753245  -0.03667506  0.0877066  -0.02585877 -0.03144757]] probs

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:11366 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0699338   0.1504338  -0.08939456  0.18347114  0.17017297 -0.47802898]] probs:[[0.17420569 0.18880916 0.14854804 0.19515109 0.19257313 0.1007129 ]] entropy:[1.769714]
DEBUG:chainerrl.agents.a3c:t:11367 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06993926  0.15040049 -0.08938845  0.18348658  0.17016813 -0.4780251 ]] probs:[[0.17420699 0.18880324 0.14854924 0.19515449 0.19257256 0.1007135 ]] entropy:[1.7697147]
DEBUG:chainerrl.agents.a3c:t:11368 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06974041  0.15043862 -0.08948049  0.18345194  0.17047788 -0.47793418]] probs:[[0.1741687  0.18880649 0.14853247 0.19514365 0.19262819 0.10072055]] entropy:[1.7697113]
DEBUG:chainerrl.agents.a3c:t:11369 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06944387  0.15051232 -0.08964577  0.183430

DEBUG:chainerrl.agents.a3c:t:11395 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08121911  0.119912   -0.03523664  0.0596862   0.08184259 -0.32307526]] probs:[[0.17933072 0.18640552 0.1596168  0.17551048 0.17944255 0.11969386]] entropy:[1.7819582]
DEBUG:chainerrl.agents.a3c:t:11396 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08121509  0.1199064  -0.03523669  0.05968329  0.0818417  -0.32307565]] probs:[[0.17933044 0.18640494 0.15961719 0.1755104  0.17944284 0.11969411]] entropy:[1.7819583]
DEBUG:chainerrl.agents.a3c:t:11397 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08588465  0.10531009 -0.03752049  0.04208711  0.07840218 -0.31632137]] probs:[[0.18109195 0.18464413 0.16006817 0.17333174 0.179742   0.12112207]] entropy:[1.7825694]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3783396] v_loss:[[3.623696e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:1.4596049479851634
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.age

DEBUG:chainerrl.agents.a3c:t:11423 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00162676  0.12502939  0.00254023 -0.00473679  0.07749006 -0.21169055]] probs:[[0.16586608 0.18826242 0.16655868 0.16535103 0.17952195 0.13443986]] entropy:[1.786588]
DEBUG:chainerrl.agents.a3c:t:11424 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02832855  0.15247665  0.02224795  0.04547191  0.10605255 -0.17999119]] probs:[[0.15798393 0.18929346 0.16617972 0.17008424 0.18070655 0.13575213]] entropy:[1.7863846]
DEBUG:chainerrl.agents.a3c:t:11425 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02421531  0.1647495   0.01687383  0.0417835   0.12082659 -0.18384182]] probs:[[0.15805982 0.19093601 0.16468963 0.16884352 0.18273106 0.13474   ]] entropy:[1.7857957]
DEBUG:chainerrl.agents.a3c:t:11426 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01811606  0.16229747  0.01320309  0.04195109  0.11185488 -0.18472062]] probs

DEBUG:chainerrl.agents.a3c:t:11452 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01182058  0.15373446  0.09242824  0.06224379  0.11070107 -0.13030577]] probs:[[0.15975499 0.18411402 0.17316571 0.1680169  0.17635903 0.13858932]] entropy:[1.7878139]
DEBUG:chainerrl.agents.a3c:t:11453 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0218149   0.13345325  0.0731104   0.06205237  0.11448748 -0.12903126]] probs:[[0.16210389 0.18124974 0.17063606 0.16875955 0.17784458 0.1394061 ]] entropy:[1.7882875]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.533757] v_loss:[[0.00030302]]
DEBUG:chainerrl.agents.a3c:grad norm:2.3573066946240377
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:11454 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02654284  0.1320839   0.07940082  0.0484257   0.09089539 -0.10811322]] probs:[[0.16318004 0.1813439  0.17203744 0.16679025 0.17402634 0.14262204]] entropy:[1.7890244]
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:11480 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04088276  0.09090072  0.0335835   0.04445649  0.10452677 -0.05569256]] probs:[[0.15506114 0.17690325 0.16704875 0.168875   0.17933024 0.15278164]] entropy:[1.789958]
DEBUG:chainerrl.agents.a3c:t:11481 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03681766  0.08628388  0.03580709  0.04020293  0.1050702  -0.05634625]] probs:[[0.1557754  0.17618187 0.16750948 0.16824745 0.17952296 0.15276283]] entropy:[1.7900391]
DEBUG:chainerrl.agents.a3c:t:11482 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03358323  0.07925174  0.03491284  0.04259418  0.10120323 -0.05018258]] probs:[[0.15631525 0.17498668 0.16739745 0.16868824 0.17887037 0.15374194]] entropy:[1.7902619]
DEBUG:chainerrl.agents.a3c:t:11483 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03423359  0.07768707  0.03658852  0.04407195  0.09676471 -0.05416677]] probs

DEBUG:chainerrl.agents.a3c:t:11509 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07541916 0.1305181  0.0055671  0.03028313 0.10323387 0.08401492]] probs:[[0.16717145 0.1766409  0.1558927  0.15979375 0.17188655 0.16861461]] entropy:[1.790874]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5724996] v_loss:[[0.00019724]]
DEBUG:chainerrl.agents.a3c:grad norm:0.8220362248736466
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:11510 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09578525  0.04958709  0.06305756  0.06037567  0.04041198 -0.01379854]] probs:[[0.17451386 0.16663504 0.16889487 0.16844253 0.16511314 0.15640056]] entropy:[1.791223]
DEBUG:chainerrl.agents.a3c:t:11511 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10032972  0.04823589  0.03939026  0.05772479  0.03806673 -0.0064185 ]] probs:[[0.17584589 0.16691992 0.16544992 0.16851135 0.1652311  0.15804183]] entropy:[1.7912632]
DEBUG:chainerrl.agents.a3c:t:

DEBUG:chainerrl.agents.a3c:t:11537 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09903356  0.11743839  0.00484205  0.15448979  0.04299884 -0.04203777]] probs:[[0.1724245  0.17562734 0.156925   0.18225662 0.16302845 0.14973815]] entropy:[1.7894963]
DEBUG:chainerrl.agents.a3c:t:11538 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1023747   0.12148079  0.01340653  0.15817203  0.0360382  -0.0519916 ]] probs:[[0.17288111 0.17621596 0.15816456 0.1828016  0.16178489 0.14815186]] entropy:[1.789258]
DEBUG:chainerrl.agents.a3c:t:11539 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10375989  0.11935785  0.01748739  0.16229801  0.0318004  -0.05585943]] probs:[[0.1731186  0.17584006 0.15880935 0.18355513 0.16109873 0.1475781 ]] entropy:[1.7891405]
DEBUG:chainerrl.agents.a3c:t:11540 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09802844  0.12541062  0.01025633  0.17549048  0.03161021 -0.06648609]] probs

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5997938] v_loss:[[0.0005975]]
DEBUG:chainerrl.agents.a3c:grad norm:2.3763336374347057
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:11566 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06865247  0.12794547  0.05439142  0.00299222  0.2238106   0.02683984]] probs:[[0.14572193 0.17738067 0.1648019  0.1565452  0.19522704 0.1603233 ]] entropy:[1.7872826]
DEBUG:chainerrl.agents.a3c:t:11567 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05682444  0.13665803  0.06965476  0.01255038  0.21954785  0.06544503]] probs:[[0.14559159 0.17667075 0.16522108 0.15605058 0.191939   0.16452701]] entropy:[1.7878811]
DEBUG:chainerrl.agents.a3c:t:11568 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05572867  0.13717052  0.06957743  0.01430582  0.21767657  0.06413274]] probs:[[0.14576039 0.17677243 0.16521871 0.1563346  0.19159222 0.16432159]] entropy:[1.7879618]
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:11594 r:0.05 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03096524  0.0373683   0.06364343 -0.03883366  0.12251861  0.01006872]] probs:[[0.16535982 0.16642202 0.17085274 0.15421149 0.18121374 0.16194023]] entropy:[1.7905397]
DEBUG:chainerrl.agents.a3c:t:11595 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01758519  0.0548191   0.06307809 -0.02489604  0.13464573  0.00887902]] probs:[[0.16237965 0.16853964 0.16993739 0.15562603 0.18254517 0.16097209]] entropy:[1.7904556]
DEBUG:chainerrl.agents.a3c:t:11596 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02334526  0.03863577  0.03992431 -0.01045572  0.10836224 -0.00469376]] probs:[[0.16501717 0.16755976 0.16777581 0.15953265 0.17966007 0.16045451]] entropy:[1.7909791]
DEBUG:chainerrl.agents.a3c:t:11597 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03264921  0.04358292  0.03751738 -0.01060097  0.10765155 -0.01355371]] pro

DEBUG:chainerrl.agents.a3c:grad norm:89.36856469368422
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:11622 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06366851  0.02667702 -0.10758065  0.15768747  0.0531038   0.07797255]] probs:[[0.15208237 0.16646211 0.1455486  0.18976344 0.17091982 0.17522368]] entropy:[1.7879024]
DEBUG:chainerrl.agents.a3c:t:11623 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05077577  0.04403671 -0.1338695   0.1410715  -0.05302087  0.06031916]] probs:[[0.15756814 0.17323871 0.14500442 0.1908915  0.15721478 0.17608254]] entropy:[1.7876903]
DEBUG:chainerrl.agents.a3c:t:11624 r:0.15 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05690984  0.0368478  -0.13830365  0.15303443 -0.0300918   0.06469063]] probs:[[0.15600094 0.17133482 0.14380644 0.19244422 0.16024119 0.1761723 ]] entropy:[1.7874116]
DEBUG:chainerrl.agents.a3c:t:11625 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.

DEBUG:chainerrl.agents.a3c:t:11650 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00878274 -0.01898452  0.06278831  0.11329728  0.00212389  0.1349373 ]] probs:[[0.15958896 0.15521857 0.16844463 0.17717113 0.15852982 0.18104689]] entropy:[1.7900655]
DEBUG:chainerrl.agents.a3c:t:11651 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03031261 -0.02806323  0.05457707  0.10719975  0.01466287  0.16429462]] probs:[[0.16192703 0.15274502 0.16590416 0.17486827 0.15941264 0.18514287]] entropy:[1.7897444]
DEBUG:chainerrl.agents.a3c:t:11652 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06029856 -0.03680103  0.06717145  0.06493629  0.03099502  0.15088336]] probs:[[0.16708587 0.15162468 0.1682382  0.16786256 0.1622607  0.18292801]] entropy:[1.7902197]
DEBUG:chainerrl.agents.a3c:t:11653 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08828083 -0.03033196  0.00728483  0.07891342  0.02531493  0.15087263]] prob

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:11678 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06292648 -0.01357299  0.09011975  0.05082958 -0.0062023   0.02006004]] probs:[[0.17143421 0.15880866 0.17616002 0.16937287 0.15998352 0.1642407 ]] entropy:[1.791063]
DEBUG:chainerrl.agents.a3c:t:11679 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05424845 -0.05448836  0.0740385   0.06012102 -0.01372505 -0.00245645]] probs:[[0.17235433 0.15459605 0.17579919 0.17336947 0.16102809 0.16285291]] entropy:[1.7906963]
DEBUG:chainerrl.agents.a3c:t:11680 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0645592  -0.0115988   0.08100649  0.05399887 -0.01855052  0.02395274]] probs:[[0.17202213 0.15940772 0.17487483 0.17021509 0.15830341 0.16517685]] entropy:[1.7910573]
DEBUG:chainerrl.agents.a3c:t:11681 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06800411 -0.01425852  0.08673131  0.048107

DEBUG:chainerrl.agents.a3c:t:11707 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09210263  0.0177905   0.09539897 -0.00927672  0.04276873  0.04531161]] probs:[[0.17417282 0.1616989  0.1747479  0.15738085 0.16578871 0.16621082]] entropy:[1.7910588]
DEBUG:chainerrl.agents.a3c:t:11708 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09935017  0.03107306  0.09789035 -0.00890405  0.05137341  0.04678736]] probs:[[0.17446244 0.16294819 0.17420793 0.15656249 0.1662899  0.16552903]] entropy:[1.7910529]
DEBUG:chainerrl.agents.a3c:t:11709 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1156147   0.05393085  0.08523413 -0.00357162  0.07074144  0.03941766]] probs:[[0.17603616 0.16550569 0.17076851 0.15625717 0.16831146 0.16312103]] entropy:[1.7910686]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.594398] v_loss:[[0.00027617]]
DEBUG:chainerrl.agents.a3c:grad norm:0.7259215484113719
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:11735 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06479968 0.02033042 0.14256546 0.0353682  0.04481745 0.0253054 ]] probs:[[0.16807109 0.16076083 0.1816629  0.1631966  0.16474599 0.1615626 ]] entropy:[1.790866]
DEBUG:chainerrl.agents.a3c:t:11736 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06307513 0.02160137 0.13568139 0.03817145 0.04165135 0.03442996]] probs:[[0.16776691 0.1609513  0.18040095 0.1636405  0.16421095 0.16302937]] entropy:[1.7910199]
DEBUG:chainerrl.agents.a3c:t:11737 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03810753 -0.05174147  0.20280823  0.07988764 -0.00745939 -0.05418845]] probs:[[0.15626754 0.15415145 0.19883698 0.17583829 0.161131   0.15377472]] entropy:[1.7871709]
DEBUG:chainerrl.agents.a3c:t:11738 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0390596  -0.06991421  0.2182749   0.0789235  -0.01839647 -0.07074329]] probs:[[0.1567887

DEBUG:chainerrl.agents.a3c:t:11764 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.08128607 0.05449296 0.14050938 0.0631476  0.11969674 0.1098211 ]] probs:[[0.16434653 0.16000165 0.17437367 0.1613924  0.170782   0.16910371]] entropy:[1.7912802]
DEBUG:chainerrl.agents.a3c:t:11765 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07324589 0.07991804 0.12582791 0.04656421 0.10084698 0.13762662]] probs:[[0.16316305 0.16425534 0.17197207 0.15886715 0.16772926 0.17401314]] entropy:[1.7912729]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2594135] v_loss:[[0.00128828]]
DEBUG:chainerrl.agents.a3c:grad norm:52.06683116088311
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:11766 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00076073  0.18803003  0.03984585  0.12055861  0.06505117  0.14119019]] probs:[[0.15154357 0.18303256 0.15782389 0.17109048 0.16185245 0.17465702]] entropy:[1.7897172]
DEBUG:chainerrl.agents.a3c:t:11767

DEBUG:chainerrl.agents.a3c:t:11792 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-2.5580093e-01  3.2750157e-01  1.6862214e-01 -1.5079048e-01
  -7.8510724e-02 -2.9229710e-04]] probs:[[0.12631889 0.2263567  0.19310473 0.14030519 0.15082191 0.16309261]] entropy:[1.7718089]
DEBUG:chainerrl.agents.a3c:t:11793 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.25114262  0.33913094  0.17446825 -0.13149244 -0.10266044  0.03038688]] probs:[[0.12582833 0.22705492 0.1925834  0.14182141 0.14596991 0.16674203]] entropy:[1.771261]
DEBUG:chainerrl.agents.a3c:t:11794 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.25729814  0.31741577  0.20773031 -0.16266221 -0.03605537  0.08749982]] probs:[[0.12304252 0.2186001  0.19589104 0.13525556 0.15351109 0.1736997 ]] entropy:[1.7718433]
DEBUG:chainerrl.agents.a3c:t:11795 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.21640514  0.36833957  0.19638155 -0.178588   -0.01823019

DEBUG:chainerrl.agents.a3c:t:11821 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07667539  0.08172405  0.14105667 -0.03573438 -0.04904227  0.10595339]] probs:[[0.14958717 0.17526142 0.18597482 0.15583852 0.15377836 0.17955975]] entropy:[1.7881997]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5565947] v_loss:[[0.00136689]]
DEBUG:chainerrl.agents.a3c:grad norm:9.387742554743198
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:11822 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03550087  0.07069845  0.08003722  0.00064285 -0.04086664  0.09978882]] probs:[[0.15598574 0.17346293 0.17509045 0.16172677 0.155151   0.17858315]] entropy:[1.7901661]
DEBUG:chainerrl.agents.a3c:t:11823 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07064249  0.07521923  0.05684041 -0.01460916 -0.03922682  0.08469341]] probs:[[0.15265779 0.17663059 0.17341398 0.1614559  0.15752977 0.17831196]] entropy:[1.7899916]
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:11849 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18275182  0.1198978  -0.08222201  0.00200544  0.1594096   0.02592163]] probs:[[0.18612508 0.17478645 0.14280002 0.15534875 0.18183084 0.15910889]] entropy:[1.7874367]
DEBUG:chainerrl.agents.a3c:t:11850 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18063214  0.13847107 -0.11444931  0.01570968  0.14875533 -0.01031076]] probs:[[0.18705745 0.17933485 0.13925885 0.15861714 0.1811887  0.15454309]] entropy:[1.7864374]
DEBUG:chainerrl.agents.a3c:t:11851 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16808996  0.12490099 -0.08231805  0.03962891  0.14227277  0.04575795]] probs:[[0.18264744 0.174927   0.14218794 0.16062889 0.17799234 0.16161641]] entropy:[1.7883582]
DEBUG:chainerrl.agents.a3c:t:11852 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1751041   0.13119441 -0.05356491  0.01556156  0.1280602   0.01464037]] prob

DEBUG:chainerrl.agents.a3c:pi_loss:[-2.2323205] v_loss:[[0.009141]]
DEBUG:chainerrl.agents.a3c:grad norm:108.66278928253632
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:11878 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07348846  0.09686569  0.0555886   0.10101346 -0.16503403 -0.00764783]] probs:[[0.17410447 0.17822249 0.17101574 0.17896324 0.13715792 0.16053616]] entropy:[1.7878093]
DEBUG:chainerrl.agents.a3c:t:11879 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.098625    0.09383064  0.04799987  0.10900775 -0.18158607 -0.03605063]] probs:[[0.17902179 0.17816554 0.17018436 0.1808902  0.13527319 0.15646492]] entropy:[1.7868164]
DEBUG:chainerrl.agents.a3c:t:11880 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07575081  0.1252406   0.07516651  0.12507248 -0.14728706 -0.00545929]] probs:[[0.17173888 0.18045203 0.17163856 0.1804217  0.13740563 0.15834326]] entropy:[1.7875569]
DEBUG:chainerrl.agents.

DEBUG:chainerrl.agents.a3c:t:11906 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10940865 0.00259243 0.08267856 0.05622445 0.17841147 0.31264296]] probs:[[0.16347031 0.14690927 0.15915862 0.15500341 0.1751485  0.2003099 ]] entropy:[1.7865186]
DEBUG:chainerrl.agents.a3c:t:11907 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10680897 -0.01161376  0.07594754  0.05967069  0.1659732   0.31951052]] probs:[[0.16367011 0.14539151 0.15869616 0.156134   0.17364572 0.20246245]] entropy:[1.7860397]
DEBUG:chainerrl.agents.a3c:t:11908 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09781462 -0.0161108   0.08268198  0.05509637  0.16446362  0.32364532]] probs:[[0.16239642 0.1449103  0.15995744 0.15560521 0.17358881 0.20354174]] entropy:[1.785756]
DEBUG:chainerrl.agents.a3c:t:11909 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10937257 -0.03678738  0.09853062  0.04351898  0.16335467  0.30948022]] probs:[[0.1

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:11934 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.20737356  0.10446545  0.06246859 -0.05851062  0.12468408  0.28470415]] probs:[[0.18067245 0.16300446 0.15630054 0.13849047 0.16633373 0.19519836]] entropy:[1.7859745]
DEBUG:chainerrl.agents.a3c:t:11935 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19020732  0.11821536  0.05578488 -0.06293386  0.13862096  0.29315966]] probs:[[0.17732769 0.16501023 0.15502353 0.13766982 0.16841194 0.19655685]] entropy:[1.7857757]
DEBUG:chainerrl.agents.a3c:t:11936 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18252918  0.11978902  0.05882543 -0.0612395   0.13719624  0.291925  ]] probs:[[0.17612514 0.16541453 0.1556315  0.1380238  0.16831914 0.19648588]] entropy:[1.7859725]
DEBUG:chainerrl.agents.a3c:t:11937 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19007064  0.11821015  0.05574156 -0.06298

DEBUG:chainerrl.agents.a3c:t:11963 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07997765 0.09960128 0.14415137 0.02818916 0.06263535 0.15106109]] probs:[[0.16414694 0.16739991 0.17502621 0.15586239 0.16132478 0.17623977]] entropy:[1.7908181]
DEBUG:chainerrl.agents.a3c:t:11964 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07993296 0.10233035 0.14566262 0.02943324 0.06349021 0.15120448]] probs:[[0.16396381 0.1676776  0.17510317 0.15588929 0.16128984 0.17607626]] entropy:[1.7908196]
DEBUG:chainerrl.agents.a3c:t:11965 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07997189 0.09958523 0.14414456 0.02818491 0.06261589 0.15106091]] probs:[[0.1641474  0.16739868 0.17502654 0.15586308 0.16132306 0.17624128]] entropy:[1.790818]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4813356] v_loss:[[2.9318697e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.14024121217073263
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:11966 r

DEBUG:chainerrl.agents.a3c:t:11992 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07291856 0.08850399 0.14869727 0.02081627 0.08262903 0.14159395]] probs:[[0.16327839 0.16584308 0.17613229 0.15498903 0.16487162 0.17488559]] entropy:[1.7908306]
DEBUG:chainerrl.agents.a3c:t:11993 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.07282837 0.0882033  0.14854412 0.02098618 0.0820678  0.14090714]] probs:[[0.16330904 0.1658393  0.17615426 0.15505844 0.16482492 0.17481409]] entropy:[1.7908382]
DEBUG:chainerrl.agents.a3c:t:11994 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.06719164 0.02893221 0.12909172 0.02087244 0.07962517 0.12392107]] probs:[[0.16523635 0.15903391 0.1757877  0.1577573  0.16730365 0.17488112]] entropy:[1.7908878]
DEBUG:chainerrl.agents.a3c:t:11995 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06455068 -0.00669858  0.12868606  0.01948895  0.06708977  0.11714195]] probs:[[0.16639084 0.1

DEBUG:chainerrl.agents.a3c:t:12021 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11150514  0.02660531  0.18667078 -0.04798396  0.07554363  0.26379374]] probs:[[0.13846204 0.1589686  0.18656358 0.14754267 0.16694175 0.2015213 ]] entropy:[1.7833458]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7646456] v_loss:[[0.00155673]]
DEBUG:chainerrl.agents.a3c:grad norm:41.566445973733124
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:12022 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0521835   0.11137403  0.12316723 -0.08092194 -0.05398676  0.18189318]] probs:[[0.16535257 0.17543533 0.17751653 0.14474516 0.14869687 0.18825354]] entropy:[1.7873256]
DEBUG:chainerrl.agents.a3c:t:12023 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00531661  0.11084547  0.12578772 -0.0917142  -0.0476253   0.16750152]] probs:[[0.15803488 0.17750128 0.18017346 0.14495425 0.1514881  0.18784814]] entropy:[1.7871686]
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:12049 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.19458404  0.342233    0.00172426  0.20534918  0.18279636  0.21031925]] probs:[[0.11937736 0.20420137 0.14527056 0.1780782  0.174107   0.17896546]] entropy:[1.7779416]
DEBUG:chainerrl.agents.a3c:t:12050 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.20204136  0.31925908 -0.01827093  0.17485085  0.16153003  0.26134655]] probs:[[0.11945134 0.2011819  0.14354944 0.17412986 0.17182568 0.18986191]] entropy:[1.7775114]
DEBUG:chainerrl.agents.a3c:t:12051 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.22613186  0.30100065 -0.0072197   0.18193129  0.18352327  0.26892322]] probs:[[0.11642015 0.1972236  0.14491071 0.17508452 0.17536347 0.19099757]] entropy:[1.7770257]
DEBUG:chainerrl.agents.a3c:t:12052 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.22322181  0.30570927  0.0025747   0.19365747  0.17827019  0.29194278]] prob

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7451594] v_loss:[[0.0011975]]
DEBUG:chainerrl.agents.a3c:grad norm:179.09778420846328
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:12078 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07453682  0.09613746 -0.01713187  0.00619839  0.11745939  0.05005511]] probs:[[0.1498436  0.17773016 0.15869705 0.16244303 0.1815604  0.16972578]] entropy:[1.7895991]
DEBUG:chainerrl.agents.a3c:t:12079 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07709127  0.14622375 -0.0359331   0.00391829  0.22181766  0.04597713]] probs:[[0.14585847 0.18235436 0.15198699 0.15816619 0.19667363 0.16496035]] entropy:[1.7862387]
DEBUG:chainerrl.agents.a3c:t:12080 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05734172  0.13159257 -0.03353617  0.00101175  0.18629785  0.04608864]] probs:[[0.14976797 0.18091394 0.15337604 0.15876746 0.1910866  0.16608797]] entropy:[1.7878366]
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:12106 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.22161037 -0.01810007 -0.1184694   0.7051728  -0.03320749  0.04822608]] probs:[[0.1193633  0.14630333 0.13233185 0.30155575 0.14410968 0.1563361 ]] entropy:[1.7333478]
DEBUG:chainerrl.agents.a3c:t:12107 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.19020697 -0.01691723 -0.11134711  0.7101627  -0.04549371  0.05551691]] probs:[[0.12245851 0.14562896 0.13250656 0.3013107  0.1415263  0.1565689 ]] entropy:[1.7340529]
DEBUG:chainerrl.agents.a3c:t:12108 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10913439 -0.00700217 -0.09014668  0.7361555  -0.02810273  0.06284444]] probs:[[0.12940948 0.14332487 0.13189015 0.30134973 0.1403323  0.15369351]] entropy:[1.7351002]
DEBUG:chainerrl.agents.a3c:t:12109 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08249956 -0.01597021 -0.04581336  0.75401855 -0.06041462  0.04464429]] prob

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:12134 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01818158  0.24462697  0.19229329  0.1205162   0.04738649  0.1313444 ]] probs:[[0.14466195 0.1881441  0.17855105 0.1661843  0.15446502 0.16799356]] entropy:[1.7880324]
DEBUG:chainerrl.agents.a3c:t:12135 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03165592  0.25302088  0.1864545   0.13657865  0.01758854  0.11887009]] probs:[[0.14348862 0.19074406 0.1784603  0.16977777 0.15073152 0.16679771]] entropy:[1.7871822]
DEBUG:chainerrl.agents.a3c:t:12136 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04519928  0.25060287  0.18490429  0.11004815  0.01542643  0.11584064]] probs:[[0.14269173 0.19180681 0.1796104  0.16665636 0.15161012 0.1676245 ]] entropy:[1.786944]
DEBUG:chainerrl.agents.a3c:t:12137 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04095933  0.2592483   0.18465884  0.132851

INFO: outdir:result global_step:24803 local_step:12158 R:3.8999999999999995
INFO: statistics:[('average_value', 0.3258592203809394), ('average_entropy', 1.7861309182940674)]


DEBUG:chainerrl.agents.a3c:t:12159 r:0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00378519  0.1722435   0.17202303  0.07264093 -0.06009094 -0.1555772 ]] probs:[[0.1605449  0.1900015  0.18995962 0.1719888  0.15061058 0.1368946 ]] entropy:[1.7848098]
DEBUG:chainerrl.agents.a3c:t:12160 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00469285  0.19161324  0.15206331  0.04742289 -0.05248187 -0.14244229]] probs:[[0.15962306 0.1942452  0.18671274 0.16816252 0.15217423 0.13908228]] entropy:[1.785207]
DEBUG:chainerrl.agents.a3c:t:12161 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00763179  0.19388591  0.14895934  0.0493784  -0.04820071 -0.13999243]] probs:[[0.15904042 0.19454746 0.18600054 0.1683708  0.15271746 0.1393234 ]] entropy:[1.7853038]
DEBUG:chainerrl.agents.a3c:t:12162 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00828449  0.19427232  0.14880228  0.04999776 -0.04741384 -0.1395853 ]] probs:[

DEBUG:chainerrl.agents.a3c:t:12188 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01261628  0.08788349  0.10175101  0.02123012 -0.03401127 -0.09323324]] probs:[[0.16226675 0.17942214 0.1819276  0.1678529  0.15883192 0.1496987 ]] entropy:[1.7894604]
DEBUG:chainerrl.agents.a3c:t:12189 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01261629  0.08788412  0.10175012  0.02123002 -0.03401024 -0.09323329]] probs:[[0.16226673 0.17942224 0.18192744 0.16785286 0.15883209 0.14969869]] entropy:[1.7894607]
DEBUG:chainerrl.agents.a3c:t:12190 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01275261  0.08770286  0.1015434   0.02113745 -0.03404653 -0.09343797]] probs:[[0.16226801 0.17941558 0.18191606 0.16786154 0.15884922 0.14968963]] entropy:[1.7894619]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5704749] v_loss:[[0.00021628]]
DEBUG:chainerrl.agents.a3c:grad norm:0.6061172546585669
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:12216 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12158855 -0.06034404  0.15815847  0.09959029 -0.04564535 -0.14532092]] probs:[[0.18312033 0.15265964 0.18994097 0.17913598 0.15492012 0.140223  ]] entropy:[1.7857151]
DEBUG:chainerrl.agents.a3c:t:12217 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11683989 -0.05893199  0.14881912  0.09299602 -0.04631751 -0.15280694]] probs:[[0.18312168 0.15360416 0.1890724  0.17880699 0.15555407 0.1398407 ]] entropy:[1.7859082]
DEBUG:chainerrl.agents.a3c:t:12218 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12284257 -0.057055    0.14345723  0.09005786 -0.04595923 -0.1491586 ]] probs:[[0.18414643 0.15382779 0.18798195 0.17820714 0.15554412 0.14029258]] entropy:[1.7860805]
DEBUG:chainerrl.agents.a3c:t:12219 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11913202 -0.05608349  0.14932005  0.09145499 -0.04699503 -0.14601581]] prob

DEBUG:chainerrl.agents.a3c:t:12245 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04487767 -0.01260278  0.08394763  0.00338501 -0.01124938 -0.09854962]] probs:[[0.17375764 0.16405159 0.18068069 0.16669549 0.16427377 0.1505408 ]] entropy:[1.790193]
DEBUG:chainerrl.agents.a3c:t:12246 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05486564 -0.01105928  0.08966812  0.00736127 -0.01217607 -0.08982658]] probs:[[0.17465213 0.16350953 0.18083747 0.16654937 0.16332702 0.15112449]] entropy:[1.790173]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.599906] v_loss:[[0.00032434]]
DEBUG:chainerrl.agents.a3c:grad norm:9.62672859788444
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:12247 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02254894 -0.01649168 -0.00177856  0.05604966 -0.0532288  -0.07788171]] probs:[[0.17231825 0.16572046 0.16817676 0.17818882 0.15974285 0.15585287]] entropy:[1.7907612]
DEBUG:chainerrl.agents.a3c

DEBUG:chainerrl.agents.a3c:t:12273 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0310378  -0.01338126  0.00733383  0.00711422 -0.05843147 -0.1486962 ]] probs:[[0.16784726 0.17083718 0.174413   0.1743747  0.1633117  0.14921616]] entropy:[1.7903697]
DEBUG:chainerrl.agents.a3c:t:12274 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01552998 -0.01425915 -0.01929126 -0.00253784 -0.05691961 -0.15584421]] probs:[[0.17125826 0.17147604 0.17061532 0.17349778 0.16431463 0.148838  ]] entropy:[1.7904373]
DEBUG:chainerrl.agents.a3c:t:12275 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01514753 -0.01445821 -0.01999561 -0.0027637  -0.05689617 -0.15614446]] probs:[[0.1713527  0.17147087 0.17052397 0.17348789 0.16434623 0.14881845]] entropy:[1.7904356]
DEBUG:chainerrl.agents.a3c:t:12276 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05599257 -0.00560447  0.05559148  0.02677544 -0.06259105 -0.14165077]] prob

DEBUG:chainerrl.agents.a3c:t:12302 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04677956 -0.00985272 -0.01334786  0.01741661 -0.06664789 -0.14748387]] probs:[[0.16604556 0.17229171 0.17169058 0.17705464 0.16277908 0.15013845]] entropy:[1.7903785]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6280618] v_loss:[[0.00038982]]
DEBUG:chainerrl.agents.a3c:grad norm:1.4886463417993596
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:12303 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04956847 -0.02423718 -0.03844176  0.01740791 -0.09105941 -0.12067834]] probs:[[0.16675471 0.17103279 0.16862051 0.17830585 0.15997748 0.1553086 ]] entropy:[1.7907618]
DEBUG:chainerrl.agents.a3c:t:12304 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04784326 -0.02456159 -0.03216827  0.02159149 -0.09047916 -0.1172521 ]] probs:[[0.16659835 0.17052254 0.16923034 0.17857713 0.15964457 0.15542711]] entropy:[1.7907419]
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:12330 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1608293  -0.06936379 -0.0447122   0.04227418  0.04580959 -0.08546814]] probs:[[0.14810607 0.16229151 0.16634198 0.18145944 0.18210211 0.15969884]] entropy:[1.7891479]
DEBUG:chainerrl.agents.a3c:t:12331 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16081503 -0.06936412 -0.04473069  0.04227993  0.04582413 -0.08547368]] probs:[[0.14810792 0.16229118 0.16633861 0.18146016 0.18210444 0.15969768]] entropy:[1.7891479]
DEBUG:chainerrl.agents.a3c:t:12332 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16082275 -0.06936422 -0.04472455  0.04228101  0.04582382 -0.08547174]] probs:[[0.14810672 0.16229111 0.16633958 0.1814603  0.18210432 0.15969795]] entropy:[1.7891475]
DEBUG:chainerrl.agents.a3c:t:12333 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16082308 -0.06936162 -0.04472836  0.04228981  0.04582903 -0.08547129]] prob

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.503854] v_loss:[[7.295541e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.18925584855555017
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:12359 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09714547 -0.06721558 -0.01433128  0.01677678 -0.0327076  -0.07082959]] probs:[[0.1579626  0.16276187 0.17160109 0.17702316 0.16847649 0.16217472]] entropy:[1.7910233]
DEBUG:chainerrl.agents.a3c:t:12360 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0980534  -0.06808563 -0.01507968  0.01633118 -0.03314627 -0.07079618]] probs:[[0.1579078  0.16271158 0.17156892 0.17704357 0.16849709 0.16227113]] entropy:[1.7910213]
DEBUG:chainerrl.agents.a3c:t:12361 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07597613 -0.06746212 -0.03986907 -0.00096783  0.00693086 -0.07049675]] probs:[[0.16089727 0.16227299 0.16681296 0.17343006 0.17480534 0.16178131]] entropy:[1.7911966]
DEBUG:chainerrl.age

DEBUG:chainerrl.agents.a3c:t:12387 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0364701  -0.10982899  0.00551835 -0.14519112 -0.10337952  0.12810573]] probs:[[0.16713177 0.15531008 0.17429878 0.14991395 0.15631498 0.19703045]] entropy:[1.787379]
DEBUG:chainerrl.agents.a3c:t:12388 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03646174 -0.11082827  0.00558678 -0.14515461 -0.10350576  0.12773378]] probs:[[0.1671715  0.15519054 0.1743507  0.14995381 0.15633109 0.19700237]] entropy:[1.7873781]
DEBUG:chainerrl.agents.a3c:t:12389 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03639062 -0.12040052  0.00520958 -0.14502731 -0.10407138  0.12595205]] probs:[[0.1675104  0.15401278 0.17462586 0.15026627 0.15654834 0.19703634]] entropy:[1.7873161]
DEBUG:chainerrl.agents.a3c:t:12390 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03657206 -0.11653246  0.00583066 -0.14493135 -0.10374379  0.12629311]] probs

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:12415 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03194618 -0.15840253  0.06024492 -0.17949077 -0.09067098  0.08054925]] probs:[[0.1694156  0.14929116 0.1857768  0.14617583 0.15975319 0.18958743]] entropy:[1.7867734]
DEBUG:chainerrl.agents.a3c:t:12416 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03415425 -0.15575986  0.05794288 -0.1834993  -0.09172761  0.07758383]] probs:[[0.1693333  0.14994422 0.1856691  0.145842   0.15985954 0.18935187]] entropy:[1.7868475]
DEBUG:chainerrl.agents.a3c:t:12417 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03513777 -0.1550977   0.05656274 -0.18493152 -0.09182741  0.07649307]] probs:[[0.16929454 0.1501568  0.185553   0.1457432  0.15996425 0.18928821]] entropy:[1.7868822]
DEBUG:chainerrl.agents.a3c:t:12418 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03104714 -0.15656544  0.06308868 -0.18029

DEBUG:chainerrl.agents.a3c:t:12444 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03258729 -0.09961615  0.00724147 -0.12578191 -0.07014931  0.0395658 ]] probs:[[0.16878287 0.15784037 0.17564096 0.15376392 0.16256064 0.18141119]] entropy:[1.7900729]
DEBUG:chainerrl.agents.a3c:t:12445 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03235063 -0.09191512  0.00047554 -0.12957834 -0.07653958  0.03308071]] probs:[[0.16928244 0.15949364 0.17493154 0.15359831 0.1619649  0.18072921]] entropy:[1.7902104]
DEBUG:chainerrl.agents.a3c:t:12446 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0307819  -0.09240272  0.00215084 -0.12775137 -0.07641231  0.03422472]] probs:[[0.16938056 0.15925826 0.1750516  0.15372704 0.16182534 0.1807572 ]] entropy:[1.7901965]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.1374205] v_loss:[[0.00137116]]
DEBUG:chainerrl.agents.a3c:grad norm:4.688040320892535
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:12472 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06939664 -0.01568149 -0.06470984 -0.05301508 -0.05710382 -0.03682226]] probs:[[0.16334857 0.1723628  0.16411597 0.16604652 0.16536897 0.16875717]] entropy:[1.791591]
DEBUG:chainerrl.agents.a3c:t:12473 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07953175 -0.02094257 -0.07329667 -0.05766061 -0.05948871 -0.03345041]] probs:[[0.16244023 0.17224179 0.16345622 0.16603212 0.16572887 0.17010084]] entropy:[1.7915432]
DEBUG:chainerrl.agents.a3c:t:12474 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06894099 -0.01491708 -0.06177789 -0.05266119 -0.05542446 -0.03492491]] probs:[[0.16320345 0.17226285 0.16437669 0.16588211 0.16542436 0.1688505 ]] entropy:[1.7915936]
DEBUG:chainerrl.agents.a3c:t:12475 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07243104 -0.0139322  -0.06705832 -0.05589807 -0.05554415 -0.03892718]] probs

DEBUG:chainerrl.agents.a3c:t:12501 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01621549 -0.02395642  0.0635363  -0.1005044  -0.06667235 -0.04410223]] probs:[[0.16898268 0.16767965 0.18301132 0.15532309 0.16066788 0.16433541]] entropy:[1.7904432]
DEBUG:chainerrl.agents.a3c:t:12502 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01483743 -0.02381326  0.0641017  -0.10247061 -0.06819335 -0.04762352]] probs:[[0.16934548 0.16783229 0.18325527 0.15513687 0.16054672 0.16388334]] entropy:[1.7903891]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.492632] v_loss:[[4.0777966e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.12990191350227992
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:12503 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00067493 -0.02583735  0.05721308 -0.09787831 -0.06665623 -0.07539508]] probs:[[0.17222758 0.16794798 0.1824917  0.1562744  0.16123056 0.15982774]] entropy:[1.7903713]
DEBUG:chainerrl.ag

DEBUG:chainerrl.agents.a3c:t:12529 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1203289  -0.0249483   0.07686332 -0.05642515 -0.06138483 -0.01041757]] probs:[[0.15241718 0.1676707  0.18564078 0.16247515 0.16167133 0.17012486]] entropy:[1.7899234]
DEBUG:chainerrl.agents.a3c:t:12530 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12033096 -0.02496106  0.07693072 -0.05648876 -0.06138459 -0.01041631]] probs:[[0.15241687 0.16766857 0.1856533  0.16246483 0.16167136 0.17012508]] entropy:[1.7899219]
DEBUG:chainerrl.agents.a3c:t:12531 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12032971 -0.02496253  0.07693995 -0.05649694 -0.06138471 -0.01041554]] probs:[[0.15241699 0.16766824 0.18565494 0.16246341 0.16167128 0.17012513]] entropy:[1.7899218]
DEBUG:chainerrl.agents.a3c:t:12532 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12031528 -0.02496042  0.07694207 -0.05649399 -0.06138589 -0.01041142]] prob

DEBUG:chainerrl.agents.a3c:t:12558 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06813016 -0.02407047  0.02483237 -0.08374015 -0.04231988 -0.00426991]] probs:[[0.16079584 0.16803886 0.17646067 0.1583053  0.16500005 0.17139927]] entropy:[1.7910788]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4660228] v_loss:[[1.4967618e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.04617982822841904
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:12559 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05215075 -0.02531488  0.02683609 -0.08760764 -0.05144514 -0.00926424]] probs:[[0.16342328 0.16786827 0.17685507 0.15773033 0.16353865 0.17058441]] entropy:[1.791099]
DEBUG:chainerrl.agents.a3c:t:12560 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05215094 -0.02531494  0.02683607 -0.08760782 -0.05144511 -0.00926403]] probs:[[0.16342326 0.16786826 0.17685506 0.1577303  0.16353863 0.17058444]] entropy:[1.7910988]
DEBUG:chainerrl.ag

DEBUG:chainerrl.agents.a3c:t:12586 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02682548 -0.02177576 -0.00825759 -0.13030228 -0.06975809 -0.0496886 ]] probs:[[0.17062305 0.17148682 0.17382075 0.15385027 0.16345277 0.16676632]] entropy:[1.7909553]
DEBUG:chainerrl.agents.a3c:t:12587 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02647355 -0.02213607 -0.00761156 -0.1310675  -0.06995124 -0.0500595 ]] probs:[[0.17070025 0.17144227 0.17395057 0.15374805 0.16343763 0.16672124]] entropy:[1.7909409]
DEBUG:chainerrl.agents.a3c:t:12588 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02642406 -0.02219832 -0.00752779 -0.13118407 -0.06997302 -0.05012015]] probs:[[0.17071198 0.1714349  0.17396848 0.15373307 0.1634372  0.16671433]] entropy:[1.7909389]
DEBUG:chainerrl.agents.a3c:t:12589 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02652907 -0.02215884 -0.00804626 -0.13041526 -0.06975573 -0.05064704]] prob

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4555281] v_loss:[[1.5351146e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.03652725175414846
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:12615 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07368919 -0.01318119 -0.01735638 -0.09075272 -0.04051484 -0.07593536]] probs:[[0.16300201 0.17316943 0.17244793 0.16024421 0.16850019 0.16263628]] entropy:[1.79131]
DEBUG:chainerrl.agents.a3c:t:12616 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07344184 -0.01337912 -0.01736095 -0.09037434 -0.04050103 -0.07588811]] probs:[[0.16302995 0.17312202 0.17243405 0.16029269 0.16848972 0.16263162]] entropy:[1.7913148]
DEBUG:chainerrl.agents.a3c:t:12617 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07349183 -0.01333866 -0.01736061 -0.09045134 -0.04050396 -0.0758963 ]] probs:[[0.16302429 0.17313166 0.17243673 0.16028279 0.1684918  0.16263276]] entropy:[1.7913138]
DEBUG:chainerrl.age

DEBUG:chainerrl.agents.a3c:t:12643 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00489053 -0.03940779  0.03116192 -0.11765724 -0.05930254 -0.05357946]] probs:[[0.1725429  0.1666888  0.17887698 0.15414275 0.16340534 0.16434321]] entropy:[1.7906892]
DEBUG:chainerrl.agents.a3c:t:12644 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00393614 -0.04007275  0.03379304 -0.11855461 -0.05757124 -0.04965286]] probs:[[0.17248054 0.16635895 0.1791124  0.15380199 0.16347323 0.16477284]] entropy:[1.790654]
DEBUG:chainerrl.agents.a3c:t:12645 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0031628  -0.04093267  0.03647602 -0.11944982 -0.0561201  -0.04612348]] probs:[[0.17241502 0.16602437 0.17938662 0.15348725 0.16352195 0.1651648 ]] entropy:[1.7906145]
DEBUG:chainerrl.agents.a3c:t:12646 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00158563 -0.04135771  0.03894264 -0.11960219 -0.0552509  -0.04284137]] probs

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:12671 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13461363 -0.06797777 -0.03439038 -0.10354549 -0.12203127 -0.08906718]] probs:[[0.15961331 0.17061166 0.17643939 0.16465005 0.16163431 0.16705124]] entropy:[1.7911913]
DEBUG:chainerrl.agents.a3c:t:12672 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13405982 -0.06768342 -0.03402196 -0.10304686 -0.12192243 -0.08921049]] probs:[[0.15965714 0.17061423 0.17645511 0.16468616 0.16160676 0.16698065]] entropy:[1.7911918]
DEBUG:chainerrl.agents.a3c:t:12673 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13475344 -0.06811194 -0.03461999 -0.10373235 -0.12208688 -0.08900861]] probs:[[0.15960947 0.17060852 0.17641929 0.16463833 0.16164404 0.16708036]] entropy:[1.7911925]
DEBUG:chainerrl.agents.a3c:t:12674 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13473009 -0.06804258 -0.03447801 -0.10365

DEBUG:chainerrl.agents.a3c:t:12700 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12582132 -0.13206664  0.20768237 -0.19395517 -0.04663604 -0.17973162]] probs:[[0.15738906 0.15640917 0.21969154 0.1470227  0.17035869 0.14912882]] entropy:[1.7813102]
DEBUG:chainerrl.agents.a3c:t:12701 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12799236 -0.13356696  0.20644769 -0.19196875 -0.04623964 -0.18034445]] probs:[[0.15713862 0.15626507 0.21954744 0.14740027 0.17052484 0.14912371]] entropy:[1.7813687]
DEBUG:chainerrl.agents.a3c:t:12702 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12640545 -0.13257116  0.20984888 -0.19646026 -0.04999584 -0.18029426]] probs:[[0.1574101  0.15644254 0.22032613 0.14676017 0.1699092  0.14915197]] entropy:[1.7811161]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5904229] v_loss:[[0.00031363]]
DEBUG:chainerrl.agents.a3c:grad norm:1.4614026930307746
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:12728 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03930497 -0.06106793  0.0560706  -0.10916115  0.02910828 -0.12329698]] probs:[[0.16663006 0.16304286 0.18330505 0.15538718 0.17842875 0.15320611]] entropy:[1.7895606]
DEBUG:chainerrl.agents.a3c:t:12729 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03950416 -0.06105969  0.05603895 -0.10925703  0.0288433  -0.12353007]] probs:[[0.16661943 0.16306631 0.18332408 0.15539333 0.17840564 0.15319116]] entropy:[1.7895601]
DEBUG:chainerrl.agents.a3c:t:12730 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03962678 -0.06105916  0.05606075 -0.10938776  0.02857383 -0.1237255 ]] probs:[[0.16661812 0.1630851  0.18334912 0.15539086 0.17837805 0.1531788 ]] entropy:[1.7895588]
DEBUG:chainerrl.agents.a3c:t:12731 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03962502 -0.06105763  0.0560577  -0.10939359  0.02855719 -0.12372886]] prob

DEBUG:chainerrl.agents.a3c:t:12757 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05192465 -0.08131373  0.02900082 -0.064613    0.00324615 -0.08637624]] probs:[[0.1648648  0.16009007 0.17876126 0.16278614 0.1742161  0.15928166]] entropy:[1.7908096]
DEBUG:chainerrl.agents.a3c:t:12758 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05192722 -0.08133273  0.02899746 -0.06462105  0.00324573 -0.08637797]] probs:[[0.1648653  0.16008794 0.17876168 0.16278575 0.17421702 0.1592823 ]] entropy:[1.7908093]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5175856] v_loss:[[8.908266e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.19232039789896732
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:12759 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05529324 -0.09561747 -0.00045799 -0.05428801  0.02475778 -0.07881653]] probs:[[0.16452932 0.1580268  0.17380328 0.1646948  0.17824158 0.16070423]] entropy:[1.7908547]
DEBUG:chainerrl.ag

DEBUG:chainerrl.agents.a3c:t:12785 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03201836 -0.0898155  -0.01021309 -0.04476847 -0.00116204 -0.07250202]] probs:[[0.168212   0.15876545 0.1719202  0.16608089 0.17348331 0.16153817]] entropy:[1.7912617]
DEBUG:chainerrl.agents.a3c:t:12786 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03927913 -0.09164634 -0.00757155 -0.04670829 -0.00261764 -0.07911805]] probs:[[0.16744584 0.1589028  0.17284021 0.16620646 0.17369856 0.1609061 ]] entropy:[1.7912134]
DEBUG:chainerrl.agents.a3c:t:12787 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02855596 -0.08853833 -0.00283088 -0.05393468 -0.0083718  -0.07381292]] probs:[[0.1689497  0.15911365 0.17335233 0.16471593 0.17239445 0.16147399]] entropy:[1.7912472]
DEBUG:chainerrl.agents.a3c:t:12788 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02627584 -0.08694021  0.00028523 -0.0606182  -0.00989263 -0.07275008]] prob

DEBUG:chainerrl.agents.a3c:t:12814 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12515658 -0.17371593  0.02045969  0.09909961 -0.08580222 -0.0347379 ]] probs:[[0.15395236 0.14665513 0.17808473 0.19265468 0.16013186 0.16852126]] entropy:[1.7875582]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5307105] v_loss:[[0.00012254]]
DEBUG:chainerrl.agents.a3c:grad norm:0.3553813305954451
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:12815 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09151179 -0.14634573 -0.02676465  0.0649758  -0.05929062 -0.03431652]] probs:[[0.15937433 0.15087049 0.17003475 0.18637174 0.16459319 0.16875552]] entropy:[1.7896428]
DEBUG:chainerrl.agents.a3c:t:12816 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08870617 -0.14634284 -0.03170274  0.05526483 -0.05327636 -0.03599367]] probs:[[0.16005905 0.1510946  0.16944802 0.18484429 0.16583157 0.1687225 ]] entropy:[1.7898806]
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:12842 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11200613  0.06958529 -0.15471724  0.00371785 -0.15938465 -0.18286912]] probs:[[0.19498259 0.18688425 0.14933422 0.1749713  0.14863884 0.1451888 ]] entropy:[1.7847029]
DEBUG:chainerrl.agents.a3c:t:12843 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11722019  0.06912673 -0.16093294 -0.00944322 -0.13881038 -0.17796181]] probs:[[0.1957035  0.1865142  0.14818296 0.1724207  0.15149768 0.14568093]] entropy:[1.7849718]
DEBUG:chainerrl.agents.a3c:t:12844 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12143157  0.07003097 -0.16588885 -0.01452098 -0.13990211 -0.18329284]] probs:[[0.19683474 0.18697295 0.14767945 0.1718139  0.15156747 0.14513148]] entropy:[1.784626]
DEBUG:chainerrl.agents.a3c:t:12845 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12011473  0.0704098  -0.16391349 -0.01362101 -0.14544508 -0.18517788]] probs

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.637424] v_loss:[[0.00063863]]
DEBUG:chainerrl.agents.a3c:grad norm:2.7661059785468254
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:12871 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00361828 -0.0413477   0.1563448   0.03092512 -0.15478076 -0.15146272]] probs:[[0.16966806 0.16338584 0.19909997 0.17563139 0.14586498 0.14634979]] entropy:[1.7858466]
DEBUG:chainerrl.agents.a3c:t:12872 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01057924 -0.0432581   0.15776742  0.02931156 -0.15564555 -0.15727946]] probs:[[0.16890721 0.16347672 0.19987579 0.17578125 0.14609881 0.1458603 ]] entropy:[1.7856795]
DEBUG:chainerrl.agents.a3c:t:12873 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01061605 -0.0432708   0.15778022  0.0292931  -0.15565902 -0.15731376]] probs:[[0.16890368 0.16347726 0.19988152 0.17578079 0.14609917 0.1458576 ]] entropy:[1.7856781]
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:12899 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16862805 -0.01571723  0.02901405 -0.04709891  0.17753942 -0.12021139]] probs:[[0.14334416 0.16702765 0.17466861 0.16186742 0.20263694 0.15045516]] entropy:[1.7853421]
DEBUG:chainerrl.agents.a3c:t:12900 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16923279 -0.01601002  0.03031419 -0.0461783   0.17840657 -0.11866689]] probs:[[0.14316458 0.16687042 0.17478241 0.16191141 0.20268117 0.15058997]] entropy:[1.7853162]
DEBUG:chainerrl.agents.a3c:t:12901 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1682462  -0.01582108  0.0283819  -0.04701947  0.177486   -0.12119479]] probs:[[0.14343031 0.16704686 0.17459646 0.16191573 0.2026705  0.1503402 ]] entropy:[1.7853413]
DEBUG:chainerrl.agents.a3c:t:12902 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17057326 -0.01680991  0.03376269 -0.04355597  0.18075466 -0.11428646]] prob

DEBUG:chainerrl.agents.a3c:grad norm:3.2617119594036836
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:12927 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10308301 -0.07322309  0.03160641 -0.00723954  0.08654013 -0.08819894]] probs:[[0.15387295 0.15853688 0.17605858 0.16935055 0.1860007  0.15618034]] entropy:[1.789355]
DEBUG:chainerrl.agents.a3c:t:12928 r:0.1 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10271561 -0.06367646  0.02947821  0.00201214  0.09336111 -0.0830097 ]] probs:[[0.15318462 0.15928309 0.17483412 0.17009747 0.18636751 0.1562332 ]] entropy:[1.7893462]
DEBUG:chainerrl.agents.a3c:t:12929 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10371237 -0.05567654  0.02935428  0.00767174  0.09355889 -0.08342578]] probs:[[0.1527201  0.16023518 0.17445615 0.17071421 0.18602443 0.1558499 ]] entropy:[1.7893634]
DEBUG:chainerrl.agents.a3c:t:12930 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0

DEBUG:chainerrl.agents.a3c:t:12955 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06780103 -0.02286739  0.05535621 -0.03016987 -0.02831337 -0.001483  ]] probs:[[0.15812244 0.1653895  0.17884633 0.16418615 0.16449125 0.16896434]] entropy:[1.7910486]
DEBUG:chainerrl.agents.a3c:t:12956 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06759267 -0.02282664  0.05548601 -0.03002522 -0.02824054 -0.00149064]] probs:[[0.15813999 0.16538014 0.17885213 0.16419391 0.1644872  0.1689466 ]] entropy:[1.7910492]
DEBUG:chainerrl.agents.a3c:t:12957 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06755808 -0.02281446  0.05555392 -0.02995659 -0.02824689 -0.00153124]] probs:[[0.15814182 0.16537835 0.17886016 0.16420141 0.16448238 0.16893585]] entropy:[1.791049]
DEBUG:chainerrl.agents.a3c:t:12958 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0675531  -0.02281118  0.05555722 -0.02994706 -0.02825075 -0.00155513]] probs

INFO: outdir:result global_step:26444 local_step:12966 R:1.7000000000000002
INFO: statistics:[('average_value', 0.40045812838892464), ('average_entropy', 1.7874580166835385)]


DEBUG:chainerrl.agents.a3c:t:12967 r:0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.92862797  0.09197523 -0.17921698  0.40429226  0.12447171  0.35517707]] probs:[[0.06188273 0.17171642 0.1309286  0.23466557 0.17738825 0.2234184 ]] entropy:[1.7227168]
DEBUG:chainerrl.agents.a3c:t:12968 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-1.0776777   0.11624797 -0.27357385  0.4300091   0.19174424  0.42966756]] probs:[[0.05228895 0.1725541  0.11684975 0.23615111 0.18608566 0.23607047]] entropy:[1.7028912]
DEBUG:chainerrl.agents.a3c:t:12969 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-1.1010952   0.11505073 -0.2855803   0.43283108  0.20830388  0.43762246]] probs:[[0.05093361 0.17185809 0.11512724 0.23614578 0.1886554  0.23727997]] entropy:[1.6999779]
DEBUG:chainerrl.agents.a3c:t:12970 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-1.1045231   0.11335701 -0.2862397   0.4328921   0.21286291  0.43782026]] probs:

DEBUG:chainerrl.agents.a3c:t:12996 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.9628066   0.15542063 -0.17872404  0.23382515  0.23679958  0.31420448]] probs:[[0.06074047 0.18583094 0.1330457  0.20098732 0.20158604 0.21780957]] entropy:[1.7285484]
DEBUG:chainerrl.agents.a3c:t:12997 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.9628017   0.15542042 -0.17871696  0.23382238  0.23680046  0.314208  ]] probs:[[0.06074066 0.18583058 0.13304642 0.20098643 0.20158587 0.21780998]] entropy:[1.7285488]
DEBUG:chainerrl.agents.a3c:t:12998 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.9628017   0.15542042 -0.17871696  0.23382238  0.23680046  0.314208  ]] probs:[[0.06074066 0.18583058 0.13304642 0.20098643 0.20158587 0.21780998]] entropy:[1.7285488]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.2369242] v_loss:[[0.00029018]]
DEBUG:chainerrl.agents.a3c:grad norm:1.6042744366564523
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:13024 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.6818519   0.1644283  -0.10712786  0.09412378  0.18279642  0.13792422]] probs:[[0.08386115 0.19547698 0.14899108 0.18220603 0.1991007  0.1903641 ]] entropy:[1.7579405]
DEBUG:chainerrl.agents.a3c:t:13025 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.68187684  0.16451685 -0.10728114  0.09392845  0.18258283  0.1377044 ]] probs:[[0.08386975 0.19551922 0.14898723 0.18219368 0.19908357 0.19034652]] entropy:[1.7579455]
DEBUG:chainerrl.agents.a3c:t:13026 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.68187994  0.16454469 -0.10730151  0.09389692  0.1825522   0.13767081]] probs:[[0.08387084 0.1955278  0.14898661 0.18219087 0.19908068 0.1903432 ]] entropy:[1.7579461]
DEBUG:chainerrl.agents.a3c:t:13027 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.6825982   0.16452679 -0.10836256  0.0942545   0.18242884  0.13694994]] prob

DEBUG:chainerrl.agents.a3c:t:13053 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.5287271   0.09916455 -0.09484094  0.06547599  0.11749251  0.1215938 ]] probs:[[0.09947325 0.18637866 0.15351142 0.1802044  0.1898261  0.19060622]] entropy:[1.7705321]
DEBUG:chainerrl.agents.a3c:t:13054 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.5287284   0.09916499 -0.09484287  0.06547663  0.11749218  0.12159258]] probs:[[0.09947317 0.18637884 0.15351118 0.18020461 0.18982613 0.19060609]] entropy:[1.770532]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3661937] v_loss:[[4.3381897e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.5340910532096406
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:13055 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.352394    0.08232583 -0.11211874  0.0115936   0.07801068  0.06626143]] probs:[[0.12029377 0.18579738 0.15296546 0.17310952 0.18499736 0.18283649]] entropy:[1.7811158]
DEBUG:chainerrl.age

DEBUG:chainerrl.agents.a3c:t:13081 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.243797    0.02376855 -0.11590582  0.11412579 -0.07538106  0.05400692]] probs:[[0.13506413 0.176499   0.15349083 0.19318967 0.15983877 0.18191755]] entropy:[1.7849083]
DEBUG:chainerrl.agents.a3c:t:13082 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24375398  0.02376806 -0.11587367  0.1141363  -0.07538001  0.05403388]] probs:[[0.13506754 0.17649579 0.15349305 0.19318826 0.1598361  0.18191923]] entropy:[1.7849092]
DEBUG:chainerrl.agents.a3c:t:13083 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24374516  0.02376679 -0.11586681  0.11413937 -0.07537878  0.05403416]] probs:[[0.13506834 0.17649505 0.15349364 0.19318831 0.15983583 0.18191876]] entropy:[1.7849095]
DEBUG:chainerrl.agents.a3c:t:13084 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24374728  0.02376827 -0.11586013  0.11413853 -0.07537732  0.05403681]] prob

DEBUG:chainerrl.agents.a3c:t:13110 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1808011  -0.13604887 -0.10227529 -0.0002806  -0.16924104 -0.00823911]] probs:[[0.20624165 0.15023468 0.1553953  0.17208128 0.14532991 0.1707172 ]] entropy:[1.7846022]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6445612] v_loss:[[0.00068016]]
DEBUG:chainerrl.agents.a3c:grad norm:1.6271303899268506
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:13111 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14540763 -0.1250323  -0.11609192  0.01562497 -0.1296307  -0.02201999]] probs:[[0.19933791 0.15210354 0.1534695  0.17507575 0.1514057  0.16860755]] entropy:[1.7866147]
DEBUG:chainerrl.agents.a3c:t:13112 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14540763 -0.12503234 -0.11609191  0.01562494 -0.12963071 -0.02201996]] probs:[[0.19933791 0.15210353 0.15346952 0.17507574 0.1514057  0.16860755]] entropy:[1.7866147]
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:13138 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03093798 -0.00977504 -0.09499388  0.02877235 -0.12295366 -0.06481425]] probs:[[0.17839079 0.17127383 0.1572827  0.17800488 0.15294601 0.16210178]] entropy:[1.7900028]
DEBUG:chainerrl.agents.a3c:t:13139 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03093764 -0.00977473 -0.09499362  0.02877242 -0.12295385 -0.06481265]] probs:[[0.17839068 0.17127383 0.15728268 0.17800485 0.15294595 0.162102  ]] entropy:[1.7900027]
DEBUG:chainerrl.agents.a3c:t:13140 r:0.15 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03093762 -0.00977437 -0.0949934   0.02877219 -0.12295405 -0.06481137]] probs:[[0.17839064 0.17127386 0.15728268 0.17800476 0.15294588 0.16210216]] entropy:[1.7900028]
DEBUG:chainerrl.agents.a3c:t:13141 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03093796 -0.00977496 -0.0949937   0.02877238 -0.12295374 -0.06481399]] pro

DEBUG:chainerrl.agents.a3c:t:13166 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16531602  0.24749327 -0.05112408 -0.05439529 -0.14206907 -0.07279913]] probs:[[0.14556503 0.21995704 0.16317363 0.16264074 0.14898862 0.15967488]] entropy:[1.7814224]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7061188] v_loss:[[0.00105353]]
DEBUG:chainerrl.agents.a3c:grad norm:19.0663086457253
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:13167 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1677048   0.17885536 -0.0354382  -0.05179199 -0.11034283 -0.05150035]] probs:[[0.14575624 0.20612769 0.16636802 0.16366939 0.15436156 0.16371714]] entropy:[1.7855285]
DEBUG:chainerrl.agents.a3c:t:13168 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16744068  0.17867818 -0.03645663 -0.05241731 -0.11061573 -0.05199774]] probs:[[0.14585209 0.20617223 0.16626404 0.16363141 0.15438014 0.16370009]] entropy:[1.785532]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:13194 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13891277  0.03926633 -0.00803119 -0.08352275 -0.04760744  0.01653041]] probs:[[0.15024933 0.17955387 0.17125912 0.15880646 0.1646137  0.1755176 ]] entropy:[1.7899425]
DEBUG:chainerrl.agents.a3c:t:13195 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13889761  0.04027857 -0.00844659 -0.08281465 -0.0479031   0.01581364]] probs:[[0.15024392 0.17972651 0.17117925 0.15891083 0.16455661 0.17538287]] entropy:[1.7899423]
DEBUG:chainerrl.agents.a3c:t:13196 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13930288  0.03820769 -0.00713732 -0.08551423 -0.04859661  0.01634272]] probs:[[0.15028195 0.1794728  0.17151637 0.15858676 0.16455081 0.17559123]] entropy:[1.789929]
DEBUG:chainerrl.agents.a3c:t:13197 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1389352   0.03841816 -0.00767774 -0.0841097  -0.04735529  0.01713168]] probs

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6907966] v_loss:[[0.00074706]]
DEBUG:chainerrl.agents.a3c:grad norm:1.5709875640581952
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:13223 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07650492 -0.0674432  -0.09615058 -0.10091038  0.00011784  0.0690673 ]] probs:[[0.16124156 0.16270934 0.15810478 0.15735403 0.174082   0.18650833]] entropy:[1.7898346]
DEBUG:chainerrl.agents.a3c:t:13224 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07635091 -0.06145606 -0.10080366 -0.09569816 -0.00011016  0.06317683]] probs:[[0.16127363 0.16369376 0.15737787 0.15818341 0.1740501  0.18542123]] entropy:[1.7899823]
DEBUG:chainerrl.agents.a3c:t:13225 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-7.6453127e-02 -6.2254418e-02 -1.0061089e-01 -9.6324623e-02
  -9.2461269e-05  6.3547708e-02]] probs:[[0.16128036 0.16358668 0.15743087 0.15810712 0.17407824 0.18551673]] entropy:[1.7899681]
D

DEBUG:chainerrl.agents.a3c:t:13251 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05240205 -0.01855337 -0.08451031 -0.0898362  -0.02397515 -0.00619237]] probs:[[0.16550218 0.1712001  0.1602726  0.15942127 0.17027439 0.17332943]] entropy:[1.7912401]
DEBUG:chainerrl.agents.a3c:t:13252 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05239948 -0.01855147 -0.08451194 -0.08982563 -0.02396345 -0.00618409]] probs:[[0.16550167 0.17119946 0.16027144 0.15942205 0.17027543 0.17332989]] entropy:[1.79124]
DEBUG:chainerrl.agents.a3c:t:13253 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05239874 -0.01855387 -0.08451173 -0.08982257 -0.02395692 -0.00617702]] probs:[[0.16550137 0.17119862 0.16027106 0.15942214 0.17027612 0.17333068]] entropy:[1.79124]
DEBUG:chainerrl.agents.a3c:t:13254 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0523986  -0.01855342 -0.08451202 -0.08982138 -0.02395581 -0.00617648]] probs:[[

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:13279 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18315178 -0.07454675 -0.07441547 -0.10418875 -0.05082332  0.19686905]] probs:[[0.1446016  0.16119058 0.16121174 0.15648268 0.1650603  0.2114531 ]] entropy:[1.7841822]
DEBUG:chainerrl.agents.a3c:t:13280 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18315172 -0.07454649 -0.07441577 -0.10418916 -0.05082351  0.19686928]] probs:[[0.14460161 0.16119063 0.1612117  0.15648264 0.16506027 0.21145315]] entropy:[1.7841821]
DEBUG:chainerrl.agents.a3c:t:13281 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18315157 -0.07454658 -0.07441575 -0.10418934 -0.05082358  0.19686913]] probs:[[0.14460164 0.16119063 0.16121171 0.15648262 0.16506027 0.21145314]] entropy:[1.7841821]
DEBUG:chainerrl.agents.a3c:t:13282 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18315154 -0.07454654 -0.07441565 -0.10418

DEBUG:chainerrl.agents.a3c:t:13308 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.14682521 -0.02901138 -0.05203846 -0.08625222  0.24809605  0.03956526]] probs:[[0.14334731 0.16127068 0.15759952 0.15229864 0.21276571 0.17271811]] entropy:[1.7831029]
DEBUG:chainerrl.agents.a3c:t:13309 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1468424  -0.02899122 -0.05201848 -0.08625279  0.24802901  0.03960205]] probs:[[0.14334543 0.1612746  0.15760331 0.15229918 0.21275233 0.17272517]] entropy:[1.783106]
DEBUG:chainerrl.agents.a3c:t:13310 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1468224  -0.02902551 -0.05206284 -0.08625213  0.24814434  0.03953474]] probs:[[0.14334781 0.16126852 0.15759578 0.15229876 0.21277614 0.17271295]] entropy:[1.7831001]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3937823] v_loss:[[4.1529816e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.30736155526048564
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.ag

DEBUG:chainerrl.agents.a3c:t:13336 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02138546 -0.00757269 -0.03205133 -0.03565607  0.05566284 -0.00548512]] probs:[[0.16433175 0.16661738 0.16258833 0.1620033  0.17749378 0.16696556]] entropy:[1.7912824]
DEBUG:chainerrl.agents.a3c:t:13337 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02138546 -0.0075727  -0.03205133 -0.03565607  0.05566286 -0.00548512]] probs:[[0.16433175 0.16661736 0.16258833 0.1620033  0.17749378 0.16696556]] entropy:[1.7912823]
DEBUG:chainerrl.agents.a3c:t:13338 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02138561 -0.0075727  -0.03205133 -0.03565605  0.05566292 -0.00548514]] probs:[[0.1643317  0.16661736 0.16258831 0.16200328 0.17749378 0.16696554]] entropy:[1.7912823]
DEBUG:chainerrl.agents.a3c:t:13339 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02138561 -0.0075727  -0.03205133 -0.03565605  0.05566292 -0.00548514]] prob

DEBUG:chainerrl.agents.a3c:t:13365 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02767806 -0.03884602  0.00329149  0.02138325 -0.0161724   0.00455346]] probs:[[0.16353332 0.16171713 0.1686771  0.17175654 0.16542573 0.1688901 ]] entropy:[1.7915472]
DEBUG:chainerrl.agents.a3c:t:13366 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02767982 -0.03882582  0.00327307  0.02139502 -0.01618602  0.0045547 ]] probs:[[0.16353306 0.16172044 0.16867404 0.1717586  0.16542353 0.16889036]] entropy:[1.7915471]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5331132] v_loss:[[0.00012482]]
DEBUG:chainerrl.agents.a3c:grad norm:0.2386893049807319
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:13367 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-2.0584239e-02 -3.4226596e-02  3.0508172e-03  2.2748085e-02
  -2.5308279e-02  2.4428125e-05]] probs:[[0.16472398 0.16249202 0.16866362 0.17201877 0.16394766 0.16815394]] entropy:[1.7915695]
D

DEBUG:chainerrl.agents.a3c:t:13393 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06351613 -0.13551235  0.08572115  0.02779937 -0.03275771  0.06167824]] probs:[[0.15743811 0.14650159 0.18277752 0.17249149 0.1623559  0.17843542]] entropy:[1.7889092]
DEBUG:chainerrl.agents.a3c:t:13394 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06354559 -0.13558626  0.08567862  0.02782219 -0.03269815  0.06173453]] probs:[[0.15743339 0.1464907  0.18276967 0.17249535 0.1623655  0.1784454 ]] entropy:[1.7889076]
DEBUG:chainerrl.agents.a3c:t:13395 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06338919 -0.1351173   0.0858175   0.02775354 -0.03305952  0.06145896]] probs:[[0.15745817 0.14655954 0.18279523 0.17248367 0.16230698 0.17839639]] entropy:[1.7889177]
DEBUG:chainerrl.agents.a3c:t:13396 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06355067 -0.13560699  0.08568004  0.02782076 -0.03268316  0.06174198]] prob

DEBUG:chainerrl.agents.a3c:t:13422 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04770375 -0.1019588   0.08162327  0.04167235 -0.04375825  0.01285733]] probs:[[0.16012476 0.15166865 0.18223195 0.17509513 0.16075778 0.17012174]] entropy:[1.789877]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4864364] v_loss:[[3.6066507e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.1578455694174398
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:13423 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0438243  -0.12029901  0.04811496  0.12503718 -0.05195724 -0.02203181]] probs:[[0.16075717 0.14892164 0.1762378  0.19032945 0.15945505 0.16429894]] entropy:[1.788623]
DEBUG:chainerrl.agents.a3c:t:13424 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04440324 -0.12068537  0.04860802  0.12564157 -0.05224625 -0.02328448]] probs:[[0.16069633 0.14889394 0.17636004 0.19048266 0.1594409  0.16412611]] entropy:[1.7885871]
DEBUG:chainerrl.agen

DEBUG:chainerrl.agents.a3c:t:13450 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07458959 -0.11906164  0.10251592  0.11389899 -0.0538288  -0.04489712]] probs:[[0.1560355  0.1492483  0.18626846 0.18840088 0.15930876 0.16073804]] entropy:[1.7877283]
DEBUG:chainerrl.agents.a3c:t:13451 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07459696 -0.11907715  0.10251857  0.11387853 -0.0538558  -0.0449059 ]] probs:[[0.1560363  0.14924788 0.18627131 0.1883994  0.15930648 0.16073865]] entropy:[1.7877283]
DEBUG:chainerrl.agents.a3c:t:13452 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07599273 -0.12162109  0.10017126  0.1134624  -0.05558953 -0.04474799]] probs:[[0.15603125 0.14907178 0.18608811 0.18857795 0.15924749 0.16098337]] entropy:[1.7877128]
DEBUG:chainerrl.agents.a3c:t:13453 r:0.3 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07702035 -0.12367803  0.09825633  0.11316651 -0.05691462 -0.04449198]] prob

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.616426] v_loss:[[0.00040314]]
DEBUG:chainerrl.agents.a3c:grad norm:0.48956480327250407
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:13479 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01927991 -0.18439114  0.06803673 -0.05587204 -0.04948606  0.12613274]] probs:[[0.1712442  0.13968915 0.17980042 0.15884656 0.15986419 0.1905555 ]] entropy:[1.7869184]
DEBUG:chainerrl.agents.a3c:t:13480 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01927952 -0.1843915   0.06803703 -0.05587158 -0.04948624  0.12613264]] probs:[[0.17124414 0.1396891  0.17980047 0.15884663 0.15986416 0.19055548]] entropy:[1.7869182]
DEBUG:chainerrl.agents.a3c:t:13481 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01927943 -0.1843916   0.06803705 -0.05587172 -0.04948636  0.12613273]] probs:[[0.17124414 0.1396891  0.1798005  0.15884662 0.15986416 0.19055551]] entropy:[1.7869184]
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:13507 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07942728 -0.12501693  0.03294095 -0.09403539 -0.10671671  0.08460622]] probs:[[0.16099973 0.15382461 0.1801466  0.15866493 0.15666556 0.18969855]] entropy:[1.7885988]
DEBUG:chainerrl.agents.a3c:t:13508 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07942738 -0.12501775  0.03294074 -0.09403635 -0.10671785  0.08460625]] probs:[[0.1609998  0.15382457 0.18014666 0.15866487 0.15666546 0.18969865]] entropy:[1.7885988]
DEBUG:chainerrl.agents.a3c:t:13509 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07942737 -0.12501863  0.03294059 -0.09403723 -0.10671894  0.08460612]] probs:[[0.16099988 0.15382451 0.18014671 0.1586648  0.15666535 0.18969873]] entropy:[1.7885988]
DEBUG:chainerrl.agents.a3c:t:13510 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07944342 -0.12502888  0.03294533 -0.09401376 -0.10670753  0.08462343]] prob

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:13535 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15318176 -0.13811608  0.00206947  0.01741418 -0.09206261  0.08391221]] probs:[[0.14925978 0.15152551 0.17432818 0.17702381 0.15866697 0.18919577]] entropy:[1.7879653]
DEBUG:chainerrl.agents.a3c:t:13536 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15315624 -0.13804188  0.00208461  0.01756247 -0.09211168  0.08387635]] probs:[[0.14925921 0.1515323  0.17432569 0.17704487 0.15865451 0.18918343]] entropy:[1.7879655]
DEBUG:chainerrl.agents.a3c:t:13537 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15313017 -0.13796555  0.00210016  0.01771502 -0.09216218  0.0838393 ]] probs:[[0.1492586  0.1515393  0.17432314 0.17706653 0.15864173 0.18917072]] entropy:[1.787966]
DEBUG:chainerrl.agents.a3c:t:13538 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15423508 -0.14200278  0.00159468  0.011071

DEBUG:chainerrl.agents.a3c:t:13564 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12234667 -0.13631843 -0.0402708  -0.05857828 -0.0736508   0.15384497]] probs:[[0.15371585 0.15158309 0.16686442 0.16383734 0.16138642 0.20261292]] entropy:[1.7868054]
DEBUG:chainerrl.agents.a3c:t:13565 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12234382 -0.13632964 -0.04026924 -0.05858444 -0.07364235  0.15384689]] probs:[[0.15371633 0.15158144 0.16686472 0.16383637 0.16138783 0.20261335]] entropy:[1.7868054]
DEBUG:chainerrl.agents.a3c:t:13566 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12234385 -0.13632984 -0.04026932 -0.05858466 -0.07364257  0.15384701]] probs:[[0.15371633 0.1515814  0.16686471 0.16383635 0.1613878  0.2026134 ]] entropy:[1.7868053]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5912625] v_loss:[[0.00030083]]
DEBUG:chainerrl.agents.a3c:grad norm:0.5372718395846887
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agent

INFO: outdir:result global_step:27693 local_step:13579 R:1.6500000000000001
INFO: statistics:[('average_value', 0.36569141379405085), ('average_entropy', 1.7851869724943321)]


DEBUG:chainerrl.agents.a3c:t:13580 r:0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10737924 -0.12076704 -0.20932698  0.12133271  0.20290652  0.06156075]] probs:[[0.14937365 0.14738719 0.13489586 0.18775967 0.20371799 0.17686574]] entropy:[1.7809967]
DEBUG:chainerrl.agents.a3c:t:13581 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13330068 -0.13045421 -0.31478086  0.09294207  0.26610506  0.0518181 ]] probs:[[0.14738253 0.14780264 0.12292214 0.18479998 0.21973826 0.17735443]] entropy:[1.7742051]
DEBUG:chainerrl.agents.a3c:t:13582 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13821614 -0.13175668 -0.33016488  0.08978921  0.2755157   0.0496946 ]] probs:[[0.14690547 0.14785747 0.1212483  0.1845268  0.2221874  0.17727461]] entropy:[1.7729734]
DEBUG:chainerrl.agents.a3c:t:13583 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13896954 -0.13178787 -0.3322491   0.08940209  0.2768325   0.04940225]] probs:

DEBUG:chainerrl.agents.a3c:t:13609 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09379657 -0.09666313 -0.23951733  0.07309564  0.14397135  0.01521516]] probs:[[0.15559956 0.15515417 0.13450012 0.18386063 0.19736478 0.1735208 ]] entropy:[1.783986]
DEBUG:chainerrl.agents.a3c:t:13610 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09379657 -0.09666313 -0.23951733  0.07309564  0.14397135  0.01521516]] probs:[[0.15559956 0.15515417 0.13450012 0.18386063 0.19736478 0.1735208 ]] entropy:[1.783986]
DEBUG:chainerrl.agents.a3c:t:13611 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09379672 -0.09666278 -0.23951726  0.07309568  0.1439712   0.01521566]] probs:[[0.15559952 0.1551542  0.1345001  0.18386061 0.19736473 0.17352086]] entropy:[1.783986]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4364383] v_loss:[[9.620147e-07]]
DEBUG:chainerrl.agents.a3c:grad norm:0.28652857231242645
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:13637 r:0.05 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13702416 -0.15706636  0.04148981 -0.08954554  0.16034913 -0.01239721]] probs:[[0.14918105 0.14622091 0.17833693 0.15643479 0.20084509 0.16898122]] entropy:[1.7854766]
DEBUG:chainerrl.agents.a3c:t:13638 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13702413 -0.1570668   0.04149017 -0.08954705  0.16034849 -0.01239831]] probs:[[0.14918113 0.14622091 0.1783371  0.15643464 0.20084508 0.16898113]] entropy:[1.7854767]
DEBUG:chainerrl.agents.a3c:t:13639 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13701336 -0.15708283  0.0415008  -0.08953646  0.16035503 -0.01238239]] probs:[[0.14918172 0.14621757 0.17833777 0.15643524 0.20084502 0.16898265]] entropy:[1.7854763]
DEBUG:chainerrl.agents.a3c:t:13640 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13696265 -0.15716496  0.04155556 -0.08948747  0.160379   -0.01230164]] pro

DEBUG:chainerrl.agents.a3c:t:13666 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11768604 -0.08782271  0.00417705 -0.05017519  0.03332558  0.01569782]] probs:[[0.15301156 0.15764992 0.17284177 0.16369818 0.177954   0.17484456]] entropy:[1.7902248]
DEBUG:chainerrl.agents.a3c:t:13667 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11769255 -0.08783377  0.00416851 -0.05017029  0.03332324  0.01570915]] probs:[[0.15301085 0.15764846 0.17284061 0.16369927 0.17795391 0.17484686]] entropy:[1.7902246]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4814442] v_loss:[[3.337759e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.17047145545598158
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:13668 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0721132  -0.04776679 -0.02845225 -0.06718182  0.02466689 -0.01071161]] probs:[[0.16027798 0.16422807 0.16743088 0.16107032 0.17656511 0.17042771]] entropy:[1.7911904]
DEBUG:chainerrl.ag

DEBUG:chainerrl.agents.a3c:t:13694 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01215836 -0.09007763 -0.04553927 -0.10548823  0.0836415  -0.02663211]] probs:[[0.17325576 0.15641814 0.16354223 0.15402614 0.186094   0.16666375]] entropy:[1.789697]
DEBUG:chainerrl.agents.a3c:t:13695 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01216164 -0.09007778 -0.04553313 -0.10548912  0.08364296 -0.0266333 ]] probs:[[0.17325605 0.15641789 0.16354299 0.15402576 0.186094   0.16666332]] entropy:[1.789697]
DEBUG:chainerrl.agents.a3c:t:13696 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01215316 -0.09007706 -0.04554424 -0.10548984  0.08364186 -0.02663945]] probs:[[0.17325537 0.15641871 0.1635419  0.15402634 0.18609464 0.16666305]] entropy:[1.7896972]
DEBUG:chainerrl.agents.a3c:t:13697 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01215837 -0.09007785 -0.04553839 -0.10548831  0.08364195 -0.02663203]] probs:

DEBUG:chainerrl.agents.a3c:t:13723 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00291871 -0.09579648  0.00067034 -0.07142465  0.01077343 -0.02832945]] probs:[[0.1721399  0.15595888 0.1717533  0.15980656 0.17349733 0.16684401]] entropy:[1.7909608]
DEBUG:chainerrl.agents.a3c:pi_loss:[-0.2594729] v_loss:[[0.02252208]]
DEBUG:chainerrl.agents.a3c:grad norm:26.106489072723228
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:13724 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00603906 -0.06342745 -0.07337489 -0.11539192  0.02420674  0.04527281]] probs:[[0.1723975  0.16082813 0.15923624 0.15268421 0.17555818 0.17929572]] entropy:[1.790087]
DEBUG:chainerrl.agents.a3c:t:13725 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00603903 -0.06342749 -0.07337487 -0.11539192  0.02420673  0.04527278]] probs:[[0.1723975  0.16082813 0.15923624 0.15268423 0.17555818 0.17929572]] entropy:[1.790087]
DEBUG:chainerrl.agents.

DEBUG:chainerrl.agents.a3c:t:13751 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2681447  -0.13504523  0.09290256  0.00380121  0.21988645 -0.08436701]] probs:[[0.1295242  0.14796373 0.18584538 0.17000258 0.2110086  0.15565553]] entropy:[1.7792783]
DEBUG:chainerrl.agents.a3c:t:13752 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.26814467 -0.13504523  0.09290256  0.00380123  0.21988644 -0.08436701]] probs:[[0.1295242  0.14796373 0.18584538 0.17000258 0.2110086  0.15565553]] entropy:[1.7792783]
DEBUG:chainerrl.agents.a3c:t:13753 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.26814467 -0.13504526  0.09290255  0.0038012   0.21988653 -0.08436698]] probs:[[0.1295242  0.14796373 0.18584538 0.1700026  0.21100862 0.15565553]] entropy:[1.7792788]
DEBUG:chainerrl.agents.a3c:t:13754 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2681447  -0.13504523  0.09290256  0.00380121  0.21988645 -0.08436701]] prob

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7396889] v_loss:[[0.00119794]]
DEBUG:chainerrl.agents.a3c:grad norm:1.8407608783844065
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:13780 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2367095   0.00877624 -0.01140122  0.06190671  0.05878061 -0.06859707]] probs:[[0.13502935 0.1726002  0.16915245 0.1820185  0.18145038 0.15974912]] entropy:[1.7869545]
DEBUG:chainerrl.agents.a3c:t:13781 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2367095   0.00877624 -0.01140122  0.06190671  0.05878061 -0.06859707]] probs:[[0.13502935 0.1726002  0.16915245 0.1820185  0.18145038 0.15974912]] entropy:[1.7869545]
DEBUG:chainerrl.agents.a3c:t:13782 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2367095   0.00877624 -0.01140122  0.06190671  0.05878061 -0.06859707]] probs:[[0.13502935 0.1726002  0.16915245 0.1820185  0.18145038 0.15974912]] entropy:[1.7869545]
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:13808 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17273475  0.0203157  -0.00516632  0.04151512  0.03329534 -0.10506295]] probs:[[0.14423548 0.17494956 0.17054781 0.17869799 0.17723514 0.154334  ]] entropy:[1.7887095]
DEBUG:chainerrl.agents.a3c:t:13809 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17273475  0.0203157  -0.00516632  0.04151512  0.03329534 -0.10506295]] probs:[[0.14423548 0.17494956 0.17054781 0.17869799 0.17723514 0.154334  ]] entropy:[1.7887095]
DEBUG:chainerrl.agents.a3c:t:13810 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17273481  0.02031569 -0.00516642  0.04151512  0.0332954  -0.10506307]] probs:[[0.14423548 0.17494956 0.1705478  0.17869799 0.17723516 0.15433398]] entropy:[1.7887095]
DEBUG:chainerrl.agents.a3c:t:13811 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17273483  0.02031567 -0.00516639  0.04151515  0.03329539 -0.10506307]] prob

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:13836 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17098692  0.02692396 -0.03776084  0.06881427  0.02250348 -0.09892413]] probs:[[0.14449699 0.17612071 0.16508903 0.18365517 0.17534389 0.15529422]] entropy:[1.7884871]
DEBUG:chainerrl.agents.a3c:t:13837 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17084265  0.02700884 -0.03744714  0.06898753  0.02275965 -0.09886853]] probs:[[0.14449285 0.17610519 0.16511224 0.18365522 0.17535847 0.155276  ]] entropy:[1.7884855]
DEBUG:chainerrl.agents.a3c:t:13838 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17085311  0.0270029  -0.03746959  0.06897528  0.02274099 -0.09887168]] probs:[[0.14449312 0.1761063  0.16511056 0.18365522 0.17535736 0.15527742]] entropy:[1.7884858]
DEBUG:chainerrl.agents.a3c:t:13839 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17084256  0.02700886 -0.03744733  0.06898

DEBUG:chainerrl.agents.a3c:t:13865 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2697466   0.13090903 -0.13584904  0.21620458 -0.09529916  0.034865  ]] probs:[[0.1280658  0.19117703 0.14641452 0.20819922 0.15247363 0.1736698 ]] entropy:[1.7783352]
DEBUG:chainerrl.agents.a3c:t:13866 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.26974636  0.130909   -0.13584907  0.21620443 -0.09529855  0.03486414]] probs:[[0.12806584 0.19117703 0.14641452 0.2081992  0.15247372 0.17366967]] entropy:[1.7783352]
DEBUG:chainerrl.agents.a3c:t:13867 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.26974654  0.13090904 -0.13584928  0.21620446 -0.09529898  0.03486474]] probs:[[0.12806582 0.19117706 0.1464145  0.20819922 0.15247367 0.17366979]] entropy:[1.7783353]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4804472] v_loss:[[4.736656e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.900771979939595
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agen

DEBUG:chainerrl.agents.a3c:t:13893 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17474467  0.05122079 -0.0901389   0.07358222 -0.05649194  0.01322937]] probs:[[0.14376016 0.18020801 0.15645245 0.18428312 0.16180617 0.17349008]] entropy:[1.7881433]
DEBUG:chainerrl.agents.a3c:t:13894 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17497689  0.05128259 -0.0901359   0.07407697 -0.0572646   0.01514123]] probs:[[0.14368707 0.18016934 0.15640968 0.18432337 0.16163652 0.17377406]] entropy:[1.7881119]
DEBUG:chainerrl.agents.a3c:t:13895 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17505543  0.05130291 -0.09013889  0.07424147 -0.05752246  0.01577547]] probs:[[0.14366274 0.18015665 0.15639502 0.18433695 0.16158016 0.1738685 ]] entropy:[1.7881014]
DEBUG:chainerrl.agents.a3c:t:13896 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17506279  0.05130527 -0.09013762  0.07425863 -0.05754836  0.01584196]] prob

DEBUG:chainerrl.agents.a3c:t:13922 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09422054  0.08060289 -0.17249629  0.02161803  0.04430471 -0.11476954]] probs:[[0.15706478 0.18706979 0.14523928 0.17635463 0.18040125 0.15387018]] entropy:[1.7875044]
DEBUG:chainerrl.agents.a3c:t:13923 r:0.2 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09374791  0.08063728 -0.17259479  0.02180077  0.04435543 -0.11518716]] probs:[[0.15713221 0.1870681  0.14521867 0.1763792  0.18040258 0.15379927]] entropy:[1.7874987]
DEBUG:chainerrl.agents.a3c:pi_loss:[0.8539164] v_loss:[[0.06105677]]
DEBUG:chainerrl.agents.a3c:grad norm:103.72928873292008
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:13924 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-1.9965723e-01  2.5087159e-02 -2.7089781e-01 -8.7646674e-03
   2.3943372e-01  5.6374818e-05]] probs:[[0.13954943 0.17471626 0.12995371 0.16890079 0.21648254 0.17039725]] entropy:[1.7779998]
DE

DEBUG:chainerrl.agents.a3c:t:13950 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10420643 -0.03610233 -0.20318364  0.0293519   0.07692072  0.03814599]] probs:[[0.15454328 0.16543499 0.13997965 0.17662565 0.18523057 0.17818578]] entropy:[1.7873636]
DEBUG:chainerrl.agents.a3c:t:13951 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10468017 -0.03606718 -0.20303221  0.0292327   0.07684797  0.03854519]] probs:[[0.15447159 0.1654424  0.1400022  0.17660631 0.18521889 0.17825864]] entropy:[1.7873597]
DEBUG:chainerrl.agents.a3c:t:13952 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10649878 -0.03577473 -0.20218925  0.02860637  0.07681216  0.04078435]] probs:[[0.154165   0.16546297 0.1400967  0.17646605 0.18518111 0.17862819]] entropy:[1.7873394]
DEBUG:chainerrl.agents.a3c:t:13953 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10510904 -0.03621017 -0.2032033   0.02898339  0.07695784  0.03904958]] prob

DEBUG:chainerrl.agents.a3c:t:13979 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00708656 -0.01905463 -0.14819266 -0.02557117 -0.01730175 -0.03128809]] probs:[[0.17229167 0.17024195 0.1496176  0.16913617 0.17054063 0.16817199]] entropy:[1.7906544]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5412824] v_loss:[[0.00014299]]
DEBUG:chainerrl.agents.a3c:grad norm:0.1888466635132297
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:13980 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00269272 -0.02067647 -0.12565589 -0.0286489  -0.03560507 -0.03076565]] probs:[[0.17298667 0.16990353 0.15297146 0.16855437 0.16738594 0.16819796]] entropy:[1.7910119]
DEBUG:chainerrl.agents.a3c:t:13981 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01445934 -0.02128269 -0.11078566 -0.03107903 -0.04228651 -0.02635404]] probs:[[0.17520821 0.1690565  0.15458281 0.16740845 0.1655427  0.16820133]] entropy:[1.7910656]
DEBUG:chainerrl.agent

INFO: outdir:result global_step:28517 local_step:13984 R:1.25
INFO: statistics:[('average_value', 0.3872752825599144), ('average_entropy', 1.7853255429414034)]


DEBUG:chainerrl.agents.a3c:t:13985 r:0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07266062 -0.20251864 -0.06936086  0.00352757  0.10084392  0.04081338]] probs:[[0.17993768 0.13665114 0.1561144  0.16791828 0.18508105 0.17429744]] entropy:[1.7868633]
DEBUG:chainerrl.agents.a3c:t:13986 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07455219 -0.2247695  -0.15328166 -0.04952068  0.14802518  0.01526438]] probs:[[0.18383813 0.13628304 0.1463823  0.16238706 0.19785385 0.17325558]] entropy:[1.7837257]
DEBUG:chainerrl.agents.a3c:t:13987 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07463808 -0.22878765 -0.16682698 -0.05607951  0.15579377  0.01110012]] probs:[[0.18435882 0.1361093  0.14480948 0.16176851 0.19994447 0.17300943]] entropy:[1.7830489]
DEBUG:chainerrl.agents.a3c:t:13988 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07461344 -0.22939411 -0.16866176 -0.05703498  0.156806    0.01045749]] probs:

DEBUG:chainerrl.agents.a3c:t:14014 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.26014778 -0.18239792 -0.20506285 -0.12524264  0.07301507 -0.03534731]] probs:[[0.22103924 0.14199537 0.13881327 0.15034758 0.18331528 0.16448924]] entropy:[1.7776845]
DEBUG:chainerrl.agents.a3c:t:14015 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.26014778 -0.18239802 -0.2050628  -0.12524296  0.07301503 -0.03534772]] probs:[[0.22103928 0.14199539 0.1388133  0.15034756 0.18331532 0.1644892 ]] entropy:[1.7776843]
DEBUG:chainerrl.agents.a3c:t:14016 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.26015127 -0.18236677 -0.20507236 -0.12518232  0.07301823 -0.03524939]] probs:[[0.22103347 0.1419956  0.13880783 0.1503522  0.18331042 0.16450047]] entropy:[1.7776861]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5547781] v_loss:[[0.00021247]]
DEBUG:chainerrl.agents.a3c:grad norm:0.9461618939598295
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:14042 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05757257 -0.07486082 -0.1495091  -0.02147657 -0.02856775 -0.02553774]] probs:[[0.18346523 0.16070844 0.14914866 0.16952087 0.16832303 0.16883382]] entropy:[1.7898312]
DEBUG:chainerrl.agents.a3c:t:14043 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05774008 -0.0745307  -0.14961645 -0.02055579 -0.02782545 -0.02500335]] probs:[[0.18341541 0.16069092 0.14906718 0.16960254 0.16837406 0.1688499 ]] entropy:[1.7898238]
DEBUG:chainerrl.agents.a3c:t:14044 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05762777 -0.07477168 -0.14954662 -0.02120383 -0.02835429 -0.02537795]] probs:[[0.18345186 0.16070218 0.14912397 0.1695454  0.16833739 0.16883917]] entropy:[1.7898287]
DEBUG:chainerrl.agents.a3c:t:14045 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05762692 -0.07474501 -0.14953765 -0.02117546 -0.02833152 -0.02536337]] prob

DEBUG:chainerrl.agents.a3c:t:14071 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02597824 -0.05069683 -0.10396695 -0.03801689 -0.01353957 -0.05793485]] probs:[[0.1778376  0.16471153 0.15616694 0.16681337 0.1709469  0.16352366]] entropy:[1.7909592]
DEBUG:chainerrl.agents.a3c:t:14072 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02597854 -0.05070624 -0.10397164 -0.03803785 -0.01352364 -0.05794467]] probs:[[0.17783847 0.16471076 0.15616693 0.16681065 0.17095041 0.16352281]] entropy:[1.7909592]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4895973] v_loss:[[3.9055736e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.10401458386534737
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:14073 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0153057  -0.05010494 -0.10014606 -0.04795098 -0.0161179  -0.05811411]] probs:[[0.1765342  0.16535655 0.15728554 0.1657131  0.17107312 0.16403747]] entropy:[1.7911174]
DEBUG:chainerrl.a

DEBUG:chainerrl.agents.a3c:t:14099 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01350353 -0.0397466  -0.08232311 -0.04473869 -0.03873041 -0.07252067]] probs:[[0.17646442 0.16731349 0.16033939 0.16648032 0.1674836  0.16191882]] entropy:[1.7912836]
DEBUG:chainerrl.agents.a3c:t:14100 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01352149 -0.03977557 -0.08236209 -0.04478474 -0.03856102 -0.07253725]] probs:[[0.1764658  0.16730693 0.1603315  0.16647096 0.16751026 0.16191448]] entropy:[1.7912829]
DEBUG:chainerrl.agents.a3c:t:14101 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01351599 -0.03976756 -0.08235232 -0.04477414 -0.0386031  -0.07253284]] probs:[[0.1764653  0.16730872 0.16033348 0.16647317 0.16750367 0.16191565]] entropy:[1.7912831]
DEBUG:chainerrl.agents.a3c:t:14102 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01350438 -0.03974893 -0.08232664 -0.04474406 -0.03871501 -0.07252221]] prob

DEBUG:chainerrl.agents.a3c:t:14128 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00399623 -0.02459248 -0.04855413 -0.08265992 -0.0376056  -0.07514878]] probs:[[0.17480157 0.16987498 0.16585287 0.16029172 0.1676787  0.1615002 ]] entropy:[1.791324]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4785298] v_loss:[[2.5111995e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.05711335226473582
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:14129 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01622287 -0.03021466 -0.03854015 -0.07961689 -0.03912026 -0.06115704]] probs:[[0.17134929 0.1689685  0.1675676  0.16082391 0.16747041 0.16382027]] entropy:[1.791546]
DEBUG:chainerrl.agents.a3c:t:14130 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01622284 -0.03021464 -0.03854007 -0.0796168  -0.03912024 -0.06115697]] probs:[[0.17134929 0.1689685  0.1675676  0.16082393 0.16747041 0.16382028]] entropy:[1.7915459]
DEBUG:chainerrl.age

DEBUG:chainerrl.agents.a3c:t:14156 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02636273 -0.04089438 -0.03598035 -0.07035421 -0.04108673 -0.05066257]] probs:[[0.16965425 0.16720672 0.1680304  0.1623527  0.16717456 0.16558136]] entropy:[1.7916657]
DEBUG:chainerrl.agents.a3c:t:14157 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02636276 -0.04089427 -0.03598034 -0.07035427 -0.04108679 -0.05066259]] probs:[[0.16965425 0.16720673 0.16803041 0.1623527  0.16717456 0.16558136]] entropy:[1.7916657]
DEBUG:chainerrl.agents.a3c:t:14158 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02635945 -0.04089808 -0.0359848  -0.07035766 -0.04107364 -0.05066465]] probs:[[0.16965471 0.16720602 0.16802956 0.16235204 0.16717666 0.16558093]] entropy:[1.7916656]
DEBUG:chainerrl.agents.a3c:t:14159 r:0.05 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0263396  -0.04091583 -0.03600199 -0.07035418 -0.04106837 -0.05067836]] pro

DEBUG:chainerrl.agents.a3c:pi_loss:[-0.06811826] v_loss:[[0.01902978]]
DEBUG:chainerrl.agents.a3c:grad norm:28.031366187285506
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:14185 r:0.15 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03101918 -0.01953815 -0.11539634 -0.05905505 -0.00252768 -0.01908584]] probs:[[0.16823995 0.17018265 0.15462674 0.1635887  0.1731023  0.17025964]] entropy:[1.7910796]
DEBUG:chainerrl.agents.a3c:t:14186 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03102235 -0.01953407 -0.11539674 -0.05905452 -0.0025295  -0.01908623]] probs:[[0.16823944 0.17018336 0.1546267  0.16358882 0.173102   0.17025961]] entropy:[1.7910796]
DEBUG:chainerrl.agents.a3c:t:14187 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03101823 -0.01953903 -0.11539677 -0.05905548 -0.00252583 -0.01908621]] probs:[[0.1682401  0.17018248 0.15462665 0.16358861 0.1731026  0.17025957]] entropy:[1.7910799]
DEBUG:chainerrl.age

DEBUG:chainerrl.agents.a3c:t:14213 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13221322 -0.09095094  0.02788727  0.02977994  0.00483039 -0.04097329]] probs:[[0.15073825 0.15708816 0.17691076 0.17724591 0.17287841 0.16513856]] entropy:[1.7899334]
DEBUG:chainerrl.agents.a3c:t:14214 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13195074 -0.09089473  0.02773042  0.02932983  0.00512619 -0.04104324]] probs:[[0.15078075 0.15710005 0.17688645 0.17716959 0.17293292 0.16513021]] entropy:[1.7899421]
DEBUG:chainerrl.agents.a3c:t:14215 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13011774 -0.09047362  0.02661746  0.02613817  0.00720493 -0.04151941]] probs:[[0.15107813 0.1571878  0.17671396 0.17662928 0.17331658 0.16507427]] entropy:[1.7900043]
DEBUG:chainerrl.agents.a3c:t:14216 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11946303 -0.08706182  0.01957751  0.00781308  0.01928717 -0.04575448]] prob

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:14241 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06147833 -0.11324041 -0.15026361  0.04460401  0.06437074 -0.00245654]] probs:[[0.16203728 0.15386327 0.14827092 0.18017143 0.18376826 0.17188889]] entropy:[1.7886723]
DEBUG:chainerrl.agents.a3c:t:14242 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06147824 -0.11324044 -0.15026365  0.04460405  0.0643708  -0.00245654]] probs:[[0.16203728 0.15386327 0.1482709  0.18017143 0.18376826 0.17188887]] entropy:[1.7886723]
DEBUG:chainerrl.agents.a3c:t:14243 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06147827 -0.11324038 -0.15026356  0.04460402  0.06437075 -0.00245658]] probs:[[0.16203728 0.15386327 0.14827092 0.18017143 0.18376826 0.17188887]] entropy:[1.7886721]
DEBUG:chainerrl.agents.a3c:t:14244 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06147827 -0.11324037 -0.15026362  0.04460

DEBUG:chainerrl.agents.a3c:t:14270 r:0.25 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09107013 -0.08522591 -0.08471218  0.07166421  0.00976538 -0.05279715]] probs:[[0.1578763  0.15880166 0.15888327 0.18577683 0.17462614 0.16403581]] entropy:[1.7898864]
DEBUG:chainerrl.agents.a3c:t:14271 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09212385 -0.08570997 -0.08620626  0.07010298  0.01500001 -0.05663015]] probs:[[0.15778597 0.15880123 0.15872243 0.18557632 0.17562716 0.16348694]] entropy:[1.7898359]
DEBUG:chainerrl.agents.a3c:t:14272 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09241523 -0.08575463 -0.08658264  0.06970812  0.01622263 -0.05751472]] probs:[[0.15775824 0.15881251 0.15868106 0.18552452 0.17586236 0.1633613 ]] entropy:[1.7898237]
DEBUG:chainerrl.agents.a3c:pi_loss:[-0.08511212] v_loss:[[0.04092425]]
DEBUG:chainerrl.agents.a3c:grad norm:48.45634535850147
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agen

DEBUG:chainerrl.agents.a3c:t:14298 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07852452 -0.12489611  0.02263851 -0.06811797  0.01951654 -0.00256867]] probs:[[0.15990974 0.15266377 0.17693326 0.16158254 0.17638174 0.17252901]] entropy:[1.7902473]
DEBUG:chainerrl.agents.a3c:t:14299 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0785227  -0.12489377  0.02264262 -0.06811571  0.01951947 -0.00256768]] probs:[[0.15990964 0.15266375 0.17693354 0.1615825  0.17638181 0.17252874]] entropy:[1.7902471]
DEBUG:chainerrl.agents.a3c:t:14300 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07847625 -0.12483425  0.02274868 -0.06805167  0.01958843 -0.00254967]] probs:[[0.15990733 0.15266354 0.17694154 0.16158302 0.17638324 0.17252135]] entropy:[1.7902468]
DEBUG:chainerrl.agents.a3c:t:14301 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07845277 -0.12480654  0.02279774 -0.06802548  0.01962059 -0.00254563]] prob

DEBUG:chainerrl.agents.a3c:t:14327 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07499456 -0.18406962  0.03729989 -0.04525428  0.16119559 -0.11920643]] probs:[[0.15951869 0.14303452 0.17847623 0.16433406 0.20201686 0.1526197 ]] entropy:[1.7853]
DEBUG:chainerrl.agents.a3c:t:14328 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07499412 -0.18406978  0.03730013 -0.04525394  0.1611955  -0.1192063 ]] probs:[[0.15951872 0.14303446 0.17847624 0.16433409 0.2020168  0.15261967]] entropy:[1.7852997]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5441666] v_loss:[[0.00032685]]
DEBUG:chainerrl.agents.a3c:grad norm:0.7631387022287608
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:14329 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06919155 -0.1614351   0.02058906 -0.02262587  0.12248603 -0.11973744]] probs:[[0.16088878 0.14671175 0.17600174 0.16855784 0.19488133 0.15295862]] entropy:[1.7873018]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:14355 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06657201 -0.08292643 -0.01260266 -0.08229393  0.04450278 -0.02993403]] probs:[[0.16185348 0.15922798 0.17082863 0.15932873 0.18086778 0.16789344]] entropy:[1.7907124]
DEBUG:chainerrl.agents.a3c:t:14356 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06660025 -0.08330611 -0.01270233 -0.08233902  0.04439054 -0.03002634]] probs:[[0.16186912 0.15918744 0.17083293 0.15934145 0.18087007 0.16789891]] entropy:[1.7907109]
DEBUG:chainerrl.agents.a3c:t:14357 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06660339 -0.08334833 -0.01271352 -0.08234403  0.04437794 -0.03003654]] probs:[[0.16187088 0.15918294 0.17083341 0.15934289 0.18087032 0.16789955]] entropy:[1.790711]
DEBUG:chainerrl.agents.a3c:t:14358 r:0.15 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06660427 -0.08335846 -0.01271603 -0.08234515  0.04437493 -0.03003906]] prob

DEBUG:chainerrl.agents.a3c:t:14384 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10656962 -0.07872009  0.01734171 -0.03629807 -0.00837511 -0.02389167]] probs:[[0.15570685 0.16010416 0.17624699 0.16704223 0.17177226 0.16912752]] entropy:[1.7908939]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6296923] v_loss:[[0.00044644]]
DEBUG:chainerrl.agents.a3c:grad norm:0.6869986064544569
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:14385 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11185111 -0.05953756  0.00863494 -0.02478255 -0.00108933 -0.0480684 ]] probs:[[0.15490158 0.16322073 0.17473595 0.1689932  0.173045   0.16510351]] entropy:[1.790962]
DEBUG:chainerrl.agents.a3c:t:14386 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11185133 -0.05953655  0.00863315 -0.02478253 -0.00108859 -0.04806941]] probs:[[0.15490156 0.16322093 0.17473567 0.16899323 0.17304517 0.16510338]] entropy:[1.7909617]
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:14412 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03295214 -1.0750028  -1.083693    1.6973782   1.181925   -1.0171258 ]] probs:[[0.09573857 0.0316161  0.03134253 0.5057524  0.30205047 0.03349993]] entropy:[1.2625041]
DEBUG:chainerrl.agents.a3c:t:14413 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03294882 -1.0750009  -1.0836918   1.6973735   1.1819224  -1.017118  ]] probs:[[0.09573855 0.03161625 0.03134266 0.5057516  0.30205062 0.0335003 ]] entropy:[1.2625059]
DEBUG:chainerrl.agents.a3c:t:14414 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03295111 -1.0750022  -1.0836928   1.6973766   1.1819243  -1.0171238 ]] probs:[[0.09573856 0.03161614 0.03134257 0.50575215 0.30205056 0.03350003]] entropy:[1.2625047]
DEBUG:chainerrl.agents.a3c:t:14415 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03295217 -1.0750028  -1.0836933   1.6973778   1.1819252  -1.0171262 ]] prob

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7537198] v_loss:[[0.0061628]]
DEBUG:chainerrl.agents.a3c:grad norm:13.874700484103776
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:14441 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.31940973 -0.88477105 -0.8055225   1.06452     0.910979   -0.8207505 ]] probs:[[0.17071037 0.05120246 0.0554253  0.35963106 0.3084432  0.05458768]] entropy:[1.503602]
DEBUG:chainerrl.agents.a3c:t:14442 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.31999958 -0.8846365  -0.8048859   1.0642841   0.910882   -0.8204197 ]] probs:[[0.1708032  0.05120698 0.05545804 0.3595296  0.30839902 0.05460321]] entropy:[1.5037638]
DEBUG:chainerrl.agents.a3c:t:14443 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.32419723 -0.8834643  -0.8006237   1.0625546   0.9100671  -0.81806296]] probs:[[0.17147507 0.05125311 0.05567978 0.3588108  0.30806407 0.05471718]] entropy:[1.5049288]
DEBUG:chainerrl.agents.

DEBUG:chainerrl.agents.a3c:t:14469 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.3608577  -0.5971747  -0.63562423  0.7905444   0.4532205  -0.59215003]] probs:[[0.20955834 0.08039643 0.07736389 0.32204422 0.22983576 0.08080141]] entropy:[1.6342653]
DEBUG:chainerrl.agents.a3c:t:14470 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.37223592 -0.5799629  -0.6397981   0.7940538   0.45900768 -0.5809928 ]] probs:[[0.21051551 0.08123616 0.07651795 0.3209795  0.22959827 0.08115255]] entropy:[1.6350269]
DEBUG:chainerrl.agents.a3c:t:14471 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.37696657 -0.56129473 -0.6459134   0.7986117   0.45887384 -0.57210493]] probs:[[0.21062438 0.08241893 0.07573167 0.32109004 0.22860226 0.08153276]] entropy:[1.635744]
DEBUG:chainerrl.agents.a3c:t:14472 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.37809354 -0.55613804 -0.6474855   0.8015027   0.458552   -0.5706359 ]] probs

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:14497 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24581477 -0.38358673 -0.41203412  0.3089004   0.4583064  -0.30031833]] probs:[[0.20276022 0.10805307 0.10502256 0.21596354 0.25076485 0.11743569]] entropy:[1.7300549]
DEBUG:chainerrl.agents.a3c:t:14498 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2458152  -0.38358745 -0.4120346   0.30889907  0.45830545 -0.30031928]] probs:[[0.20276044 0.10805308 0.10502259 0.21596344 0.25076482 0.11743568]] entropy:[1.7300549]
DEBUG:chainerrl.agents.a3c:t:14499 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24581832 -0.38358858 -0.41203943  0.30889788  0.45830798 -0.30032432]] probs:[[0.20276111 0.10805298 0.1050221  0.2159632  0.25076547 0.11743511]] entropy:[1.7300541]
DEBUG:chainerrl.agents.a3c:t:14500 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24589512 -0.3836083  -0.4121377   0.30886

DEBUG:chainerrl.agents.a3c:t:14526 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16537115 -0.19155505 -0.23860534  0.21415286  0.1103994  -0.17173569]] probs:[[0.19693492 0.13781981 0.13148554 0.20677991 0.18640125 0.14057855]] entropy:[1.7747402]
DEBUG:chainerrl.agents.a3c:t:14527 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16529362 -0.19156402 -0.23865156  0.21412252  0.11031511 -0.17181925]] probs:[[0.19693074 0.13782632 0.13148685 0.20678528 0.18639602 0.14057472]] entropy:[1.7747413]
DEBUG:chainerrl.agents.a3c:t:14528 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1652937  -0.19156401 -0.2386516   0.21412252  0.11031511 -0.1718192 ]] probs:[[0.19693075 0.13782632 0.13148685 0.20678528 0.18639602 0.14057474]] entropy:[1.7747413]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7360111] v_loss:[[0.00137393]]
DEBUG:chainerrl.agents.a3c:grad norm:3.6278480476868804
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:14554 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07368091 -0.09098299 -0.15522622  0.05404486  0.1065326  -0.1960631 ]] probs:[[0.18446411 0.15645854 0.1467232  0.1808773  0.19062471 0.14085217]] entropy:[1.7849329]
DEBUG:chainerrl.agents.a3c:t:14555 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07368091 -0.09098299 -0.15522622  0.05404485  0.1065326  -0.1960631 ]] probs:[[0.18446411 0.15645854 0.1467232  0.1808773  0.19062471 0.14085217]] entropy:[1.7849329]
DEBUG:chainerrl.agents.a3c:t:14556 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07368089 -0.09098299 -0.15522617  0.05404489  0.10653262 -0.19606309]] probs:[[0.1844641  0.15645854 0.1467232  0.1808773  0.19062471 0.14085217]] entropy:[1.7849329]
DEBUG:chainerrl.agents.a3c:t:14557 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07368092 -0.09098306 -0.15522614  0.05404485  0.10653277 -0.19606313]] prob

DEBUG:chainerrl.agents.a3c:t:14583 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2568615   0.02307214 -0.2199744   0.06886254  0.03530639 -0.19229577]] probs:[[0.21366031 0.16911823 0.13262844 0.17704226 0.17119996 0.13635068]] entropy:[1.7786046]
DEBUG:chainerrl.agents.a3c:t:14584 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25691     0.02318058 -0.22002448  0.06888997  0.03531704 -0.19221427]] probs:[[0.21366218 0.16912985 0.13261652 0.17704009 0.171195   0.13635637]] entropy:[1.7786026]
DEBUG:chainerrl.agents.a3c:pi_loss:[-0.84621906] v_loss:[[0.02324732]]
DEBUG:chainerrl.agents.a3c:grad norm:18.614022703563315
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:14585 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1444567   0.09796523 -0.22356138  0.02601327  0.11560234 -0.17917047]] probs:[[0.19120249 0.18251668 0.13233213 0.16984558 0.18576431 0.13833882]] entropy:[1.7818558]
DEBUG:chainerrl.agen

DEBUG:chainerrl.agents.a3c:t:14611 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07127548 -0.06259189  0.01466269 -0.05551929 -0.02060002 -0.15345928]] probs:[[0.18479072 0.16163757 0.17461982 0.16278481 0.16856955 0.14759754]] entropy:[1.789353]
DEBUG:chainerrl.agents.a3c:t:14612 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.071241   -0.0626459   0.01454232 -0.05558129 -0.02068482 -0.15348096]] probs:[[0.18479611 0.16163912 0.17460991 0.16278508 0.16856599 0.14760372]] entropy:[1.7893538]
DEBUG:chainerrl.agents.a3c:t:14613 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0727453  -0.06145508  0.01781376 -0.05373885 -0.01792635 -0.15336317]] probs:[[0.18473695 0.16153674 0.17486276 0.162788   0.16872351 0.14735202]] entropy:[1.7893121]
DEBUG:chainerrl.agents.a3c:t:14614 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07123569 -0.06264472  0.01454081 -0.05558324 -0.02068989 -0.15347794]] probs

DEBUG:chainerrl.agents.a3c:t:14640 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-1.2582532   0.4167714   0.16054545  0.12536044  0.08578133  0.31659088]] probs:[[0.04324354 0.23087408 0.17868893 0.17251107 0.1658166  0.20886578]] entropy:[1.7101811]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3080386] v_loss:[[4.7059188e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.8454812211799896
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:14641 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-1.2075955   0.38106662  0.17174466  0.12812094  0.08053721  0.28846344]] probs:[[0.04595111 0.22503142 0.18253064 0.17473917 0.16661915 0.20512857]] entropy:[1.7159929]
DEBUG:chainerrl.agents.a3c:t:14642 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-1.2050322   0.3804399   0.16573478  0.12743634  0.08085804  0.28527915]] probs:[[0.04615374 0.22530386 0.1817705  0.17494059 0.166979   0.2048523 ]] entropy:[1.7162862]
DEBUG:chainerrl.ag

DEBUG:chainerrl.agents.a3c:t:14668 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-1.0383155   0.29642564  0.18622196  0.13929999  0.09806655  0.2414216 ]] probs:[[0.0550679  0.20920366 0.18737361 0.17878474 0.17156276 0.19800736]] entropy:[1.7316117]
DEBUG:chainerrl.agents.a3c:t:14669 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-1.0397067   0.29475653  0.18179576  0.13522838  0.09411006  0.23988275]] probs:[[0.05515468 0.20947509 0.18710014 0.17858714 0.17139286 0.19829008]] entropy:[1.7316474]
DEBUG:chainerrl.agents.a3c:t:14670 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-1.038986    0.29079437  0.17763007  0.13200799  0.09083053  0.23997249]] probs:[[0.05534298 0.20920825 0.18682379 0.178492   0.17129141 0.19884154]] entropy:[1.7318604]
DEBUG:chainerrl.agents.a3c:t:14671 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-1.0381103   0.2923205   0.18088165  0.13497105  0.09382185  0.24100153]] prob

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3981622] v_loss:[[5.131816e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.7259699654550362
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:14697 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.8283436   0.18186773  0.13939454  0.11354008  0.06262646  0.18557173]] probs:[[0.07073718 0.1942571  0.18617915 0.18142729 0.17242137 0.19497797]] entropy:[1.750174]
DEBUG:chainerrl.agents.a3c:t:14698 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.8280375   0.18238418  0.14070418  0.1145054   0.06384546  0.18560521]] probs:[[0.07070523 0.19421022 0.1862819  0.18146493 0.17250091 0.19483678]] entropy:[1.7501584]
DEBUG:chainerrl.agents.a3c:t:14699 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.8285528   0.18151148  0.13850245  0.11288297  0.06179268  0.18554363]] probs:[[0.07075906 0.19428861 0.1861096  0.18140213 0.172367   0.1950736 ]] entropy:[1.7501844]
DEBUG:chainerrl.agen

DEBUG:chainerrl.agents.a3c:t:14725 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.6352661   0.14713372  0.11319625  0.1653075   0.10102392  0.10902075]] probs:[[0.08532137 0.18657342 0.18034783 0.18999517 0.17816588 0.17959635]] entropy:[1.7634135]
DEBUG:chainerrl.agents.a3c:t:14726 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.63527906  0.14713322  0.11318901  0.16532141  0.10102899  0.10902134]] probs:[[0.08532017 0.18657312 0.18034633 0.1899976  0.17816658 0.17959626]] entropy:[1.7634127]
DEBUG:chainerrl.agents.a3c:t:14727 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.6352677   0.14712827  0.11320396  0.16531298  0.10103809  0.10901686]] probs:[[0.08532098 0.18657182 0.18034865 0.18999562 0.17816785 0.1795951 ]] entropy:[1.7634133]
DEBUG:chainerrl.agents.a3c:t:14728 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.6355255   0.1462049   0.11347319  0.16643369  0.10134826  0.10763019]] prob

DEBUG:chainerrl.agents.a3c:grad norm:0.534038571988778
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:14753 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.45303416  0.09062371  0.09502599  0.1606994   0.03340162  0.04236453]] probs:[[0.10452417 0.18002146 0.18081571 0.19308911 0.17000945 0.17154008]] entropy:[1.7751844]
DEBUG:chainerrl.agents.a3c:t:14754 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.4530343   0.09062368  0.09502586  0.16069943  0.03340138  0.04236448]] probs:[[0.10452416 0.1800215  0.18081571 0.19308914 0.17000943 0.17154008]] entropy:[1.7751843]
DEBUG:chainerrl.agents.a3c:t:14755 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.4530343   0.09062368  0.09502586  0.16069943  0.03340138  0.04236448]] probs:[[0.10452416 0.1800215  0.18081571 0.19308914 0.17000943 0.17154008]] entropy:[1.7751843]
DEBUG:chainerrl.agents.a3c:t:14756 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0

DEBUG:chainerrl.agents.a3c:t:14781 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.29207066  0.00613159  0.08895792  0.11977132 -0.00208491  0.04414692]] probs:[[0.12411764 0.16724038 0.18168211 0.18736748 0.16587187 0.17372046]] entropy:[1.7837536]
DEBUG:chainerrl.agents.a3c:t:14782 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.29207066  0.00613159  0.08895792  0.11977132 -0.00208491  0.04414692]] probs:[[0.12411764 0.16724038 0.18168211 0.18736748 0.16587187 0.17372046]] entropy:[1.7837536]
DEBUG:chainerrl.agents.a3c:t:14783 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.29207066  0.00613159  0.08895792  0.11977132 -0.00208491  0.04414692]] probs:[[0.12411764 0.16724038 0.18168211 0.18736748 0.16587187 0.17372046]] entropy:[1.7837536]
DEBUG:chainerrl.agents.a3c:t:14784 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2920721   0.00612877  0.0889669   0.11977274 -0.00207275  0.04414874]] prob

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:14809 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.19860001  0.03734827  0.06006867  0.06630618 -0.0220959   0.01932373]] probs:[[0.13696465 0.17341216 0.17739725 0.17850722 0.16340423 0.17031448]] entropy:[1.7879843]
DEBUG:chainerrl.agents.a3c:t:14810 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1985903   0.03724253  0.06029522  0.06628882 -0.02195783  0.01928312]] probs:[[0.13696109 0.17338762 0.1774311  0.17849775 0.16342095 0.17030148]] entropy:[1.7879837]
DEBUG:chainerrl.agents.a3c:t:14811 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.19858779  0.03724067  0.06030003  0.06627925 -0.02196025  0.01928401]] probs:[[0.13696158 0.17338748 0.17743215 0.17849623 0.16342072 0.17030181]] entropy:[1.7879839]
DEBUG:chainerrl.agents.a3c:t:14812 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.19943921  0.03737203  0.05801857  0.06727

DEBUG:chainerrl.agents.a3c:t:14838 r:0.15 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13460416  0.02279452  0.0303299   0.05440566 -0.07451463  0.02118235]] probs:[[0.14731145 0.17242248 0.17372665 0.17796001 0.1564347  0.17214473]] entropy:[1.7895597]
DEBUG:chainerrl.agents.a3c:t:14839 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13024604  0.0761134  -0.00601431  0.08221554 -0.04854051  0.00679112]] probs:[[0.14640863 0.17996475 0.1657753  0.18106627 0.1588733  0.16791178]] entropy:[1.7891612]
DEBUG:chainerrl.agents.a3c:t:14840 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13433868  0.0740793   0.00465357  0.07867013 -0.04542188  0.01493057]] probs:[[0.14551352 0.17923306 0.1672118  0.18005778 0.15904477 0.16893908]] entropy:[1.7891768]
DEBUG:chainerrl.agents.a3c:pi_loss:[0.1628458] v_loss:[[0.03804969]]
DEBUG:chainerrl.agents.a3c:grad norm:81.2500775892959
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.

DEBUG:chainerrl.agents.a3c:t:14866 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.21618778  0.02734099  0.16191813  0.02828716 -0.20715208  0.1084085 ]] probs:[[0.13504827 0.17228688 0.1971053  0.17244998 0.13627405 0.18683548]] entropy:[1.7816024]
DEBUG:chainerrl.agents.a3c:t:14867 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24924581  0.01716646  0.21946506  0.05360997 -0.16495636  0.12878591]] probs:[[0.12813486 0.16725098 0.20475091 0.17345862 0.13940352 0.1870012 ]] entropy:[1.7791698]
DEBUG:chainerrl.agents.a3c:t:14868 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24695489  0.04387649  0.23556271  0.06458321 -0.17134883  0.12792584]] probs:[[0.12728207 0.17024472 0.2062158  0.17380668 0.13727848 0.18517223]] entropy:[1.7783905]
DEBUG:chainerrl.agents.a3c:t:14869 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.23129731  0.04992964  0.23587254  0.06619962 -0.16498652  0.12163354]] prob

DEBUG:chainerrl.agents.a3c:t:14895 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.19327258 -0.20096113  0.18774655  0.21689235 -0.35592222  0.01564231]] probs:[[0.14193635 0.14084925 0.20776296 0.21390748 0.12063012 0.17491385]] entropy:[1.7696389]
DEBUG:chainerrl.agents.a3c:t:14896 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1767021  -0.20512468  0.17261547  0.21787961 -0.3610186   0.01298978]] probs:[[0.14462592 0.14057316 0.20509395 0.21459065 0.12028141 0.17483497]] entropy:[1.7702888]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.8669459] v_loss:[[0.00306316]]
DEBUG:chainerrl.agents.a3c:grad norm:35.18325623645559
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:14897 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08770262 -0.1345772   0.0470931   0.16724059 -0.33093706  0.01756038]] probs:[[0.15913458 0.15184735 0.18209818 0.2053454  0.12477547 0.17679895]] entropy:[1.7799646]
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:14923 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1200006  -0.07983445  0.06398465  0.06163785 -0.12370737  0.02219155]] probs:[[0.15171604 0.15793394 0.18236239 0.18193492 0.1511547  0.17489797]] entropy:[1.7884938]
DEBUG:chainerrl.agents.a3c:t:14924 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08235398 -0.07662731  0.04319924  0.0389526  -0.10609864  0.01018354]] probs:[[0.1576795  0.15858506 0.17877315 0.17801556 0.15397954 0.1729672 ]] entropy:[1.7898929]
DEBUG:chainerrl.agents.a3c:t:14925 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07583186 -0.0741773   0.03733968  0.05487016 -0.10270189  0.02950157]] probs:[[0.1575876  0.15784857 0.17647038 0.17959128 0.15340962 0.17509261]] entropy:[1.7897472]
DEBUG:chainerrl.agents.a3c:t:14926 r:0.3 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07457978 -0.0644223   0.03934766  0.06186093 -0.10065344  0.04723051]] prob

DEBUG:chainerrl.agents.a3c:t:14952 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01382908 -0.07345422  0.14793013  0.1503047  -0.22186126 -0.03530445]] probs:[[0.1642845  0.15477534 0.1931291  0.19358826 0.13342874 0.16079405]] entropy:[1.7835798]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7139615] v_loss:[[0.00090008]]
DEBUG:chainerrl.agents.a3c:grad norm:3.5180970018058253
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:14953 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00739374 -0.04215595  0.09625264  0.17816523 -0.15489472 -0.01935288]] probs:[[0.16313457 0.1575611  0.1809502  0.19639628 0.14076255 0.16119523]] entropy:[1.7861493]
DEBUG:chainerrl.agents.a3c:t:14954 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02026673 -0.04415944  0.09597877  0.1885744  -0.1533177  -0.00777132]] probs:[[0.16077414 0.15697834 0.18059303 0.19811381 0.14074497 0.1627957 ]] entropy:[1.78583]
DEBUG:chainerrl.agents.

DEBUG:chainerrl.agents.a3c:t:14980 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02553419  0.01144021  0.12137517  0.17926048 -0.09197741  0.09119777]] probs:[[0.16103686 0.15878314 0.1772346  0.18779662 0.14318274 0.17196603]] entropy:[1.788038]
DEBUG:chainerrl.agents.a3c:t:14981 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01274114  0.02375996  0.11295253  0.18885083 -0.08511619  0.07984543]] probs:[[0.15910295 0.16086577 0.17587312 0.18974122 0.14427109 0.17014581]] entropy:[1.7880919]
DEBUG:chainerrl.agents.a3c:t:14982 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00394904  0.03270963  0.10692634  0.20030412 -0.08272708  0.07664469]] probs:[[0.15755524 0.1621524  0.17464465 0.19173825 0.14447406 0.1694354 ]] entropy:[1.7878878]
DEBUG:chainerrl.agents.a3c:t:14983 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00938813  0.03611118  0.10165517  0.19731455 -0.08771142  0.07045641]] probs

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.9337404] v_loss:[[0.00315188]]
DEBUG:chainerrl.agents.a3c:grad norm:32.63618085773112
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:15009 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01643568  0.2787122  -0.01159229  0.16000827 -0.1308416  -0.13172399]] probs:[[0.15812394 0.21241185 0.15889166 0.18863675 0.14103009 0.14090571]] entropy:[1.7800094]
DEBUG:chainerrl.agents.a3c:t:15010 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00541269  0.28571808 -0.01520029  0.15436482 -0.13638774 -0.13170597]] probs:[[0.15974326 0.21372683 0.15818739 0.18741876 0.14013313 0.14079075]] entropy:[1.779703]
DEBUG:chainerrl.agents.a3c:t:15011 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00763058  0.26777428  0.01357666  0.15196319 -0.137733   -0.14040661]] probs:[[0.1616696  0.2097043  0.16263376 0.18677177 0.13979697 0.1394237 ]] entropy:[1.7806845]
DEBUG:chainerrl.agents.

DEBUG:chainerrl.agents.a3c:t:15037 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04850433  0.178903    0.20151253  0.04791506 -0.04252068  0.16201936]] probs:[[0.14532909 0.18243758 0.18660939 0.16003941 0.1462013  0.17938323]] entropy:[1.7865412]
DEBUG:chainerrl.agents.a3c:t:15038 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04343903  0.2106899   0.22451784  0.06656715 -0.03831747  0.13153054]] probs:[[0.14473332 0.18661019 0.18920855 0.16156363 0.14547649 0.17240779]] entropy:[1.7860552]
DEBUG:chainerrl.agents.a3c:t:15039 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04905182  0.18429886  0.30850697  0.02281354 -0.01073476  0.16074029]] probs:[[0.15443356 0.17679857 0.20018044 0.1504342  0.14547111 0.17268212]] entropy:[1.7854997]
DEBUG:chainerrl.agents.a3c:t:15040 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.11099178 0.17539836 0.333574   0.02105737 0.00153614 0.17428014]] probs:[[0.

DEBUG:chainerrl.agents.a3c:t:15065 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11554427  0.19585167  0.02920866  0.11213312 -0.08655419 -0.06346757]] probs:[[0.17697197 0.1917704  0.16233398 0.17636934 0.14458868 0.14796557]] entropy:[1.7866888]
DEBUG:chainerrl.agents.a3c:t:15066 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08322332  0.19689886  0.02173258  0.09788834 -0.09660058 -0.1182835 ]] probs:[[0.17456764 0.19558358 0.1641567  0.17714654 0.14583683 0.14270869]] entropy:[1.7856879]
DEBUG:chainerrl.agents.a3c:t:15067 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02644605  0.2619015   0.05832192  0.12153549 -0.10179966 -0.09642164]] probs:[[0.1622942  0.20538089 0.1675508  0.17848423 0.14276002 0.14352986]] entropy:[1.7836173]
DEBUG:chainerrl.agents.a3c:t:15068 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03589297  0.23950274  0.0504196   0.1052215  -0.09915392 -0.08894836]] prob

DEBUG:chainerrl.agents.a3c:t:15094 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04279928  0.18632255  0.03751359  0.05572247 -0.03750632  0.03586186]] probs:[[0.16452807 0.1899203  0.16366073 0.1666681  0.15183215 0.16339064]] entropy:[1.7894537]
DEBUG:chainerrl.agents.a3c:t:15095 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0428202   0.20661314  0.08668824  0.12695672 -0.10411908  0.01221247]] probs:[[0.16276586 0.19173338 0.17006502 0.17705302 0.14052333 0.15785944]] entropy:[1.7871611]
DEBUG:chainerrl.agents.a3c:t:15096 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04534082  0.1993713   0.08675577  0.11036915 -0.11284265 -0.00154777]] probs:[[0.1643686  0.19174032 0.17131884 0.1754124  0.14032035 0.15683948]] entropy:[1.7871585]
DEBUG:chainerrl.agents.a3c:pi_loss:[-2.074574] v_loss:[[0.00475845]]
DEBUG:chainerrl.agents.a3c:grad norm:45.58292819321993
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.

DEBUG:chainerrl.agents.a3c:t:15122 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.22879171  0.19252144  0.00213917  0.07472868 -0.2138573   0.09223499]] probs:[[0.19479944 0.18786061 0.15529369 0.16698559 0.12512603 0.16993463]] entropy:[1.782115]
DEBUG:chainerrl.agents.a3c:t:15123 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.2267456   0.19567363 -0.0179541   0.08048717 -0.20166723  0.1304372 ]] probs:[[0.19319916 0.18728839 0.15126328 0.16691141 0.12587744 0.17546037]] entropy:[1.7821052]
DEBUG:chainerrl.agents.a3c:t:15124 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24464783  0.17081736  0.00035355  0.07318064 -0.19956154  0.08809572]] probs:[[0.19798326 0.18389262 0.15507181 0.16678663 0.12697284 0.16929291]] entropy:[1.7825371]
DEBUG:chainerrl.agents.a3c:t:15125 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24307525  0.1643053   0.00348847  0.07737599 -0.20751625  0.09378767]] probs

DEBUG:chainerrl.agents.a3c:t:15151 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18579124  0.17029361  0.07358737  0.03861234 -0.14499737  0.17489754]] probs:[[0.183515   0.18069288 0.1640371  0.15839906 0.13182929 0.18152669]] entropy:[1.785563]
DEBUG:chainerrl.agents.a3c:t:15152 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19469385  0.14924291  0.09780922  0.03870576 -0.14655055  0.16323958]] probs:[[0.1852292  0.17699881 0.16812527 0.15847643 0.13167669 0.17949362]] entropy:[1.785795]
DEBUG:chainerrl.agents.a3c:pi_loss:[-0.95360965] v_loss:[[0.00324603]]
DEBUG:chainerrl.agents.a3c:grad norm:32.433209290514384
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:15153 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.18809925  0.21202964  0.04103856  0.03310039 -0.10700952  0.08027108]] probs:[[0.18566278 0.19015935 0.16027184 0.1590046  0.13821676 0.16668467]] entropy:[1.7862501]
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:15179 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.2630399  0.21215373 0.08948628 0.3414859  0.00973023 0.11127227]] probs:[[0.18155147 0.17254414 0.15262526 0.19636698 0.14092524 0.15598685]] entropy:[1.785452]
DEBUG:chainerrl.agents.a3c:t:15180 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.25387624 0.20348692 0.06488247 0.33557814 0.04217288 0.12310692]] probs:[[0.18016694 0.17131339 0.14914069 0.19550496 0.14579193 0.15808211]] entropy:[1.7862542]
DEBUG:chainerrl.agents.a3c:t:15181 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.25952944 0.19485527 0.06227581 0.33443615 0.0438062  0.13091408]] probs:[[0.1811132  0.16977058 0.14869073 0.19520083 0.14596969 0.15925503]] entropy:[1.7862927]
DEBUG:chainerrl.agents.a3c:t:15182 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.27728745 0.19170883 0.06501301 0.32555774 0.06035197 0.15409946]] probs:[[0.18297401 0.16796666

DEBUG:chainerrl.agents.a3c:t:15208 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.11175171  0.11281571 -0.2806292   0.3296518  -0.15588683 -0.00544728]] probs:[[0.17938615 0.17957713 0.1211658  0.22306001 0.13726346 0.15954752]] entropy:[1.772398]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.9485043] v_loss:[[0.00492609]]
DEBUG:chainerrl.agents.a3c:grad norm:50.6790277589607
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:15209 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07551016 -0.03993966 -0.17091927  0.27358356 -0.19804972 -0.00698383]] probs:[[0.17943376 0.15986924 0.14024301 0.21873906 0.1364893  0.16522565]] entropy:[1.7786078]
DEBUG:chainerrl.agents.a3c:t:15210 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07482274 -0.04272147 -0.16937448  0.27507335 -0.21110176 -0.01388118]] probs:[[0.17983738 0.15989363 0.14087258 0.21970892 0.1351153  0.16457215]] entropy:[1.7781315]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:15236 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25140685 -0.04164359  0.05011975  0.16468553  0.2540438   0.41664407]] probs:[[0.17658772 0.13173169 0.14439182 0.16191901 0.17705397 0.20831573]] entropy:[1.7807593]
DEBUG:chainerrl.agents.a3c:t:15237 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.24989206 -0.04981679  0.08507536  0.19424304  0.23247398  0.42880315]] probs:[[0.17502098 0.12969649 0.14842638 0.16554728 0.17199884 0.2093101 ]] entropy:[1.7809445]
DEBUG:chainerrl.agents.a3c:t:15238 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.25179204 -0.04873056  0.08575124  0.19085579  0.2292843   0.4251597 ]] probs:[[0.17558111 0.13000573 0.14871925 0.16520134 0.17167333 0.2088192 ]] entropy:[1.7811455]
DEBUG:chainerrl.agents.a3c:t:15239 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.23279953 -0.05765818  0.0847038   0.19605535  0.24949999  0.43007168]] prob

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.961425] v_loss:[[0.00859824]]
DEBUG:chainerrl.agents.a3c:grad norm:217.43316863160135
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:15265 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.40308186 -0.03790532 -0.02677841 -0.25737408  0.08571411 -0.06489469]] probs:[[0.24009906 0.15447997 0.15620846 0.12403896 0.17480722 0.15036641]] entropy:[1.7697438]
DEBUG:chainerrl.agents.a3c:t:15266 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.41688365 -0.01890763 -0.03493787 -0.26552597  0.07093067 -0.08142442]] probs:[[0.24368125 0.1576015  0.15509525 0.12315594 0.17241563 0.14805043]] entropy:[1.7681162]
DEBUG:chainerrl.agents.a3c:t:15267 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.43290052 -0.0310452  -0.02020526 -0.26361522  0.0669182  -0.06283884]] probs:[[0.24597807 0.15467045 0.15635617 0.12257544 0.1705895  0.14983025]] entropy:[1.7672025]
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:15293 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.37100783 -0.24836649  0.48561496  0.06836853 -0.16751318  0.0183242 ]] probs:[[0.21344763 0.11489483 0.23936717 0.1577091  0.12457034 0.15001088]] entropy:[1.7558126]
DEBUG:chainerrl.agents.a3c:t:15294 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.3840405  -0.1872546   0.45190868  0.05738381 -0.18168266  0.06958304]] probs:[[0.21484663 0.12134398 0.22993407 0.15497561 0.12202198 0.15687777]] entropy:[1.760535]
DEBUG:chainerrl.agents.a3c:t:15295 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.37052938 -0.19034877  0.49429026  0.05180266 -0.20078291  0.06719366]] probs:[[0.21129756 0.12058914 0.239135   0.153629   0.11933743 0.1560118 ]] entropy:[1.7569966]
DEBUG:chainerrl.agents.a3c:t:15296 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.36143154 -0.22940138  0.4992144   0.05884843 -0.20529692  0.1056954 ]] probs

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:15321 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.3024413   0.01861614  0.20922476 -0.09035026 -0.2527487   0.12001232]] probs:[[0.21069174 0.15862945 0.19193937 0.14225262 0.12092931 0.17555758]] entropy:[1.7753155]
DEBUG:chainerrl.agents.a3c:t:15322 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.34753978  0.00912759  0.2464089  -0.11419172 -0.250845    0.12171071]] probs:[[0.2176534  0.15516539 0.19671834 0.13716331 0.11964381 0.17365573]] entropy:[1.7713977]
DEBUG:chainerrl.agents.a3c:t:15323 r:0.2 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.32878265 -0.08303771  0.21338193 -0.098437   -0.17531875  0.1272836 ]] probs:[[0.21610758 0.14315902 0.1925538  0.14097135 0.13053937 0.17666888]] entropy:[1.7747852]
DEBUG:chainerrl.agents.a3c:t:15324 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.34191445 -0.06425155  0.2233361  -0.09423

DEBUG:chainerrl.agents.a3c:t:15350 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[1.4804137e-01 1.7166963e-02 2.5044894e-04 1.5107465e-01 2.7088836e-01
  1.3611433e-01]] probs:[[0.17059511 0.14966786 0.1471573  0.17111336 0.19289383 0.1685725 ]] entropy:[1.7875936]
DEBUG:chainerrl.agents.a3c:t:15351 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.14915541  0.01695332 -0.00917452  0.14681245  0.26750392  0.1280086 ]] probs:[[0.17146471 0.1502312  0.14635682 0.17106345 0.19300692 0.16787685]] entropy:[1.7875199]
DEBUG:chainerrl.agents.a3c:t:15352 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[1.4815485e-01 1.7191164e-02 2.5522476e-04 1.5112422e-01 2.7090719e-01
  1.3611731e-01]] probs:[[0.1706083  0.14966606 0.14715266 0.17111565 0.19289048 0.1685669 ]] entropy:[1.7875931]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.614173] v_loss:[[0.00041759]]
DEBUG:chainerrl.agents.a3c:grad norm:2.5478184114312477
DEBUG:chainerrl.agents.a3c:u

INFO: outdir:result global_step:31299 local_step:15353 R:6.449999999999999
INFO: statistics:[('average_value', 0.4648181014821239), ('average_entropy', 1.7700411628829078)]


DEBUG:chainerrl.agents.a3c:t:15354 r:0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04759635 -0.02985953  0.03231404 -0.0259002   0.20527531  0.03936705]] probs:[[0.16661014 0.15419233 0.1640833  0.15480404 0.19506553 0.16524467]] entropy:[1.7885289]
DEBUG:chainerrl.agents.a3c:t:15355 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04073739 -0.029013   -0.00792032 -0.02867369  0.2431828  -0.02032776]] probs:[[0.16713375 0.15587337 0.1591961  0.15592627 0.20463742 0.15723307]] entropy:[1.7865759]
DEBUG:chainerrl.agents.a3c:t:15356 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04111015 -0.03043787 -0.0132212  -0.02428463  0.25134334 -0.02816651]] probs:[[0.16717371 0.15563063 0.15833326 0.15659122 0.20628664 0.15598452]] entropy:[1.7861373]
DEBUG:chainerrl.agents.a3c:t:15357 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04557335 -0.02830528 -0.01413268 -0.02361538  0.2545747  -0.02340828]] probs:

DEBUG:chainerrl.agents.a3c:t:15383 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04166786  0.02756068 -0.1198357  -0.0386323   0.10648926 -0.01336017]] probs:[[0.1732165  0.17079008 0.14738362 0.159851   0.18481655 0.16394226]] entropy:[1.7893028]
DEBUG:chainerrl.agents.a3c:t:15384 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04168169  0.02762789 -0.1197996  -0.03867021  0.10646136 -0.01332439]] probs:[[0.17321652 0.1707992  0.1473869  0.15984274 0.18480885 0.16394587]] entropy:[1.7893034]
DEBUG:chainerrl.agents.a3c:t:15385 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04155789  0.02749678 -0.11990118 -0.03877022  0.10656573 -0.01353508]] probs:[[0.17321067 0.17079218 0.1473852  0.15984115 0.18484478 0.1639261 ]] entropy:[1.7892995]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5303427] v_loss:[[0.00012226]]
DEBUG:chainerrl.agents.a3c:grad norm:0.41275793267847316
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agen

DEBUG:chainerrl.agents.a3c:t:15411 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19265296 -0.00298934 -0.085896   -0.0447622   0.14234944 -0.03196633]] probs:[[0.1954036  0.16068162 0.14789732 0.15410775 0.18581724 0.15609236]] entropy:[1.7863169]
DEBUG:chainerrl.agents.a3c:t:15412 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19365703 -0.00301181 -0.08730935 -0.04681786  0.14038543 -0.03129423]] probs:[[0.19571584 0.16077325 0.14777596 0.15388244 0.1855626  0.15628989]] entropy:[1.7862782]
DEBUG:chainerrl.agents.a3c:t:15413 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.1956786  -0.00992236 -0.10484181 -0.05709277  0.11823808 -0.03127555]] probs:[[0.19787578 0.16110213 0.14651372 0.15367931 0.18313047 0.15769854]] entropy:[1.7860867]
DEBUG:chainerrl.agents.a3c:t:15414 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.19465259 -0.01175369 -0.10742261 -0.05436044  0.11954613 -0.03322002]] prob

DEBUG:chainerrl.agents.a3c:t:15440 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09120867  0.00918677 -0.02426353 -0.0541168   0.19730884 -0.03789556]] probs:[[0.17643677 0.16254269 0.15719551 0.15257208 0.1961859  0.15506716]] entropy:[1.7876538]
DEBUG:chainerrl.agents.a3c:t:15441 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09272901  0.00841057 -0.02183956 -0.05324687  0.19855568 -0.03777746]] probs:[[0.17654283 0.16226731 0.15743221 0.15256453 0.19625016 0.15494296]] entropy:[1.7876339]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.660181] v_loss:[[0.00060665]]
DEBUG:chainerrl.agents.a3c:grad norm:1.0390532190392734
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:15442 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06764806  0.00954801 -0.04680154 -0.03580327  0.18039615 -0.02727604]] probs:[[0.17343274 0.16364342 0.15467718 0.15638776 0.19413193 0.15772702]] entropy:[1.7884387]
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:15468 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08060095 -0.00973701  0.02441942 -0.00504229  0.0448201   0.02192646]] probs:[[0.17590839 0.16071384 0.16629808 0.16147012 0.16972551 0.16588402]] entropy:[1.7912884]
DEBUG:chainerrl.agents.a3c:t:15469 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07068175 -0.00765122  0.01348593  0.00253921  0.04523214  0.01634493]] probs:[[0.17466706 0.16150704 0.16495717 0.16316128 0.17027795 0.16542946]] entropy:[1.7914004]
DEBUG:chainerrl.agents.a3c:t:15470 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0837158  -0.00730771  0.02514822 -0.0060822   0.04593157  0.02365802]] probs:[[0.17621587 0.16088443 0.16619176 0.16108172 0.1696819  0.16594428]] entropy:[1.7912654]
DEBUG:chainerrl.agents.a3c:t:15471 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0862309  -0.00751552  0.027133   -0.00623754  0.04555145  0.022931  ]] prob

DEBUG:chainerrl.agents.a3c:t:15497 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.16156149 -0.12822701 -0.03762744  0.03351483  0.14193688  0.05915514]] probs:[[0.18758704 0.14039424 0.15370789 0.16504139 0.1839416  0.16932783]] entropy:[1.786897]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6955588] v_loss:[[0.00077922]]
DEBUG:chainerrl.agents.a3c:grad norm:4.498082097319289
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:15498 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09419619 -0.11594919 -0.01422359  0.04017749  0.13513075  0.09235518]] probs:[[0.17559028 0.14231001 0.15754852 0.16635674 0.18292712 0.17526731]] entropy:[1.7884103]
DEBUG:chainerrl.agents.a3c:t:15499 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09181743 -0.11092749 -0.02497763  0.0491737   0.14496489  0.09268662]] probs:[[0.17482635 0.14274335 0.1555548  0.16752782 0.18436928 0.17497838]] entropy:[1.7882637]
DEBUG:chainerrl.agents.

DEBUG:chainerrl.agents.a3c:t:15525 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01294386 -0.20343643  0.15540431 -0.03565968  0.5022105  -0.13613147]] probs:[[0.15277822 0.12627913 0.18078989 0.14934687 0.25573498 0.13507089]] entropy:[1.760676]
DEBUG:chainerrl.agents.a3c:t:15526 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01315859 -0.20346963  0.15541497 -0.03584812  0.5019568  -0.13596907]] probs:[[0.15276162 0.12628834 0.180811   0.14933456 0.25569725 0.13510716]] entropy:[1.7606978]
DEBUG:chainerrl.agents.a3c:t:15527 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01155681 -0.20383385  0.15645899 -0.03267884  0.50346655 -0.13711004]] probs:[[0.1528393  0.12610438 0.18080208 0.14964488 0.25580373 0.1348056 ]] entropy:[1.7605786]
DEBUG:chainerrl.agents.a3c:t:15528 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01493874 -0.20651294  0.16042474 -0.02910677  0.4984263  -0.1342181 ]] probs

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4255679] v_loss:[[0.00088943]]
DEBUG:chainerrl.agents.a3c:grad norm:3.3576740041320616
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:15554 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01239964 -0.17547159  0.12782706  0.0487128   0.23298828 -0.03157205]] probs:[[0.15817252 0.13437231 0.18198296 0.1681403  0.202163   0.15516886]] entropy:[1.7835534]
DEBUG:chainerrl.agents.a3c:t:15555 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01330464 -0.17128031  0.1288716   0.04917385  0.23404609 -0.0315736 ]] probs:[[0.15788692 0.13481499 0.18200885 0.16806614 0.20219444 0.15502867]] entropy:[1.7836152]
DEBUG:chainerrl.agents.a3c:t:15556 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01276322 -0.17295256  0.12918265  0.04905817  0.23347338 -0.03158031]] probs:[[0.1580071  0.13461927 0.18210542 0.16808358 0.20212302 0.15506166]] entropy:[1.7835875]
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:15582 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02534869 -0.08946086  0.18189459  0.03074631  0.1521124   0.04412618]] probs:[[0.16075836 0.14332186 0.18800117 0.16162841 0.18248464 0.16380551]] entropy:[1.787796]
DEBUG:chainerrl.agents.a3c:t:15583 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02603558 -0.08821029  0.18043232  0.02886849  0.15317109  0.04439997]] probs:[[0.16087687 0.14350837 0.18773587 0.16133326 0.18268709 0.16385858]] entropy:[1.7878333]
DEBUG:chainerrl.agents.a3c:t:15584 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02610376 -0.08817103  0.18045451  0.02886927  0.15317883  0.04451266]] probs:[[0.16088128 0.14350815 0.18773238 0.16132681 0.18268104 0.16387035]] entropy:[1.7878343]
DEBUG:chainerrl.agents.a3c:t:15585 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02580708 -0.08896811  0.18260899  0.03080756  0.15203607  0.04506408]] probs

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:15610 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05100524 -0.12770699  0.20755956 -0.0783251   0.2757931  -0.06721902]] probs:[[0.16597348 0.13881136 0.19410172 0.1458382  0.20780829 0.14746691]] entropy:[1.7799298]
DEBUG:chainerrl.agents.a3c:t:15611 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05047407 -0.1278901   0.20707841 -0.07770257  0.27675915 -0.06705359]] probs:[[0.16586725 0.1387708  0.19398719 0.14591308 0.20798644 0.14747521]] entropy:[1.7799108]
DEBUG:chainerrl.agents.a3c:t:15612 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05039784 -0.12791988  0.20701176 -0.07760688  0.2768818  -0.06703007]] probs:[[0.16585241 0.13876481 0.19397168 0.14592512 0.2080092  0.14747673]] entropy:[1.7799087]
DEBUG:chainerrl.agents.a3c:t:15613 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05038794 -0.12792325  0.20700283 -0.07759

DEBUG:chainerrl.agents.a3c:t:15639 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08103625 -0.04302273  0.12368646  0.0017593   0.13328712 -0.02307259]] probs:[[0.1722519  0.15215488 0.17975739 0.15912355 0.1814915  0.15522087]] entropy:[1.7892988]
DEBUG:chainerrl.agents.a3c:t:15640 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06527273 -0.05838462  0.14594695  0.03045756  0.13324876 -0.03112496]] probs:[[0.1691496  0.14947456 0.18336116 0.16336195 0.18104753 0.15360522]] entropy:[1.7888505]
DEBUG:chainerrl.agents.a3c:t:15641 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07635354 -0.05252589  0.12807831  0.00454074  0.1339054  -0.02226341]] probs:[[0.17157952 0.15083213 0.18068798 0.15968995 0.18174393 0.15546645]] entropy:[1.789141]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5718422] v_loss:[[0.00027171]]
DEBUG:chainerrl.agents.a3c:grad norm:0.6313340904717355
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:15667 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03820069 -0.05604247  0.04832356  0.02464966  0.10496704 -0.00260847]] probs:[[0.16846833 0.15331653 0.17018236 0.1662008  0.18010034 0.16173166]] entropy:[1.7905616]
DEBUG:chainerrl.agents.a3c:t:15668 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03820137 -0.05604359  0.04832232  0.02464847  0.10496695 -0.00260769]] probs:[[0.16846849 0.15331641 0.17018221 0.16620067 0.18010038 0.16173184]] entropy:[1.7905614]
DEBUG:chainerrl.agents.a3c:t:15669 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03758265 -0.0550794   0.04937978  0.0256669   0.10502318 -0.00335607]] probs:[[0.16831678 0.153421   0.1703142  0.16632307 0.18005967 0.16156526]] entropy:[1.7905673]
DEBUG:chainerrl.agents.a3c:t:15670 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03820072 -0.05604247  0.0483236   0.02464965  0.10496692 -0.00260852]] prob

DEBUG:chainerrl.agents.a3c:t:15696 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00201966 -0.09916149 -0.04057063  0.02256472  0.08675098 -0.06844347]] probs:[[0.16939914 0.15309775 0.16233587 0.17291546 0.18437819 0.15787359]] entropy:[1.7898511]
DEBUG:chainerrl.agents.a3c:t:15697 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02195437 -0.1219322  -0.05643848 -0.01704084  0.0489477  -0.10201342]] probs:[[0.17658138 0.152917   0.16326734 0.16982807 0.1814128  0.15599345]] entropy:[1.7898388]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5953532] v_loss:[[0.00028888]]
DEBUG:chainerrl.agents.a3c:grad norm:0.5445049550992644
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:15698 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04675401 -0.12747037 -0.06937063 -0.0457722  -0.00341927 -0.12307207]] probs:[[0.18392584 0.15451764 0.16376099 0.16767146 0.17492536 0.15519875]] entropy:[1.7898074]
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:15724 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04104047 -0.07802179 -0.10628638 -0.01780617  0.0362793  -0.19096547]] probs:[[0.1824216  0.16194522 0.15743199 0.17199644 0.18155512 0.14464962]] entropy:[1.7884557]
DEBUG:chainerrl.agents.a3c:t:15725 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04619171 -0.07687139 -0.1063496  -0.01866318  0.03856485 -0.19019817]] probs:[[0.18308948 0.16188917 0.15718661 0.1715921  0.1816984  0.14454417]] entropy:[1.7883632]
DEBUG:chainerrl.agents.a3c:t:15726 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04417615 -0.08115035 -0.09928565 -0.02005909  0.03439023 -0.19167365]] probs:[[0.18293212 0.16138433 0.15848397 0.17155088 0.18115069 0.14449796]] entropy:[1.7884743]
DEBUG:chainerrl.agents.a3c:t:15727 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04440366 -0.08128833 -0.10124135 -0.02033273  0.03416205 -0.19211641]] prob

DEBUG:chainerrl.agents.a3c:t:15753 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00016235 -0.105715   -0.02494187  0.01344556 -0.02604217 -0.12635662]] probs:[[0.17411265 0.15662041 0.16979611 0.17644085 0.16960937 0.15342066]] entropy:[1.7904097]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.1879536] v_loss:[[0.00064178]]
DEBUG:chainerrl.agents.a3c:grad norm:5.614455895562116
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:15754 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00192436 -0.09648321  0.0106005   0.00668911 -0.03685193 -0.10082703]] probs:[[0.17233849 0.1567891  0.17451057 0.17382933 0.16642302 0.15610951]] entropy:[1.7906901]
DEBUG:chainerrl.agents.a3c:t:15755 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0023745  -0.10137643  0.00817857  0.00629431 -0.0397393  -0.10533543]] probs:[[0.17269538 0.15641727 0.1745275  0.17419894 0.16636172 0.15579924]] entropy:[1.7906172]
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:15781 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0125857  -0.0829984   0.00232942 -0.03219064 -0.02985403 -0.07585774]] probs:[[0.17096384 0.15933986 0.1735329  0.16764475 0.16803692 0.16048172]] entropy:[1.791277]
DEBUG:chainerrl.agents.a3c:t:15782 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01166019 -0.08274227  0.00265837 -0.03143686 -0.03011049 -0.07462837]] probs:[[0.17103031 0.15929514 0.17349683 0.16768113 0.16790368 0.1605929 ]] entropy:[1.7912796]
DEBUG:chainerrl.agents.a3c:t:15783 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00630024 -0.08363553  0.01056739 -0.03096263 -0.03401798 -0.08802032]] probs:[[0.17204532 0.15924162 0.17497194 0.16785417 0.1673421  0.1585449 ]] entropy:[1.7910954]
DEBUG:chainerrl.agents.a3c:t:15784 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00617337 -0.08363865  0.01054066 -0.03084668 -0.03388849 -0.08809333]] probs

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4037277] v_loss:[[2.4255307e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.4049220737025374
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:15810 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-5.0941773e-05 -2.8884506e-02 -2.8489061e-02 -8.4734242e-03
  -2.1773754e-02 -1.0500111e-01]] probs:[[0.1719976  0.16710912 0.16717522 0.17055504 0.16830163 0.15486138]] entropy:[1.7911924]
DEBUG:chainerrl.agents.a3c:t:15811 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00033609 -0.02971746 -0.02966214 -0.00830997 -0.02330547 -0.10652125]] probs:[[0.1720947  0.16711189 0.16712113 0.1707279  0.16818684 0.15475756]] entropy:[1.7911791]
DEBUG:chainerrl.agents.a3c:t:15812 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00156016 -0.0323619  -0.03334212 -0.00758051 -0.0270828  -0.11193667]] probs:[[0.17233415 0.16710687 0.16694315 0.17129976 0.16799138 0.1543247 ]] entropy:[1.7911257

DEBUG:chainerrl.agents.a3c:t:15838 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01229956 -0.06902166  0.0159282  -0.03446423 -0.08987292  0.04599525]] probs:[[0.17191023 0.15848361 0.17253517 0.16405614 0.15521324 0.17780156]] entropy:[1.7905842]
DEBUG:chainerrl.agents.a3c:t:15839 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01170989 -0.06895545  0.01609567 -0.03517063 -0.09111445  0.04681129]] probs:[[0.1718476  0.15852982 0.17260294 0.16397722 0.15505558 0.17798682]] entropy:[1.7905614]
DEBUG:chainerrl.agents.a3c:t:15840 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01594277 -0.06950252  0.01732288 -0.03134442 -0.08651776  0.04532348]] probs:[[0.17224367 0.1581375  0.17248157 0.16428834 0.1554695  0.1773794 ]] entropy:[1.7906058]
DEBUG:chainerrl.agents.a3c:t:15841 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02152689 -0.07033853  0.01968965 -0.02567843 -0.07850274  0.04283862]] prob

DEBUG:chainerrl.agents.a3c:grad norm:0.24360825443561046
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:15866 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03005916 -0.05934443  0.01258471 -0.03844465 -0.04863645  0.02784075]] probs:[[0.17382087 0.15895508 0.17080982 0.16231216 0.16066632 0.17343569]] entropy:[1.7910733]
DEBUG:chainerrl.agents.a3c:t:15867 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03119403 -0.05923315  0.01044465 -0.03587281 -0.04730982  0.02908006]] probs:[[0.17389718 0.15886217 0.1703261  0.16261694 0.16076769 0.17352995]] entropy:[1.7910846]
DEBUG:chainerrl.agents.a3c:t:15868 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03103131 -0.05982574  0.0091839  -0.03568491 -0.04658329  0.03100836]] probs:[[0.1738436  0.15874499 0.17008677 0.16262385 0.16086115 0.17383963]] entropy:[1.791077]
DEBUG:chainerrl.agents.a3c:t:15869 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.

DEBUG:chainerrl.agents.a3c:t:15894 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06865292 -0.01608476  0.05270156 -0.05765365 -0.05251559  0.05593136]] probs:[[0.17675601 0.16239515 0.17395888 0.15578295 0.15658543 0.17452163]] entropy:[1.7903907]
DEBUG:chainerrl.agents.a3c:t:15895 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08243378 -0.01935736  0.05643426 -0.0439224  -0.05607339  0.05453676]] probs:[[0.17850767 0.16123137 0.17392637 0.15731896 0.15541896 0.17359667]] entropy:[1.7903055]
DEBUG:chainerrl.agents.a3c:t:15896 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08400952 -0.02397605  0.05348434 -0.04658245 -0.0555266   0.05791904]] probs:[[0.17891774 0.1606038  0.17353876 0.15701386 0.15561578 0.17431004]] entropy:[1.790236]
DEBUG:chainerrl.agents.a3c:t:15897 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08009811 -0.02921839  0.0480854  -0.05581707 -0.05266478  0.06340744]] probs

DEBUG:chainerrl.agents.a3c:grad norm:0.4307190871841662
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:15922 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06716605 -0.01514596  0.05565372 -0.07804088 -0.04261427  0.00528718]] probs:[[0.17824012 0.16415639 0.17619993 0.15414977 0.15970866 0.16754512]] entropy:[1.7904482]
DEBUG:chainerrl.agents.a3c:t:15923 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10581143 -0.01230336  0.0750287  -0.05523081 -0.05665398  0.02878968]] probs:[[0.18230039 0.16199104 0.17677417 0.15518433 0.15496363 0.1687864 ]] entropy:[1.7898421]
DEBUG:chainerrl.agents.a3c:t:15924 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10136543 -0.01040667  0.0752178  -0.05727318 -0.05962523  0.02401642]] probs:[[0.18186413 0.16263163 0.17717044 0.1551855  0.15482093 0.16832739]] entropy:[1.7898688]
DEBUG:chainerrl.agents.a3c:t:15925 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.

DEBUG:chainerrl.agents.a3c:t:15950 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07411771 -0.00923648  0.01766014 -0.07870414 -0.01457247  0.02922956]] probs:[[0.17874226 0.16444738 0.16893049 0.15341137 0.16357224 0.17089626]] entropy:[1.7906781]
DEBUG:chainerrl.agents.a3c:t:15951 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07158758 -0.00982475  0.01861847 -0.08025431 -0.01928462  0.03153004]] probs:[[0.17846896 0.16451511 0.16926162 0.15332699 0.16296615 0.17146122]] entropy:[1.7906587]
DEBUG:chainerrl.agents.a3c:t:15952 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07416959  0.00016114  0.02231184 -0.06991249 -0.01801105  0.02534041]] probs:[[0.17830941 0.1655895  0.16929834 0.15438326 0.16260755 0.16981186]] entropy:[1.7907944]
DEBUG:chainerrl.agents.a3c:t:15953 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07305364  0.00978999  0.02443284 -0.06572664 -0.02377214  0.01244613]] prob

DEBUG:chainerrl.agents.a3c:t:15979 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[1.03409410e-01 1.11608826e-01 4.93364446e-02 3.84985842e-02
  7.85529763e-02 7.96687091e-05]] probs:[[0.17330942 0.17473629 0.16418692 0.1624171  0.16905466 0.15629555]] entropy:[1.7910167]
DEBUG:chainerrl.agents.a3c:t:15980 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10160309 0.11445341 0.04709764 0.03701696 0.08051051 0.00207264]] probs:[[0.17295854 0.17519544 0.16378365 0.1621409  0.1693486  0.15657285]] entropy:[1.791007]
DEBUG:chainerrl.agents.a3c:t:15981 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.10298684 0.11338539 0.04831633 0.03801659 0.07940538 0.00099132]] probs:[[0.17318797 0.17499827 0.16397385 0.16229364 0.16915172 0.15639456]] entropy:[1.7910073]
DEBUG:chainerrl.agents.a3c:t:15982 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[0.1050615  0.11035363 0.04812303 0.03718054 0.07932866 0.0007764 ]] pro

DEBUG:chainerrl.agents.a3c:t:16007 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05827329  0.07094433  0.09495508 -0.00596888  0.10892639  0.01960715]] probs:[[0.16661328 0.16873789 0.17283843 0.15624626 0.17527016 0.16029395]] entropy:[1.7909592]
DEBUG:chainerrl.agents.a3c:t:16008 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05825755  0.07110327  0.0946953  -0.00614092  0.10923246  0.01950844]] probs:[[0.16661228 0.16876635 0.17279522 0.1562209  0.17532551 0.16027969]] entropy:[1.7909553]
DEBUG:chainerrl.agents.a3c:t:16009 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0595114   0.06963077  0.09505876 -0.00583667  0.11000594  0.01940535]] probs:[[0.1667896  0.16848598 0.17282519 0.15623873 0.17542782 0.16023271]] entropy:[1.7909517]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5177176] v_loss:[[7.054063e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.20485331863652861
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.ag

DEBUG:chainerrl.agents.a3c:t:16035 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03659379  0.09396407  0.07005414  0.05068422  0.05854972 -0.05824501]] probs:[[0.16559081 0.17536859 0.17122528 0.16794057 0.16926672 0.15060808]] entropy:[1.7906461]
DEBUG:chainerrl.agents.a3c:t:16036 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02768224  0.08851992  0.0776889   0.05539481  0.04573782 -0.05253878]] probs:[[0.16438618 0.17469753 0.17281559 0.16900545 0.16738123 0.15171404]] entropy:[1.7907318]
DEBUG:chainerrl.agents.a3c:t:16037 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02114075  0.08439682  0.08734062  0.06261262  0.03743764 -0.04503545]] probs:[[0.1631726  0.17382771 0.17434017 0.17008196 0.16585359 0.15272398]] entropy:[1.7907599]
DEBUG:chainerrl.agents.a3c:t:16038 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02084869  0.08404377  0.08741404  0.06271234  0.03699044 -0.04508833]] prob

DEBUG:chainerrl.agents.a3c:t:16064 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08467068 -0.01060324  0.04540357 -0.00497892  0.06672367 -0.01204773]] probs:[[0.17621474 0.16020103 0.16942938 0.16110459 0.17308041 0.15996978]] entropy:[1.7909889]
DEBUG:chainerrl.agents.a3c:t:16065 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08401467 -0.01153016  0.03476509 -0.0055394   0.06458667 -0.02393566]] probs:[[0.1768787  0.1607611  0.16837855 0.16172707 0.17347547 0.1587791 ]] entropy:[1.7909373]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4990648] v_loss:[[8.8067754e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.4162474147895423
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:16066 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06600539 -0.00257204  0.04447455 -0.00012463  0.06333034 -0.00772495]] probs:[[0.17316976 0.16169226 0.16948113 0.16208848 0.17270714 0.16086122]] entropy:[1.7912617]
DEBUG:chainerrl.ag

DEBUG:chainerrl.agents.a3c:t:16092 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06620565 -0.0292041   0.02110142  0.01132191  0.04078528  0.02261393]] probs:[[0.17410265 0.15825938 0.16642435 0.16480473 0.16973267 0.16667625]] entropy:[1.7913418]
DEBUG:chainerrl.agents.a3c:t:16093 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06674732 -0.02858793  0.0200128   0.01063075  0.04080678  0.02110753]] probs:[[0.17425801 0.1584124  0.16630152 0.16474856 0.1697958  0.16648367]] entropy:[1.7913405]
DEBUG:chainerrl.agents.a3c:t:16094 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06603016 -0.02645424  0.01711015  0.00949586  0.04191052  0.02113946]] probs:[[0.17417884 0.15879247 0.16586307 0.16460493 0.17002797 0.16653273]] entropy:[1.7913553]
DEBUG:chainerrl.agents.a3c:t:16095 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06806387 -0.02748575  0.0177779   0.01412821  0.03975292  0.02142075]] prob

DEBUG:chainerrl.agents.a3c:t:16121 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0047388  -0.01827728  0.10395981 -0.04242548  0.00955489  0.00325472]] probs:[[0.16428952 0.16208027 0.18315427 0.1582132  0.16665469 0.16560803]] entropy:[1.7906674]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4244554] v_loss:[[1.596093e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.1468125623914032
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:16122 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00287894 -0.01810924  0.09170017 -0.04123133  0.01391241  0.0051512 ]] probs:[[0.1647053  0.1622158  0.18104343 0.15850806 0.16749427 0.16603324]] entropy:[1.7908788]
DEBUG:chainerrl.agents.a3c:t:16123 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00425328 -0.03122687  0.07904099 -0.06062094  0.02071983 -0.00417538]] probs:[[0.16581532 0.16140246 0.18021831 0.15672725 0.17000839 0.16582824]] entropy:[1.7907988]
DEBUG:chainerrl.age

DEBUG:chainerrl.agents.a3c:t:16149 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03353243  0.00254423  0.05085472 -0.06122966  0.009998    0.05169149]] probs:[[0.16973275 0.16455369 0.17269851 0.15438709 0.16578482 0.17284308]] entropy:[1.7910309]
DEBUG:chainerrl.agents.a3c:t:16150 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03516694 -0.00149139  0.05124209 -0.0592806   0.01054807  0.05206502]] probs:[[0.16998681 0.16386822 0.17274146 0.15466683 0.16585302 0.17288366]] entropy:[1.7910342]
DEBUG:chainerrl.agents.a3c:t:16151 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03206779  0.00354889  0.05389278 -0.05867013  0.00835271  0.04959308]] probs:[[0.1694499  0.16468564 0.1731888  0.15475132 0.16547866 0.17244574]] entropy:[1.7910588]
DEBUG:chainerrl.agents.a3c:t:16152 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04398135  0.03620063  0.01505334 -0.11097741 -0.05058611  0.00021606]] prob

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4342185] v_loss:[[9.101977e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.040231221039416384
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:16178 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01787722 -0.00331271  0.02792701  0.00465904 -0.05371184 -0.01285386]] probs:[[0.16517544 0.16759875 0.17291713 0.16894014 0.15936124 0.16600727]] entropy:[1.7914551]
DEBUG:chainerrl.agents.a3c:t:16179 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02136607  0.00310751  0.02787588  0.00237807 -0.054821   -0.01593338]] probs:[[0.16469514 0.16877556 0.17300805 0.16865247 0.15927643 0.16559231]] entropy:[1.7914338]
DEBUG:chainerrl.agents.a3c:t:16180 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02234766  0.00356943  0.03147093 -0.00337168 -0.05802565 -0.02312186]] probs:[[0.16488363 0.16921279 0.17400056 0.16804233 0.15910462 0.16475601]] entropy:[1.7913805]
DEBUG:chainerrl.a

DEBUG:chainerrl.agents.a3c:t:16206 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03217086 -0.02740562  0.02113609 -0.00511731 -0.06222286 -0.00378824]] probs:[[0.16430783 0.16509266 0.17330424 0.16881362 0.15944351 0.16903813]] entropy:[1.7914153]
DEBUG:chainerrl.agents.a3c:t:16207 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03222413 -0.02739137  0.02092821 -0.00514914 -0.06196544 -0.00394141]] probs:[[0.16430445 0.16510041 0.17327386 0.16881375 0.15948977 0.16901776]] entropy:[1.791419]
DEBUG:chainerrl.agents.a3c:t:16208 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03277865 -0.0278908   0.01887877 -0.00473465 -0.0610191  -0.00535521]] probs:[[0.1643031  0.16510814 0.17301361 0.16897602 0.159728   0.1688712 ]] entropy:[1.7914393]
DEBUG:chainerrl.agents.a3c:t:16209 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03812718 -0.03063207  0.00772881 -0.00092346 -0.05061159 -0.01287058]] probs

DEBUG:chainerrl.agents.a3c:grad norm:0.1678901197577951
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:16234 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02616849 -0.01699942  0.02265558 -0.0089523  -0.03446902 -0.01116877]] probs:[[0.16438018 0.16589433 0.17260505 0.16723469 0.16302139 0.16686442]] entropy:[1.791596]
DEBUG:chainerrl.agents.a3c:t:16235 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02529972 -0.01301232  0.02530074 -0.00979321 -0.04348098 -0.00873857]] probs:[[0.16451223 0.16654612 0.17305084 0.16708313 0.16154823 0.16725942]] entropy:[1.7915437]
DEBUG:chainerrl.agents.a3c:t:16236 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03662301 -0.02186565  0.01449188 -0.0146116  -0.03776605 -0.00770943]] probs:[[0.16345854 0.16588864 0.17203093 0.16709638 0.16327181 0.1682537 ]] entropy:[1.7915983]
DEBUG:chainerrl.agents.a3c:t:16237 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0

DEBUG:chainerrl.agents.a3c:t:16262 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0184363  -0.01677613 -0.01711924 -0.03940066 -0.045282    0.00737067]] probs:[[0.16717131 0.16744907 0.16739163 0.16370314 0.16274318 0.17154165]] entropy:[1.7916117]
DEBUG:chainerrl.agents.a3c:t:16263 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01909151 -0.01755995 -0.01818733 -0.03915945 -0.04677051  0.00666751]] probs:[[0.16718596 0.1674422  0.1673372  0.16386431 0.16262189 0.17154846]] entropy:[1.7916116]
DEBUG:chainerrl.agents.a3c:t:16264 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01908665 -0.0175594  -0.01818336 -0.0391576  -0.04676874  0.00667102]] probs:[[0.1671863  0.16744184 0.16733739 0.16386418 0.1626217  0.17154858]] entropy:[1.7916114]
DEBUG:chainerrl.agents.a3c:t:16265 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01784028 -0.01615483 -0.01623976 -0.03946876 -0.04400831  0.00793335]] prob

DEBUG:chainerrl.agents.a3c:grad norm:24.292077951335706
DEBUG:chainerrl.agents.a3c:update


INFO: outdir:result global_step:33197 local_step:16289 R:1.1500000000000001
INFO: statistics:[('average_value', 0.3083083506847507), ('average_entropy', 1.7818060272103204)]


DEBUG:chainerrl.agents.a3c:t:16290 r:0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00316708 -0.05247522 -0.11800665  0.08870815  0.05821752 -0.00079724]] probs:[[0.16651942 0.1585078  0.14845358 0.18254325 0.1770614  0.16691451]] entropy:[1.7894716]
DEBUG:chainerrl.agents.a3c:t:16291 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01662295 -0.04955784 -0.1990742   0.0656103   0.06513437 -0.04870578]] probs:[[0.1683458  0.16289164 0.14026996 0.18277454 0.18268758 0.1630305 ]] entropy:[1.7879567]
DEBUG:chainerrl.agents.a3c:t:16292 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.019908   -0.04973145 -0.21119907  0.06258131  0.06585567 -0.05883862]] probs:[[0.1685243  0.16357252 0.1391829  0.1830152  0.18361545 0.1620896 ]] entropy:[1.7876439]
DEBUG:chainerrl.agents.a3c:t:16293 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02027138 -0.04951015 -0.21296401  0.06211399  0.06588187 -0.06024003]] probs:

DEBUG:chainerrl.agents.a3c:t:16319 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00086678 -0.025944   -0.11439963 -0.00386921  0.04562251 -0.01972541]] probs:[[0.16991372 0.16541873 0.15141499 0.16911091 0.17769107 0.1664506 ]] entropy:[1.7906286]
DEBUG:chainerrl.agents.a3c:t:16320 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00078363 -0.02606976 -0.11430343 -0.00385331  0.04560126 -0.01979052]] probs:[[0.16990508 0.16540326 0.15143445 0.16911906 0.17769304 0.16644514]] entropy:[1.7906302]
DEBUG:chainerrl.agents.a3c:t:16321 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00104005 -0.02575347 -0.11452626 -0.00390413  0.045696   -0.01963012]] probs:[[0.16993214 0.16543952 0.15138601 0.16909404 0.1776926  0.16645567]] entropy:[1.7906256]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4505645] v_loss:[[3.9859046e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.07627902784066597
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.a

DEBUG:chainerrl.agents.a3c:t:16347 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03432737 -0.04975623 -0.13672058  0.00603107 -0.07993125 -0.04468467]] probs:[[0.17025147 0.16764484 0.15368167 0.1772631  0.16266173 0.16849723]] entropy:[1.7908102]
DEBUG:chainerrl.agents.a3c:t:16348 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03294561 -0.04831715 -0.13386679  0.00485239 -0.08124848 -0.04480103]] probs:[[0.1704062  0.16780682 0.15404794 0.1769705  0.16237073 0.1683979 ]] entropy:[1.790847]
DEBUG:chainerrl.agents.a3c:t:16349 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03392453 -0.04928214 -0.13572322  0.00501137 -0.07913398 -0.04396493]] probs:[[0.17025676 0.167662   0.15377784 0.1770166  0.16273095 0.16855587]] entropy:[1.790834]
DEBUG:chainerrl.agents.a3c:t:16350 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03260133 -0.04797434 -0.13310005  0.0041754  -0.08083152 -0.04447229]] probs:

DEBUG:chainerrl.agents.a3c:t:16376 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.14817889  0.0093811  -0.17003593  0.02578735  0.04145305 -0.03226558]] probs:[[0.14990819 0.17549017 0.1466672  0.17839305 0.18120971 0.16833168]] entropy:[1.7883782]
DEBUG:chainerrl.agents.a3c:t:16377 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.14292176  0.01444229 -0.16014482  0.0167079   0.05083947 -0.02740199]] probs:[[0.15008964 0.17566817 0.14752677 0.17606659 0.18217976 0.16846912]] entropy:[1.7885534]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4407833] v_loss:[[2.3925918e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.20909501823112264
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:16378 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.08406107 -0.1870132  -0.01389921 -0.14447692  0.11796319  0.00490831]] probs:[[0.1843888  0.1406075  0.16718256 0.14671744 0.19074716 0.17035659]] entropy:[1.7857491]
DEBUG:chainerrl.a

DEBUG:chainerrl.agents.a3c:t:16404 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02844302 -0.03827968  0.06113941 -0.13712981 -0.04685783  0.07212617]] probs:[[0.16478552 0.16317253 0.18022881 0.14781448 0.16177881 0.18221986]] entropy:[1.7892891]
DEBUG:chainerrl.agents.a3c:t:16405 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02850105 -0.03838769  0.06107037 -0.13711481 -0.04683935  0.07201964]] probs:[[0.16478482 0.16316369 0.18022607 0.14782466 0.1617905  0.18221025]] entropy:[1.7892916]
DEBUG:chainerrl.agents.a3c:t:16406 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02847083 -0.03822289  0.0610779  -0.13714552 -0.04689121  0.07219496]] probs:[[0.16478121 0.16318206 0.18021803 0.1478124  0.16177367 0.1822327 ]] entropy:[1.7892886]
DEBUG:chainerrl.agents.a3c:t:16407 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02565302 -0.05014113  0.06199795 -0.14740606 -0.03974336  0.07831632]] prob

DEBUG:chainerrl.agents.a3c:t:16433 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03266292 -0.04506027 -0.0696753  -0.06918038 -0.07956626  0.03654062]] probs:[[0.1683128  0.16623905 0.16219702 0.16227731 0.16060065 0.18037315]] entropy:[1.7909715]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4838117] v_loss:[[3.140123e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.13221197964448594
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:16434 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04101568 -0.04266448 -0.07077705 -0.07548653 -0.09069585  0.0218802 ]] probs:[[0.16802217 0.16774537 0.16309528 0.16232899 0.15987876 0.1789295 ]] entropy:[1.7910744]
DEBUG:chainerrl.agents.a3c:t:16435 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04496044 -0.0476753  -0.07915937 -0.06876567 -0.0992479   0.01741413]] probs:[[0.16802022 0.16756468 0.16237125 0.16406769 0.15914199 0.17883416]] entropy:[1.791069]
DEBUG:chainerrl.age

DEBUG:chainerrl.agents.a3c:t:16461 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00496507 -0.0029429  -0.07897813 -0.00583195 -0.00072375 -0.02120991]] probs:[[0.16897732 0.16931936 0.1569224  0.1688309  0.16969553 0.16625449]] entropy:[1.7913902]
DEBUG:chainerrl.agents.a3c:t:16462 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00500152 -0.00294609 -0.07889576 -0.00593586 -0.00069645 -0.02117163]] probs:[[0.16897121 0.16931888 0.15693538 0.1688134  0.1697002  0.1662609 ]] entropy:[1.791391]
DEBUG:chainerrl.agents.a3c:t:16463 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00496528 -0.00294282 -0.07897802 -0.00583223 -0.00072386 -0.02120975]] probs:[[0.16897729 0.16931938 0.15692243 0.16883086 0.16969551 0.16625452]] entropy:[1.7913902]
DEBUG:chainerrl.agents.a3c:t:16464 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00500155 -0.00294606 -0.07889577 -0.00593584 -0.00069648 -0.02117165]] probs

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4959711] v_loss:[[4.823385e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.0650834091360554
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:16490 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0166683  -0.02345095 -0.05634159 -0.01659242  0.00799983 -0.01031291]] probs:[[0.16706288 0.1659336  0.1605647  0.16707556 0.17123526 0.16812801]] entropy:[1.7915754]
DEBUG:chainerrl.agents.a3c:t:16491 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01719119 -0.02327818 -0.05567576 -0.01716555  0.00747835 -0.00999769]] probs:[[0.16698952 0.16597615 0.16068509 0.16699381 0.17116031 0.1681951 ]] entropy:[1.7915817]
DEBUG:chainerrl.agents.a3c:t:16492 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01731695 -0.02323739 -0.05551672 -0.01730344  0.0073533  -0.00992192]] probs:[[0.16697194 0.16598631 0.16071393 0.16697419 0.1711424  0.16821128]] entropy:[1.7915833]
DEBUG:chainerrl.age

DEBUG:chainerrl.agents.a3c:t:16518 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03956537 -0.01060297 -0.028293   -0.0282863   0.01721845 -0.0249059 ]] probs:[[0.16325837 0.16805586 0.1651091  0.16511022 0.17279707 0.16566929]] entropy:[1.7915895]
DEBUG:chainerrl.agents.a3c:t:16519 r:0.05 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04143928 -0.00919764 -0.0279291  -0.03178212  0.01849074 -0.02473563]] probs:[[0.1630077  0.16834898 0.16522491 0.16458952 0.17307542 0.1657534 ]] entropy:[1.7915671]
DEBUG:chainerrl.agents.a3c:t:16520 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03922676 -0.0117242  -0.03183697 -0.030611    0.01846284 -0.02644522]] probs:[[0.16350012 0.16805919 0.16471282 0.16491488 0.17320976 0.16560331]] entropy:[1.7915726]
DEBUG:chainerrl.agents.a3c:t:16521 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03946055 -0.0116098  -0.03213635 -0.03131162  0.0187403  -0.02651205]] pro

DEBUG:chainerrl.agents.a3c:grad norm:0.7708591516109049
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:16546 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03851823 -0.03549766 -0.01768615 -0.04448622 -0.01051082  0.02195532]] probs:[[0.16369644 0.16419165 0.16714233 0.16272241 0.16834596 0.17390119]] entropy:[1.7915035]
DEBUG:chainerrl.agents.a3c:t:16547 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0392607  -0.03525379 -0.01692848 -0.04572864 -0.01073586  0.02215447]] probs:[[0.16360115 0.16425799 0.16729581 0.1625464  0.16833504 0.17396368]] entropy:[1.7914956]
DEBUG:chainerrl.agents.a3c:t:16548 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03900243 -0.03527876 -0.01693509 -0.04521106 -0.01080524  0.02217772]] probs:[[0.16362481 0.16423523 0.16727571 0.16261207 0.16830423 0.17394796]] entropy:[1.7914981]
DEBUG:chainerrl.agents.a3c:t:16549 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.

DEBUG:chainerrl.agents.a3c:t:16574 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04065911 -0.02509119 -0.01217926 -0.04179114 -0.02857356  0.01732839]] probs:[[0.16352406 0.1660897  0.16824815 0.16333905 0.16551232 0.17328672]] entropy:[1.791554]
DEBUG:chainerrl.agents.a3c:t:16575 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06086365 -0.05017956 -0.05854853  0.00201843 -0.07998337 -0.0077029 ]] probs:[[0.16356917 0.16532612 0.16394828 0.17418501 0.16047148 0.1724999 ]] entropy:[1.7913182]
DEBUG:chainerrl.agents.a3c:t:16576 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06078751 -0.05013432 -0.0583854   0.00250811 -0.08029459 -0.00772459]] probs:[[0.16356881 0.16532066 0.1639622  0.17425668 0.16040899 0.17248265]] entropy:[1.7913134]
DEBUG:chainerrl.agents.a3c:t:16577 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06075305 -0.05008992 -0.05836733  0.00264247 -0.08030038 -0.00780376]] probs

DEBUG:chainerrl.agents.a3c:grad norm:0.013385509732554127
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:16602 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0601317  -0.04815679 -0.02666336  0.00095188 -0.05554055 -0.05092692]] probs:[[0.16332082 0.16528833 0.16887939 0.17360803 0.16407238 0.16483109]] entropy:[1.7915324]
DEBUG:chainerrl.agents.a3c:t:16603 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06142044 -0.05095539 -0.02325614  0.0048348  -0.05684234 -0.04941224]] probs:[[0.16301006 0.16472493 0.16935147 0.17417616 0.16375805 0.16497932]] entropy:[1.7914853]
DEBUG:chainerrl.agents.a3c:t:16604 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05993357 -0.04933675 -0.025508    0.00241392 -0.05603351 -0.05080483]] probs:[[0.16331626 0.1650561  0.16903642 0.17382275 0.16395445 0.16481397]] entropy:[1.7915168]
DEBUG:chainerrl.agents.a3c:t:16605 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=

DEBUG:chainerrl.agents.a3c:t:16630 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07281382 -0.06541833 -0.00513381  0.01295782 -0.02802872 -0.06410738]] probs:[[0.16073161 0.16192472 0.1719865  0.17512634 0.16809362 0.16213714]] entropy:[1.7912209]
DEBUG:chainerrl.agents.a3c:t:16631 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07286161 -0.06548814 -0.00505491  0.01298125 -0.02804871 -0.06409672]] probs:[[0.16072443 0.1619139  0.1720006  0.17513098 0.16809078 0.16213936]] entropy:[1.79122]
DEBUG:chainerrl.agents.a3c:t:16632 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07286192 -0.06552844 -0.00498603  0.01306152 -0.02806436 -0.06407756]] probs:[[0.16072118 0.16190416 0.17200904 0.17514156 0.16808482 0.16213924]] entropy:[1.7912189]
DEBUG:chainerrl.agents.a3c:t:16633 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0728171  -0.06553832 -0.00492487  0.01320004 -0.02814185 -0.06406783]] probs:

DEBUG:chainerrl.agents.a3c:grad norm:0.016466965477450288
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:16658 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0626829  -0.05339688 -0.02878118  0.0086611  -0.03103958 -0.0546027 ]] probs:[[0.16238971 0.1639047  0.1679894  0.17439854 0.16761044 0.16370717]] entropy:[1.79147]
DEBUG:chainerrl.agents.a3c:t:16659 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0626775  -0.05339903 -0.02876052  0.00868738 -0.03099729 -0.05457206]] probs:[[0.16238724 0.16390097 0.1679894  0.17439952 0.16761407 0.16370882]] entropy:[1.79147]
DEBUG:chainerrl.agents.a3c:t:16660 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06267744 -0.05339863 -0.02875964  0.00868691 -0.03099797 -0.05457252]] probs:[[0.16238727 0.16390103 0.16798957 0.17439945 0.16761395 0.16370875]] entropy:[1.7914698]
DEBUG:chainerrl.agents.a3c:t:16661 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0)

DEBUG:chainerrl.agents.a3c:t:16686 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08581331 -0.02927489 -0.00180019  0.00054874 -0.06403492 -0.04443642]] probs:[[0.15872326 0.16795576 0.17263427 0.17304026 0.16221792 0.1654285 ]] entropy:[1.7912716]
DEBUG:chainerrl.agents.a3c:t:16687 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08581296 -0.02926837 -0.00178834  0.00053776 -0.0640415  -0.04444373]] probs:[[0.15872349 0.16795702 0.1726365  0.17303853 0.162217   0.16542746]] entropy:[1.7912717]
DEBUG:chainerrl.agents.a3c:t:16688 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0858126  -0.02926181 -0.0017763   0.00052678 -0.064048   -0.04445121]] probs:[[0.1587237  0.16795829 0.17263873 0.1730368  0.16221611 0.16542639]] entropy:[1.7912717]
DEBUG:chainerrl.agents.a3c:t:16689 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08579764 -0.029223   -0.0017626   0.00053407 -0.06409419 -0.04443344]] prob

DEBUG:chainerrl.agents.a3c:grad norm:0.5126207868426762
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:16714 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05270357 -0.11072227 -0.03496879  0.08571994 -0.03015849 -0.08564086]] probs:[[0.16392401 0.15468399 0.16685711 0.18826047 0.16766167 0.15861276]] entropy:[1.7897524]
DEBUG:chainerrl.agents.a3c:t:16715 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05271418 -0.11068114 -0.03500461  0.0856635  -0.03010846 -0.08563668]] probs:[[0.16392276 0.15469082 0.16685161 0.1882504  0.16767055 0.15861388]] entropy:[1.7897542]
DEBUG:chainerrl.agents.a3c:t:16716 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05273177 -0.11070123 -0.03500981  0.08562391 -0.03008002 -0.08563711]] probs:[[0.16392145 0.1546892  0.16685236 0.18824476 0.16767693 0.15861534]] entropy:[1.7897546]
DEBUG:chainerrl.agents.a3c:t:16717 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.

DEBUG:chainerrl.agents.a3c:t:16742 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04748648 -0.0654723  -0.09025026  0.0742151  -0.02873637 -0.10721446]] probs:[[0.17937589 0.16021629 0.15629524 0.184235   0.16621144 0.15366618]] entropy:[1.7894143]
DEBUG:chainerrl.agents.a3c:t:16743 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04638192 -0.06575708 -0.08821125  0.07463619 -0.0297408  -0.10961895]] probs:[[0.1792465  0.16023202 0.15667425 0.1843832  0.16610816 0.15335585]] entropy:[1.7894075]
DEBUG:chainerrl.agents.a3c:t:16744 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0456694  -0.06585023 -0.08710076  0.07493202 -0.03032594 -0.11083866]] probs:[[0.1791543  0.16024885 0.15687941 0.1844743  0.16604389 0.15319927]] entropy:[1.7894049]
DEBUG:chainerrl.agents.a3c:t:16745 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04755401 -0.06543338 -0.09041764  0.07416407 -0.0286686  -0.10699565]] prob

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:16770 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06607582 -0.00860807  0.00981202 -0.05338641 -0.07163388 -0.06307644]] probs:[[0.16264859 0.17226943 0.17547205 0.16472565 0.16174708 0.16313717]] entropy:[1.7912664]
DEBUG:chainerrl.agents.a3c:t:16771 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06658491 -0.01255795  0.01784823 -0.0555209  -0.06151223 -0.07847935]] probs:[[0.16265452 0.17168397 0.1769844  0.16446412 0.16348171 0.1607313 ]] entropy:[1.791171]
DEBUG:chainerrl.agents.a3c:t:16772 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06394898 -0.01313866  0.01158976 -0.06015784 -0.06578174 -0.06881765]] probs:[[0.1631934  0.17169958 0.17599836 0.16381326 0.16289458 0.1624008 ]] entropy:[1.7912686]
DEBUG:chainerrl.agents.a3c:t:16773 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06537115 -0.00819882  0.00696912 -0.054359

DEBUG:chainerrl.agents.a3c:t:16799 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05659598 -0.00746251 -0.00041116 -0.04478455 -0.04254096 -0.0500231 ]] probs:[[0.16284566 0.17104664 0.17225702 0.1647805  0.16515061 0.16391954]] entropy:[1.7915251]
DEBUG:chainerrl.agents.a3c:t:16800 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05658831 -0.00745799 -0.00040582 -0.04480314 -0.04256545 -0.05003   ]] probs:[[0.16284777 0.17104833 0.17225885 0.16477832 0.16514745 0.16391928]] entropy:[1.7915251]
DEBUG:chainerrl.agents.a3c:t:16801 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05659598 -0.00746251 -0.00041116 -0.04478455 -0.04254096 -0.0500231 ]] probs:[[0.16284566 0.17104664 0.17225702 0.1647805  0.16515061 0.16391954]] entropy:[1.7915251]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4884406] v_loss:[[3.8285893e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.11026285388948714
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.a

DEBUG:chainerrl.agents.a3c:t:16827 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03958141 -0.01583321 -0.00658642 -0.04028808 -0.04389937 -0.03832215]] probs:[[0.16518489 0.16915469 0.17072608 0.16506821 0.16447316 0.16539304]] entropy:[1.7916584]
DEBUG:chainerrl.agents.a3c:t:16828 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03957992 -0.01583469 -0.00658213 -0.04027588 -0.0438868  -0.03830576]] probs:[[0.16518387 0.16915315 0.17072551 0.16506895 0.16447398 0.16539447]] entropy:[1.7916583]
DEBUG:chainerrl.agents.a3c:t:16829 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03958141 -0.01583321 -0.00658642 -0.04028808 -0.04389937 -0.03832215]] probs:[[0.16518489 0.16915469 0.17072608 0.16506821 0.16447316 0.16539304]] entropy:[1.7916584]
DEBUG:chainerrl.agents.a3c:t:16830 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03957992 -0.01583469 -0.00658213 -0.04027588 -0.0438868  -0.03830576]] prob

INFO: outdir:result global_step:34301 local_step:16831 R:0.2
INFO: statistics:[('average_value', 0.2652686761578887), ('average_entropy', 1.785567825252489)]


DEBUG:chainerrl.agents.a3c:t:16832 r:0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03698565 -0.17259142  0.09465145 -0.09243525 -0.02615811  0.11008161]] probs:[[0.17350465 0.14069963 0.18380405 0.1524419  0.16288763 0.18666217]] entropy:[1.7868093]
DEBUG:chainerrl.agents.a3c:t:16833 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01704299 -0.17794414  0.03772852 -0.13808261 -0.02375202  0.08923244]] probs:[[0.17436978 0.14347936 0.17801428 0.14931418 0.16739951 0.18742293]] entropy:[1.787333]
DEBUG:chainerrl.agents.a3c:t:16834 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01151818 -0.18011512  0.02968349 -0.1451661  -0.02460773  0.08594188]] probs:[[0.17419489 0.14381696 0.1773881  0.14893208 0.16801426 0.18765372]] entropy:[1.7873576]
DEBUG:chainerrl.agents.a3c:t:16835 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01086736 -0.18043457  0.02860108 -0.14624196 -0.02483846  0.08546264]] probs:[

DEBUG:chainerrl.agents.a3c:t:16861 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00989088 -0.22694193 -0.04709996 -0.07079779  0.00653913 -0.02124169]] probs:[[0.17499182 0.14084896 0.16860019 0.16465169 0.17789069 0.17301676]] entropy:[1.7889307]
DEBUG:chainerrl.agents.a3c:t:16862 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00989491 -0.22694927 -0.04711001 -0.07079517  0.00654386 -0.02125203]] probs:[[0.17499179 0.14084847 0.16859913 0.16465275 0.17789221 0.17301562]] entropy:[1.7889304]
DEBUG:chainerrl.agents.a3c:t:16863 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0099301  -0.22690351 -0.04701828 -0.07073194  0.00648754 -0.02125834]] probs:[[0.17498301 0.14085282 0.16861208 0.1646607  0.17787953 0.17301194]] entropy:[1.7889326]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.45031] v_loss:[[4.650905e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.12391785816557002
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agen

DEBUG:chainerrl.agents.a3c:t:16889 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01700924 -0.16029526 -0.04368232 -0.07150739 -0.03329697 -0.04011077]] probs:[[0.17396893 0.15074515 0.16938996 0.16474165 0.17115831 0.16999604]] entropy:[1.7906896]
DEBUG:chainerrl.agents.a3c:t:16890 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01683278 -0.16011247 -0.04348757 -0.07179831 -0.03353044 -0.03989297]] probs:[[0.17399257 0.1507666  0.1694161  0.16468707 0.17111143 0.17002618]] entropy:[1.7906902]
DEBUG:chainerrl.agents.a3c:t:16891 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01671995 -0.15996626 -0.04335468 -0.07205711 -0.03373456 -0.0397647 ]] probs:[[0.17401074 0.15078737 0.16943718 0.16464306 0.17107506 0.17004655]] entropy:[1.7906913]
DEBUG:chainerrl.agents.a3c:t:16892 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01703121 -0.1603607  -0.04371892 -0.07137284 -0.03319797 -0.04012999]] prob

DEBUG:chainerrl.agents.a3c:t:16918 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01933651 -0.11179189 -0.04538299 -0.01872244  0.00114997 -0.00430173]] probs:[[0.16884933 0.15393823 0.16450818 0.16895305 0.17234413 0.17140712]] entropy:[1.7910545]
DEBUG:chainerrl.agents.a3c:t:16919 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05516712 -0.11059558 -0.04509322 -0.02760937  0.03428747 -0.03254353]] probs:[[0.1639182  0.15507968 0.16557783 0.16849823 0.17925727 0.16766888]] entropy:[1.7908435]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5041848] v_loss:[[5.5792138e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.16528350648179507
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:16920 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05750009 -0.0980149  -0.05357201 -0.02866752  0.02893359 -0.04319522]] probs:[[0.16398363 0.15747264 0.16462903 0.1687805  0.17878792 0.16634625]] entropy:[1.7910228]
DEBUG:chainerrl.a

DEBUG:chainerrl.agents.a3c:t:16946 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08532159 -0.13902797  0.02252874 -0.07301289  0.08374379 -0.10609794]] probs:[[0.16031778 0.15193482 0.17857493 0.16230327 0.18984792 0.15702131]] entropy:[1.7886658]
DEBUG:chainerrl.agents.a3c:t:16947 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08531915 -0.13903463  0.02249051 -0.07300882  0.08374909 -0.10612885]] probs:[[0.16031986 0.15193541 0.17857    0.16230565 0.18985094 0.15701814]] entropy:[1.7886655]
DEBUG:chainerrl.agents.a3c:t:16948 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09474379 -0.14388926  0.03364969 -0.05131354  0.08196131 -0.11879218]] probs:[[0.15865593 0.1510472  0.18039183 0.1656982  0.1893208  0.15488602]] entropy:[1.788354]
DEBUG:chainerrl.agents.a3c:t:16949 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09036075 -0.14194843  0.0290638  -0.06147157  0.08305357 -0.11299302]] probs

DEBUG:chainerrl.agents.a3c:t:16975 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08358449 -0.12050013 -0.00687495 -0.02621078  0.02881754 -0.10657037]] probs:[[0.16132216 0.15547544 0.17418413 0.17084849 0.18051349 0.15765633]] entropy:[1.7902615]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3024768] v_loss:[[0.0006418]]
DEBUG:chainerrl.agents.a3c:grad norm:0.8139286671447541
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:16976 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08717128 -0.12180752 -0.00584703 -0.01798223  0.03059922 -0.10586252]] probs:[[0.16054492 0.15507944 0.1741467  0.17204615 0.18061076 0.157572  ]] entropy:[1.7901614]
DEBUG:chainerrl.agents.a3c:t:16977 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08880126 -0.12330318 -0.0080985  -0.01668323  0.02867495 -0.1063697 ]] probs:[[0.16045809 0.15501639 0.17394437 0.1724575  0.18045996 0.1576637 ]] entropy:[1.7901659]
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:17003 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08306573 -0.0730815  -0.05638429 -0.01971795  0.00720693 -0.14557776]] probs:[[0.16296342 0.16459863 0.16737005 0.17362079 0.178359   0.15308812]] entropy:[1.7905908]
DEBUG:chainerrl.agents.a3c:t:17004 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10479273 -0.09624454 -0.09223661  0.00078124 -0.0238982  -0.15602243]] probs:[[0.16215642 0.1635485  0.1642053  0.18021226 0.17581914 0.15405838]] entropy:[1.790378]
DEBUG:chainerrl.agents.a3c:t:17005 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10331763 -0.09469446 -0.08970957 -0.00069169 -0.02187084 -0.15532564]] probs:[[0.16221614 0.16362101 0.16443866 0.17974794 0.17598106 0.15399523]] entropy:[1.7904062]
DEBUG:chainerrl.agents.a3c:t:17006 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10373969 -0.09513867 -0.0904332  -0.00027031 -0.02245186 -0.15552585]] probs

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7184361] v_loss:[[0.00097216]]
DEBUG:chainerrl.agents.a3c:grad norm:1.0332741325110002
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:17032 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1800113  -0.10488272 -0.15640931  0.03675734 -0.02282224 -0.05262105]] probs:[[0.15037507 0.16210774 0.15396643 0.18677436 0.17597143 0.17080505]] entropy:[1.7889035]
DEBUG:chainerrl.agents.a3c:t:17033 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18001132 -0.10488269 -0.1564093   0.03675744 -0.0228221  -0.05262091]] probs:[[0.15037504 0.16210772 0.15396641 0.18677434 0.17597143 0.17080505]] entropy:[1.7889035]
DEBUG:chainerrl.agents.a3c:t:17034 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17919058 -0.10038006 -0.15547489  0.03909316 -0.0242203  -0.05087412]] probs:[[0.15027483 0.16259725 0.1538813  0.18693286 0.17546438 0.17084937]] entropy:[1.7889067]
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:17060 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10369223 -0.11950892 -0.13546085  0.01908842 -0.0538899  -0.0872017 ]] probs:[[0.16256554 0.16001453 0.15748224 0.18380252 0.17086668 0.16526856]] entropy:[1.7904058]
DEBUG:chainerrl.agents.a3c:t:17061 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10367978 -0.11951277 -0.13546218  0.01907192 -0.05391109 -0.0872274 ]] probs:[[0.16256914 0.16001545 0.15748355 0.18380126 0.17086472 0.1652659 ]] entropy:[1.790406]
DEBUG:chainerrl.agents.a3c:t:17062 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10367969 -0.1195128  -0.13546206  0.01907182 -0.05391115 -0.08722743]] probs:[[0.16256915 0.16001545 0.15748356 0.18380125 0.1708647  0.16526592]] entropy:[1.790406]
DEBUG:chainerrl.agents.a3c:t:17063 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10369223 -0.11950892 -0.13546085  0.01908842 -0.0538899  -0.0872017 ]] probs:

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:17088 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06420378 -0.08585772 -0.13985753 -0.01678917 -0.04751107 -0.12026515]] probs:[[0.16901638 0.16539586 0.15670136 0.17722325 0.1718614  0.1598018 ]] entropy:[1.7908831]
DEBUG:chainerrl.agents.a3c:t:17089 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06420378 -0.08585772 -0.13985753 -0.01678917 -0.04751107 -0.12026515]] probs:[[0.16901638 0.16539586 0.15670136 0.17722325 0.1718614  0.1598018 ]] entropy:[1.7908831]
DEBUG:chainerrl.agents.a3c:t:17090 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06633036 -0.1008444  -0.14131583 -0.01767037 -0.04503762 -0.10830966]] probs:[[0.16880184 0.1630752  0.15660709 0.17721885 0.17243464 0.16186234]] entropy:[1.7909012]
DEBUG:chainerrl.agents.a3c:t:17091 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06636567 -0.10081354 -0.14131305 -0.01761

DEBUG:chainerrl.agents.a3c:t:17117 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08357835  0.02439511 -0.25717962  0.02887383  0.0572807  -0.25091287]] probs:[[0.16472162 0.18350288 0.13847028 0.18432659 0.18963781 0.13934076]] entropy:[1.7835997]
DEBUG:chainerrl.agents.a3c:t:17118 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0835796   0.02439717 -0.25717965  0.0288772   0.05728466 -0.25090936]] probs:[[0.16472112 0.18350293 0.13847001 0.18432687 0.1896382  0.13934098]] entropy:[1.7835999]
DEBUG:chainerrl.agents.a3c:t:17119 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08359021  0.02441462 -0.25717926  0.02890914  0.05732227 -0.25087577]] probs:[[0.16471617 0.18350257 0.13846739 0.18432918 0.18964167 0.13934298]] entropy:[1.7835987]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6663008] v_loss:[[0.00060416]]
DEBUG:chainerrl.agents.a3c:grad norm:1.362100072661286
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:17145 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06920239  0.01534426 -0.2577294  -0.17405897 -0.07052374 -0.25953665]] probs:[[0.17723528 0.1928716  0.14678237 0.15959215 0.17700124 0.14651734]] entropy:[1.7864983]
DEBUG:chainerrl.agents.a3c:t:17146 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06891206  0.01598352 -0.2573556  -0.17456648 -0.07092175 -0.25894034]] probs:[[0.17725736 0.19296296 0.14681293 0.15948474 0.17690149 0.14658046]] entropy:[1.786497]
DEBUG:chainerrl.agents.a3c:t:17147 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06694445  0.02059917 -0.2550289  -0.17851186 -0.07390991 -0.25451446]] probs:[[0.1774152  0.19364691 0.14699644 0.15868568 0.17618373 0.14707206]] entropy:[1.7864754]
DEBUG:chainerrl.agents.a3c:t:17148 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05866823  0.04406666 -0.24344446 -0.19442979 -0.08491039 -0.23433788]] probs

DEBUG:chainerrl.agents.a3c:t:17174 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10216349 -0.07126796 -0.10567433 -0.19944362 -0.1077293  -0.09066746]] probs:[[0.16831656 0.17359795 0.16772665 0.1527139  0.16738233 0.17026268]] entropy:[1.7909646]
DEBUG:chainerrl.agents.a3c:t:17175 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10215851 -0.07120251 -0.10566567 -0.19938794 -0.10775004 -0.09064469]] probs:[[0.16831358 0.17360537 0.1677243  0.15271895 0.16737507 0.17026271]] entropy:[1.7909648]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5935339] v_loss:[[0.00033644]]
DEBUG:chainerrl.agents.a3c:grad norm:0.3267894332036338
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:17176 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11933455  0.14442752  0.12037955 -0.79716796 -0.10445878  0.11148112]] probs:[[0.15735556 0.20484836 0.19998094 0.07989191 0.15971385 0.19820932]] entropy:[1.7533056]
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:17202 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09369595  0.07903737  0.09794092 -0.66689813 -0.0666603   0.06888412]] probs:[[0.16214173 0.19271341 0.19639103 0.09140212 0.16658513 0.19076665]] entropy:[1.765234]
DEBUG:chainerrl.agents.a3c:t:17203 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09368913  0.07904183  0.09794045 -0.6668997  -0.06665252  0.06890263]] probs:[[0.16214176 0.19271299 0.19638963 0.09140137 0.16658533 0.1907689 ]] entropy:[1.7652335]
DEBUG:chainerrl.agents.a3c:t:17204 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09371896  0.07902016  0.09794112 -0.6668894  -0.06668521  0.06883177]] probs:[[0.1621413  0.192714   0.19639505 0.09140477 0.16658437 0.19076054]] entropy:[1.7652358]
DEBUG:chainerrl.agents.a3c:t:17205 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08174361  0.1194075   0.11096651 -0.629033   -0.09544979  0.05837347]] probs

DEBUG:chainerrl.agents.a3c:t:17231 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01858321 -0.04157363  0.0165468  -0.5481328   0.01103566 -0.08611666]] probs:[[0.1796411  0.1755582  0.18606406 0.10578529 0.18504146 0.1679099 ]] entropy:[1.7761714]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5030913] v_loss:[[9.677851e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.20796608702595343
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:17232 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02303462 -0.05063559  0.0078687  -0.5219755   0.00607541 -0.08562434]] probs:[[0.17920499 0.17432638 0.18482946 0.10880849 0.18449831 0.1683324 ]] entropy:[1.7777789]
DEBUG:chainerrl.agents.a3c:t:17233 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02316676 -0.05068236  0.00783613 -0.5220998   0.0060102  -0.08567417]] probs:[[0.17919418 0.17433076 0.18483673 0.1088028  0.18449955 0.16833611]] entropy:[1.7777762]
DEBUG:chainerrl.ag

DEBUG:chainerrl.agents.a3c:t:17259 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01907075 -0.05412871  0.0049933  -0.38992363 -0.00115467 -0.06119939]] probs:[[0.17677605 0.17068602 0.1810816  0.1220012  0.17997174 0.16948342]] entropy:[1.7836581]
DEBUG:chainerrl.agents.a3c:t:17260 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01038212 -0.02182936  0.00805256 -0.36704233 -0.02653257 -0.05146146]] probs:[[0.1769609  0.17494673 0.18025337 0.12387446 0.17412585 0.16983874]] entropy:[1.7844765]
DEBUG:chainerrl.agents.a3c:t:17261 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-3.9344348e-02 -4.7745101e-02 -9.2127360e-05 -4.0834430e-01
  -2.1055171e-02 -4.4629924e-02]] probs:[[0.17432906 0.1728707  0.18130793 0.12053554 0.17754672 0.17341006]] entropy:[1.7832931]
DEBUG:chainerrl.agents.a3c:t:17262 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02855522 -0.03274696 -0.00353163 -0.39930472 -0.0237118

DEBUG:chainerrl.agents.a3c:pi_loss:[1.9304672] v_loss:[[0.11126571]]
DEBUG:chainerrl.agents.a3c:grad norm:193.32578741543443
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:17288 r:0.25 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06029082 -0.22948372  0.03188737 -0.32001698  0.2150961  -0.15378815]] probs:[[0.18591528 0.13914499 0.18070893 0.12710115 0.21704319 0.15008654]] entropy:[1.7747862]
DEBUG:chainerrl.agents.a3c:t:17289 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06030471 -0.22948757  0.03192874 -0.32001373  0.21508408 -0.15376483]] probs:[[0.18591583 0.13914295 0.18071443 0.12710017 0.2170382  0.1500884 ]] entropy:[1.7747867]
DEBUG:chainerrl.agents.a3c:t:17290 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06028963 -0.22948147  0.03188312 -0.32001925  0.21509932 -0.1537938 ]] probs:[[0.18591525 0.13914545 0.18070835 0.12710099 0.2170441  0.15008585]] entropy:[1.7747859]
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:17316 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00750578 -0.18667303 -0.02260769 -0.20672157  0.17853963 -0.14532492]] probs:[[0.1771223  0.14586209 0.17186804 0.1429669  0.21016107 0.15201963]] entropy:[1.7823298]
DEBUG:chainerrl.agents.a3c:t:17317 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00749937 -0.18666969 -0.02258478 -0.20673434  0.17850712 -0.14533795]] probs:[[0.17712244 0.14586364 0.17187323 0.1429661  0.21015577 0.15201876]] entropy:[1.7823306]
DEBUG:chainerrl.agents.a3c:t:17318 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0074932  -0.18667981 -0.02257455 -0.20674     0.17848462 -0.14534952]] probs:[[0.1771228  0.14586335 0.17187639 0.14296646 0.21015276 0.15201825]] entropy:[1.7823315]
DEBUG:chainerrl.agents.a3c:t:17319 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00750959 -0.18665539 -0.02260325 -0.20672344  0.17854553 -0.14531763]] prob

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:17344 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07283401 -0.20657872  0.00502975 -0.19831534  0.02214101 -0.07279201]] probs:[[0.18981725 0.14354506 0.17737347 0.14473614 0.18043467 0.16409339]] entropy:[1.7861167]
DEBUG:chainerrl.agents.a3c:t:17345 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07252926 -0.20639624  0.00499673 -0.19873992  0.0217866  -0.07325669]] probs:[[0.18980479 0.14360559 0.17741004 0.1447093  0.18041387 0.16405639]] entropy:[1.7861223]
DEBUG:chainerrl.agents.a3c:t:17346 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07282329 -0.2065733   0.00502818 -0.19832969  0.02212894 -0.0728075 ]] probs:[[0.1898168  0.14354704 0.17737468 0.14473529 0.18043399 0.16409221]] entropy:[1.786117]
DEBUG:chainerrl.agents.a3c:t:17347 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.07281995 -0.20657162  0.00502773 -0.198334

DEBUG:chainerrl.agents.a3c:t:17373 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00676447 -0.15284956 -0.038181   -0.17727162  0.00775247 -0.06302765]] probs:[[0.17940423 0.15293722 0.17151934 0.14924742 0.17958157 0.16731018]] entropy:[1.7892]
DEBUG:chainerrl.agents.a3c:t:17374 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00258255 -0.15018804 -0.03864587 -0.18068917  0.00409732 -0.06758294]] probs:[[0.1790756  0.15370534 0.17184272 0.14908794 0.17934707 0.16694134]] entropy:[1.7892792]
DEBUG:chainerrl.agents.a3c:t:17375 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.0021041  -0.14906022 -0.03785695 -0.17879699  0.00565053 -0.06611992]] probs:[[0.17880595 0.15372062 0.17180155 0.14921676 0.1794412  0.16701391]] entropy:[1.7893081]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6672269] v_loss:[[0.00078575]]
DEBUG:chainerrl.agents.a3c:grad norm:1.0153688538429637
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:17401 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0525884  -0.07340828  0.03556067 -0.05595548  0.01732046  0.00424138]] probs:[[0.16131449 0.15799066 0.17617977 0.16077225 0.17299534 0.17074746]] entropy:[1.7909025]
DEBUG:chainerrl.agents.a3c:t:17402 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05317676 -0.07110422  0.03397692 -0.06072379  0.01381116  0.0047569 ]] probs:[[0.16142817 0.15855996 0.17612852 0.16021445 0.17261234 0.17105651]] entropy:[1.7909235]
DEBUG:chainerrl.agents.a3c:t:17403 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05172336 -0.07101775  0.03452307 -0.05967152  0.01580923  0.00382262]] probs:[[0.16155006 0.15846293 0.17610167 0.16027112 0.17283677 0.17077741]] entropy:[1.7909251]
DEBUG:chainerrl.agents.a3c:t:17404 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05128124 -0.07297825  0.03568886 -0.0565911   0.01845936  0.00220374]] prob

DEBUG:chainerrl.agents.a3c:t:17430 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03935257 -0.23634182  0.00467354 -0.0933632   0.01858109 -0.15466742]] probs:[[0.17347977 0.1424615  0.18128802 0.16435854 0.18382691 0.15458532]] entropy:[1.7878296]
DEBUG:chainerrl.agents.a3c:t:17431 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03699497 -0.23623222  0.0099505  -0.09551156  0.0162016  -0.15655707]] probs:[[0.17383659 0.14243396 0.18219201 0.16395617 0.18333448 0.15424676]] entropy:[1.7877481]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4386519] v_loss:[[0.00185456]]
DEBUG:chainerrl.agents.a3c:grad norm:4.64542971112575
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:17432 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03086832 -0.18327223 -0.05219623 -0.12387088 -0.00250706 -0.18222605]] probs:[[0.17739812 0.15232137 0.17365465 0.1616436  0.18250138 0.15248081]] entropy:[1.7892132]
DEBUG:chainerrl.agents.

DEBUG:chainerrl.agents.a3c:t:17458 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04911065 -0.11928286 -0.04536889 -0.01951394 -0.00942485 -0.08405372]] probs:[[0.18158671 0.15344463 0.16521601 0.16954336 0.17126255 0.1589467 ]] entropy:[1.7902963]
DEBUG:chainerrl.agents.a3c:t:17459 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05110991 -0.11971334 -0.0456732  -0.01681077 -0.00879443 -0.08425406]] probs:[[0.18180792 0.15325873 0.16503666 0.16986942 0.17123663 0.15879066]] entropy:[1.7902474]
DEBUG:chainerrl.agents.a3c:t:17460 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03875485 -0.10858306 -0.00611978  0.00905691 -0.01850502 -0.0759231 ]] probs:[[0.1777506  0.15339917 0.16995043 0.17254938 0.16785853 0.1584919 ]] entropy:[1.7905133]
DEBUG:chainerrl.agents.a3c:t:17461 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04958783 -0.11771576 -0.04710015 -0.01570581 -0.0102028  -0.08462851]] prob

DEBUG:chainerrl.agents.a3c:t:17487 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09007877 -0.15281537 -0.03847119 -0.01905578 -0.03875146 -0.02762098]] probs:[[0.1876729  0.1472021  0.16503385 0.16826937 0.16498761 0.16683425]] entropy:[1.7892803]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5876225] v_loss:[[0.00034274]]
DEBUG:chainerrl.agents.a3c:grad norm:0.9795622634939575
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:17488 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06928834 -0.13901974 -0.02753635 -0.02274667 -0.02486869 -0.01889507]] probs:[[0.18323484 0.14877878 0.16632503 0.16712359 0.16676933 0.16776851]] entropy:[1.7899612]
DEBUG:chainerrl.agents.a3c:t:17489 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.06867316 -0.13794412 -0.02899556 -0.02253132 -0.02439103 -0.01836993]] probs:[[0.18312055 0.1489376  0.16608106 0.16715811 0.16684754 0.16785517]] entropy:[1.7899883]
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:17515 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00214821 -0.12571047 -0.0940651  -0.14044595  0.0023617   0.07498184]] probs:[[0.174486   0.15354379 0.15848044 0.15129784 0.17452325 0.1876687 ]] entropy:[1.7886587]
DEBUG:chainerrl.agents.a3c:t:17516 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00227499 -0.12475818 -0.093738   -0.14013577  0.00208107  0.07534051]] probs:[[0.1744583  0.1536462  0.15848702 0.15130156 0.17442447 0.1876824 ]] entropy:[1.7886721]
DEBUG:chainerrl.agents.a3c:t:17517 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00232955 -0.1225272  -0.09300645 -0.13940671  0.00154001  0.0762176 ]] probs:[[0.1743546  0.15388943 0.15850009 0.15131366 0.174217   0.1877252 ]] entropy:[1.7887027]
DEBUG:chainerrl.agents.a3c:t:17518 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0002781  -0.10512849 -0.08990023 -0.1319263  -0.00624122  0.08757295]] prob

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.0441934] v_loss:[[0.00177633]]
DEBUG:chainerrl.agents.a3c:grad norm:4.557163832392801
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:17544 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24184656 -0.03049498  0.09838878 -0.02922483 -0.22911151  0.06011409]] probs:[[0.13806689 0.17056045 0.1940224  0.17077723 0.13983642 0.18673657]] entropy:[1.7834766]
DEBUG:chainerrl.agents.a3c:t:17545 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2415931  -0.03276493  0.09966844 -0.02905152 -0.23178333  0.06049997]] probs:[[0.13815366 0.17023753 0.19434367 0.17087086 0.13951558 0.18687867]] entropy:[1.7833756]
DEBUG:chainerrl.agents.a3c:t:17546 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24303356 -0.02898401  0.09610304 -0.03067638 -0.22668496  0.05930566]] probs:[[0.13795935 0.17088804 0.19365838 0.17059909 0.14023334 0.18666178]] entropy:[1.7835851]
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:17572 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04075331 -0.30463517  0.32282078 -0.18607979 -0.2886146   0.03587046]] probs:[[0.18033047 0.1276641  0.23909444 0.14373308 0.12972581 0.17945209]] entropy:[1.7658244]
DEBUG:chainerrl.agents.a3c:t:17573 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04492873 -0.32782614  0.32270968 -0.2340721  -0.24354899  0.01113839]] probs:[[0.18242356 0.12565957 0.24083504 0.13801059 0.13670886 0.1763624 ]] entropy:[1.7652668]
DEBUG:chainerrl.agents.a3c:t:17574 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.03404013 -0.29251108  0.32746187 -0.23837602 -0.24035586  0.00249067]] probs:[[0.18008521 0.1299148  0.24149576 0.13714162 0.13687035 0.17449231]] entropy:[1.7663105]
DEBUG:chainerrl.agents.a3c:t:17575 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02721873 -0.3496142   0.33345315 -0.22624922 -0.24946839 -0.00527069]] prob

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:17600 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0026014  -0.26940656  0.07099679 -0.13538775 -0.10524318  0.02535035]] probs:[[0.17703362 0.1355763  0.19055443 0.15501986 0.15976402 0.18205182]] entropy:[1.7854581]
DEBUG:chainerrl.agents.a3c:t:17601 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00296603 -0.2684831   0.06995209 -0.13613622 -0.10601535  0.02432529]] probs:[[0.17706893 0.13577811 0.19046287 0.1549913  0.15973079 0.18196793]] entropy:[1.7855135]
DEBUG:chainerrl.agents.a3c:t:17602 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00297033 -0.26872054  0.07023184 -0.13599063 -0.10584585  0.02463109]] probs:[[0.17704596 0.13572884 0.19049224 0.1549944  0.15973781 0.18200074]] entropy:[1.7854984]
DEBUG:chainerrl.agents.a3c:t:17603 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.00308733 -0.26807672  0.06950685 -0.13643

DEBUG:chainerrl.agents.a3c:t:17629 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04870416 -0.21082929  0.03279173 -0.09674166 -0.13230202  0.03902033]] probs:[[0.1694964  0.1441287  0.18388814 0.16154669 0.15590298 0.18503708]] entropy:[1.7878654]
DEBUG:chainerrl.agents.a3c:t:17630 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04872124 -0.21081512  0.03280013 -0.09673814 -0.13230753  0.03904245]] probs:[[0.16949274 0.14413008 0.18388884 0.16154653 0.15590142 0.18504032]] entropy:[1.7878652]
DEBUG:chainerrl.agents.a3c:t:17631 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04872148 -0.21081503  0.03280014 -0.09673825 -0.13230723  0.03904242]] probs:[[0.1694927  0.1441301  0.18388885 0.16154653 0.15590146 0.18504032]] entropy:[1.7878652]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5831636] v_loss:[[0.00026847]]
DEBUG:chainerrl.agents.a3c:grad norm:0.4956479641257652
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:17657 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10228476 -0.09273025 -0.02285909  0.00142571 -0.04789477  0.00413818]] probs:[[0.15699245 0.15849963 0.16997024 0.17414847 0.16576776 0.1746215 ]] entropy:[1.7908825]
DEBUG:chainerrl.agents.a3c:t:17658 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10360764 -0.09518728 -0.02052615  0.00093782 -0.0543037   0.00289948]] probs:[[0.15702981 0.15835765 0.17063336 0.17433542 0.16496603 0.17467774]] entropy:[1.7908459]
DEBUG:chainerrl.agents.a3c:t:17659 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-1.05640680e-01 -9.87266153e-02 -1.33941295e-02  2.39465386e-03
  -6.27848729e-02 -1.01648155e-04]] probs:[[0.15691796 0.15800665 0.17208174 0.17482027 0.16378897 0.1743844 ]] entropy:[1.7907554]
DEBUG:chainerrl.agents.a3c:t:17660 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10318661 -0.0930557  -0.02100555  0.00267912 -0.0

DEBUG:chainerrl.agents.a3c:t:17686 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05503784 -0.09314348 -0.10246953  0.05986149 -0.2353646   0.12913755]] probs:[[0.16460171 0.15844746 0.15697664 0.1846437  0.137442   0.19788857]] entropy:[1.784833]
DEBUG:chainerrl.agents.a3c:t:17687 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05258892 -0.09067727 -0.09812059  0.06535994 -0.20705064  0.13932171]] probs:[[0.16361946 0.15750466 0.15633665 0.18410242 0.14020164 0.19823517]] entropy:[1.785227]
DEBUG:chainerrl.agents.a3c:pi_loss:[-2.1406937] v_loss:[[0.00554124]]
DEBUG:chainerrl.agents.a3c:grad norm:34.63307408425476
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:17688 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02272194 -0.0617667  -0.10805202 -0.01642266 -0.19649947  0.16259237]] probs:[[0.17509215 0.16090654 0.15362868 0.16837065 0.1406242  0.20137781]] entropy:[1.7853802]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:17714 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01890334 -0.18858097 -0.0182331  -0.04406145 -0.18854825  0.10725731]] probs:[[0.17793043 0.14459088 0.17144391 0.1670725  0.14459562 0.19436671]] entropy:[1.7860732]
DEBUG:chainerrl.agents.a3c:t:17715 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01741916 -0.19480424 -0.02419685 -0.04659564 -0.18787633  0.10397431]] probs:[[0.17822671 0.1441469  0.17096183 0.16717507 0.145149   0.19434044]] entropy:[1.786084]
DEBUG:chainerrl.agents.a3c:t:17716 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01847618 -0.1978045  -0.02284045 -0.0495779  -0.1829011   0.09254354]] probs:[[0.17877167 0.14400221 0.17153594 0.16701028 0.14616442 0.19251552]] entropy:[1.7864189]
DEBUG:chainerrl.agents.a3c:t:17717 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.02065923 -0.19958542 -0.02285057 -0.05196821 -0.18045521  0.08948524]] probs

DEBUG:chainerrl.agents.a3c:t:17743 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16240565 -0.02853988 -0.1305271  -0.07714935 -0.06279528 -0.03891268]] probs:[[0.15382698 0.17586106 0.15880978 0.16751699 0.16993889 0.17404632]] entropy:[1.7906193]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5918493] v_loss:[[0.00045891]]
DEBUG:chainerrl.agents.a3c:grad norm:2.8078244374186805
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:17744 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10430211 -0.08444045 -0.15616134 -0.0746815  -0.08413634 -0.0015396 ]] probs:[[0.16318046 0.16645388 0.15493372 0.16808626 0.16650452 0.18084118]] entropy:[1.7907071]
DEBUG:chainerrl.agents.a3c:t:17745 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10957336 -0.03122512 -0.16084686 -0.06248161 -0.03964092 -0.01337503]] probs:[[0.15991943 0.17295274 0.15192646 0.16763045 0.1715033  0.17606768]] entropy:[1.7904993]
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:17771 r:0.3 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00459912 -0.21223924 -0.06895656 -0.05509169 -0.07756331 -0.00812745]] probs:[[0.1790614  0.14415517 0.16636315 0.16868582 0.16493745 0.17679699]] entropy:[1.7893925]
DEBUG:chainerrl.agents.a3c:t:17772 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01325259 -0.24154353 -0.03986317 -0.05163473 -0.07200057  0.01072638]] probs:[[0.1754635  0.1396501  0.17085588 0.16885643 0.16545232 0.17972177]] entropy:[1.7886432]
DEBUG:chainerrl.agents.a3c:t:17773 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0156189  -0.24796954 -0.03386036 -0.05660981 -0.06498746  0.00537383]] probs:[[0.17520884 0.13888246 0.17204174 0.16817206 0.16676907 0.17892583]] entropy:[1.7885549]
DEBUG:chainerrl.agents.a3c:t:17774 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01946524 -0.25038838 -0.03122681 -0.05119685 -0.06727911  0.01573956]] prob

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6939999] v_loss:[[0.00079478]]
DEBUG:chainerrl.agents.a3c:grad norm:3.0252304586743426
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:17800 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0718922  -0.11467743 -0.14999716  0.09565789  0.01855711 -0.0652076 ]] probs:[[0.16215965 0.15536796 0.14997618 0.19173831 0.17751068 0.16324726]] entropy:[1.7882626]
DEBUG:chainerrl.agents.a3c:t:17801 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08031636 -0.12728678 -0.12995884  0.0897696   0.0205318  -0.03350212]] probs:[[0.16012481 0.1527776  0.1523699  0.18981303 0.17711544 0.16779917]] entropy:[1.78854]
DEBUG:chainerrl.agents.a3c:t:17802 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07567427 -0.12639658 -0.13101202  0.08940271  0.0218369  -0.03462231]] probs:[[0.16075821 0.15280753 0.15210387 0.18961173 0.17722368 0.167495  ]] entropy:[1.7885638]
DEBUG:chainerrl.agents.

INFO: outdir:result global_step:36312 local_step:17821 R:2.9
INFO: statistics:[('average_value', 0.41592210516538614), ('average_entropy', 1.785580644451141)]


DEBUG:chainerrl.agents.a3c:t:17822 r:0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.4500447  -0.20797743 -0.16496083  0.3662175  -0.03623225  0.16203639]] probs:[[0.10842904 0.13812558 0.14419691 0.24526963 0.16400689 0.19997194]] entropy:[1.756644]
DEBUG:chainerrl.agents.a3c:t:17823 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.5545074  -0.23423973 -0.2547788   0.356045   -0.03914033  0.18524823]] probs:[[0.10017671 0.13799305 0.1351877  0.24900876 0.16772114 0.2099126 ]] entropy:[1.7476478]
DEBUG:chainerrl.agents.a3c:t:17824 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.56616455 -0.23391208 -0.26681018  0.35303116 -0.0365835   0.19145478]] probs:[[0.09918885 0.13827965 0.13380453 0.24869354 0.16844456 0.21158884]] entropy:[1.7466241]
DEBUG:chainerrl.agents.a3c:t:17825 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.5675357  -0.2332162  -0.26845834  0.35230863 -0.03586774  0.1928676 ]] probs:[

DEBUG:chainerrl.agents.a3c:t:17851 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.40338603 -0.21813057 -0.20535679  0.15561907 -0.04010708  0.2237887 ]] probs:[[0.11789942 0.14189501 0.14371918 0.20619804 0.16954373 0.22074467]] entropy:[1.7678733]
DEBUG:chainerrl.agents.a3c:t:17852 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.40338728 -0.2181294  -0.20535584  0.15562057 -0.04010591  0.22379048]] probs:[[0.11789914 0.14189501 0.14371914 0.20619811 0.16954373 0.2207448 ]] entropy:[1.7678729]
DEBUG:chainerrl.agents.a3c:t:17853 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.40338862 -0.21812959 -0.20535652  0.15562184 -0.04010452  0.22378969]] probs:[[0.11789899 0.141895   0.14371905 0.20619838 0.16954398 0.22074464]] entropy:[1.7678729]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4234543] v_loss:[[1.6356711e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.8953340860937224
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.ag

DEBUG:chainerrl.agents.a3c:t:17879 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.27352256 -0.1067896  -0.15745127  0.05040754 -0.02825627  0.03077858]] probs:[[0.13659887 0.1613832  0.15341091 0.18885486 0.1745681  0.18518399]] entropy:[1.7856488]
DEBUG:chainerrl.agents.a3c:t:17880 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2723401  -0.10726239 -0.15740608  0.04975962 -0.02811886  0.03113285]] probs:[[0.13675237 0.16129732 0.1534087  0.18872131 0.17458172 0.18523858]] entropy:[1.7856867]
DEBUG:chainerrl.agents.a3c:t:17881 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.27407634 -0.10664802 -0.15776736  0.0505984  -0.02795788  0.03059611]] probs:[[0.13652967 0.16141364 0.15336964 0.18889979 0.1746284  0.1851589 ]] entropy:[1.785627]
DEBUG:chainerrl.agents.a3c:t:17882 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.27469018 -0.10645418 -0.15788749  0.0508981  -0.02788395  0.03040477]] probs

DEBUG:chainerrl.agents.a3c:t:17908 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2135201  -0.0369759  -0.09796785  0.01703118  0.00578896  0.00246258]] probs:[[0.14161956 0.16896449 0.15896699 0.17834067 0.17634696 0.17576133]] entropy:[1.7886597]
DEBUG:chainerrl.agents.a3c:t:17909 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.21448432 -0.03761104 -0.09654343  0.01924512  0.00513654  0.00284853]] probs:[[0.14143628 0.16880134 0.15914093 0.17867681 0.17617364 0.17577101]] entropy:[1.7886261]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4764324] v_loss:[[2.3007437e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.11079637796041483
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:17910 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.19901793 -0.04096561 -0.08851676  0.01040614  0.00020997 -0.00363242]] probs:[[0.14373448 0.16834578 0.1605281  0.17721999 0.1754222  0.17474946]] entropy:[1.7892349]
DEBUG:chainerrl.a

DEBUG:chainerrl.agents.a3c:t:17936 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2056427  -0.05231611 -0.10372344 -0.02014031  0.00773509 -0.05794242]] probs:[[0.14547908 0.16958578 0.16108812 0.17513107 0.1800816  0.16863431]] entropy:[1.789483]
DEBUG:chainerrl.agents.a3c:t:17937 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.20638742 -0.05094611 -0.10435097 -0.02068769  0.00726992 -0.05682181]] probs:[[0.14536604 0.16981274 0.16098182 0.17502953 0.17999199 0.1688179 ]] entropy:[1.7894695]
DEBUG:chainerrl.agents.a3c:t:17938 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.20644742 -0.050806   -0.10443549 -0.0207943   0.00719786 -0.05671043]] probs:[[0.14535898 0.16983847 0.16097005 0.17501287 0.17998107 0.16883862]] entropy:[1.789469]
DEBUG:chainerrl.agents.a3c:t:17939 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.20901358 -0.04446533 -0.09159169 -0.01335733  0.00313464 -0.05204058]] probs:

DEBUG:chainerrl.agents.a3c:t:17965 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.20261914  0.07380605 -0.01884454 -0.03484572 -0.0743731  -0.03845784]] probs:[[0.14248954 0.1878594  0.17123608 0.1685179  0.16198677 0.1679103 ]] entropy:[1.7884773]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4793814] v_loss:[[2.6011217e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.1773908268993054
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:17966 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18032302  0.06329899 -0.01230507 -0.03659652 -0.08868775 -0.04162452]] probs:[[0.14581232 0.1860364  0.17248982 0.16835028 0.15980521 0.16750593]] entropy:[1.789058]
DEBUG:chainerrl.agents.a3c:t:17967 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18045916  0.06344181 -0.01235256 -0.03668618 -0.08873928 -0.04151467]] probs:[[0.14579342 0.18606417 0.17248274 0.16833629 0.159798   0.16752543]] entropy:[1.7890525]
DEBUG:chainerrl.age

DEBUG:chainerrl.agents.a3c:t:17993 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13255008  0.01903958 -0.02924698 -0.04796832 -0.06811486 -0.04614427]] probs:[[0.15343125 0.17854528 0.17012879 0.16697337 0.1636431  0.16727822]] entropy:[1.7907412]
DEBUG:chainerrl.agents.a3c:t:17994 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13603085  0.01864377 -0.02099746 -0.04850691 -0.06643808 -0.03960832]] probs:[[0.15257967 0.1781029  0.17118078 0.16653588 0.16357632 0.16802442]] entropy:[1.7906679]
DEBUG:chainerrl.agents.a3c:t:17995 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13649856  0.01904015 -0.02111863 -0.04858067 -0.06671134 -0.03955729]] probs:[[0.15251899 0.17818597 0.17117202 0.16653524 0.16354306 0.16804476]] entropy:[1.7906566]
DEBUG:chainerrl.agents.a3c:t:17996 r:0.1 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13667999  0.01925602 -0.02118725 -0.04867127 -0.06676185 -0.03937874]] prob

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5001358] v_loss:[[8.684578e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.25919347713105445
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:18022 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17896828  0.03386717 -0.08731195 -0.07207683 -0.01787182  0.02430519]] probs:[[0.14606667 0.18071091 0.16008733 0.16254497 0.17159887 0.1789912 ]] entropy:[1.7891545]
DEBUG:chainerrl.agents.a3c:t:18023 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17896828  0.0338672  -0.08731192 -0.07207689 -0.01787182  0.02430522]] probs:[[0.14606668 0.18071093 0.16008735 0.16254495 0.17159888 0.17899121]] entropy:[1.7891545]
DEBUG:chainerrl.agents.a3c:t:18024 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17896816  0.0338672  -0.08731212 -0.07207687 -0.01787182  0.02430503]] probs:[[0.1460667  0.18071093 0.16008732 0.16254497 0.17159888 0.17899118]] entropy:[1.7891545]
DEBUG:chainerrl.ag

DEBUG:chainerrl.agents.a3c:t:18050 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16091707  0.00985052 -0.02402593 -0.08882086 -0.06503737  0.02681237]] probs:[[0.14892453 0.17665651 0.17077225 0.16005795 0.16391031 0.17967848]] entropy:[1.7897851]
DEBUG:chainerrl.agents.a3c:t:18051 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16091913  0.00984994 -0.02402667 -0.08882709 -0.06504121  0.02681365]] probs:[[0.1489245  0.17665675 0.17077245 0.16005723 0.16390999 0.17967907]] entropy:[1.7897849]
DEBUG:chainerrl.agents.a3c:t:18052 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16091913  0.00984994 -0.02402667 -0.08882709 -0.06504121  0.02681365]] probs:[[0.1489245  0.17665675 0.17077245 0.16005723 0.16390999 0.17967907]] entropy:[1.7897849]
DEBUG:chainerrl.agents.a3c:t:18053 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16091913  0.00984994 -0.02402667 -0.08882709 -0.06504121  0.02681365]] prob

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:18078 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10212556 -0.04602072 -0.02657519 -0.07086122 -0.04554797 -0.00760503]] probs:[[0.15809566 0.16721912 0.1705026  0.16311648 0.1672982  0.17376794]] entropy:[1.7913021]
DEBUG:chainerrl.agents.a3c:t:18079 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10212556 -0.04602072 -0.02657519 -0.07086122 -0.04554797 -0.00760503]] probs:[[0.15809566 0.16721912 0.1705026  0.16311648 0.1672982  0.17376794]] entropy:[1.7913021]
DEBUG:chainerrl.agents.a3c:t:18080 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10212561 -0.04602066 -0.02657519 -0.07086116 -0.045548   -0.00760506]] probs:[[0.15809564 0.16721913 0.1705026  0.16311648 0.16729818 0.17376794]] entropy:[1.7913021]
DEBUG:chainerrl.agents.a3c:t:18081 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10341332 -0.04606793 -0.02688374 -0.07041

DEBUG:chainerrl.agents.a3c:t:18107 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05835308  0.2363961  -0.1070749  -0.13566358 -0.1360622  -0.11651237]] probs:[[0.16421518 0.22050641 0.15640609 0.15199795 0.15193738 0.15493695]] entropy:[1.7817731]
DEBUG:chainerrl.agents.a3c:t:18108 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.058353    0.2363961  -0.10707487 -0.13566358 -0.13606219 -0.11651245]] probs:[[0.16421519 0.22050641 0.15640609 0.15199795 0.15193738 0.15493694]] entropy:[1.7817731]
DEBUG:chainerrl.agents.a3c:t:18109 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05835309  0.23639604 -0.10707487 -0.13566355 -0.13606232 -0.11651251]] probs:[[0.16421519 0.22050643 0.15640612 0.15199798 0.15193738 0.15493695]] entropy:[1.7817731]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.570916] v_loss:[[0.00025129]]
DEBUG:chainerrl.agents.a3c:grad norm:0.7454890767379694
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:18135 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0722834   0.08010976 -0.0424354  -0.0666595  -0.12678464 -0.09117185]] probs:[[0.16316403 0.19002378 0.16810757 0.16408424 0.15450938 0.16011104]] entropy:[1.7895458]
DEBUG:chainerrl.agents.a3c:t:18136 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07219999  0.0803628  -0.04246693 -0.0666287  -0.12656514 -0.09111275]] probs:[[0.16316053 0.19005193 0.16808464 0.16407208 0.15452708 0.16010371]] entropy:[1.7895429]
DEBUG:chainerrl.agents.a3c:t:18137 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07217267  0.08051156 -0.04250867 -0.06662123 -0.12642175 -0.09105765]] probs:[[0.16315556 0.19006921 0.16806789 0.16406381 0.15454032 0.16010328]] entropy:[1.7895416]
DEBUG:chainerrl.agents.a3c:t:18138 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07245874  0.07946382 -0.04231581 -0.06671965 -0.12736739 -0.0913575 ]] prob

DEBUG:chainerrl.agents.a3c:t:18164 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0803199   0.01466812 -0.04212607 -0.05063663 -0.09496322 -0.06101758]] probs:[[0.16197848 0.17811893 0.16828471 0.1668586  0.15962386 0.16513541]] entropy:[1.7911422]
DEBUG:chainerrl.agents.a3c:t:18165 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0803199   0.01466812 -0.04212607 -0.05063663 -0.09496322 -0.06101758]] probs:[[0.16197848 0.17811893 0.16828471 0.1668586  0.15962386 0.16513541]] entropy:[1.7911422]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.8244054] v_loss:[[0.00160542]]
DEBUG:chainerrl.agents.a3c:grad norm:2.143833463017396
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:18166 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08898091 -0.0049265  -0.03855123 -0.04932929 -0.09212287 -0.03991342]] probs:[[0.16059063 0.1746725  0.16889684 0.16708623 0.16008684 0.16866693]] entropy:[1.7912989]
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:18192 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09865452 -0.01512452 -0.04802922 -0.04195739 -0.05654232 -0.05282731]] probs:[[0.15905112 0.17290731 0.16731043 0.1683294  0.16589215 0.16650958]] entropy:[1.791456]
DEBUG:chainerrl.agents.a3c:t:18193 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09850729 -0.01429437 -0.04822422 -0.04181418 -0.05593035 -0.05273364]] probs:[[0.1590307  0.17300323 0.16723172 0.16830713 0.16594796 0.1664793 ]] entropy:[1.7914523]
DEBUG:chainerrl.agents.a3c:t:18194 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09856105 -0.01419039 -0.0481304  -0.04173457 -0.05583943 -0.05286666]] probs:[[0.15901715 0.17301576 0.16724214 0.16831522 0.16595781 0.1664519 ]] entropy:[1.791451]
DEBUG:chainerrl.agents.a3c:t:18195 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09855357 -0.01418792 -0.04814258 -0.0417401  -0.05583896 -0.05285214]] probs:

DEBUG:chainerrl.agents.a3c:t:18221 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09649238 -0.02720821 -0.04605689 -0.05882766 -0.05955533 -0.05927404]] probs:[[0.1603232  0.1718249  0.16861656 0.16647689 0.1663558  0.1664026 ]] entropy:[1.7915462]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4772522] v_loss:[[2.791469e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.06060711077524589
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:18222 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09496871 -0.02632463 -0.05444547 -0.05218688 -0.06007663 -0.05953078]] probs:[[0.16057289 0.17198238 0.16721344 0.16759154 0.16627449 0.16636528]] entropy:[1.7915585]
DEBUG:chainerrl.agents.a3c:t:18223 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09496884 -0.02632495 -0.05444558 -0.05218668 -0.06007669 -0.05953116]] probs:[[0.16057289 0.17198235 0.16721345 0.1675916  0.1662745  0.16636524]] entropy:[1.7915587]
DEBUG:chainerrl.ag

DEBUG:chainerrl.agents.a3c:t:18249 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09291464 -0.03690911 -0.06221196 -0.0548518  -0.04966307 -0.04929453]] probs:[[0.16086572 0.17013215 0.16588132 0.16710675 0.16797607 0.168038  ]] entropy:[1.7916083]
DEBUG:chainerrl.agents.a3c:t:18250 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09291466 -0.03690905 -0.0622119  -0.05485184 -0.04966311 -0.0492944 ]] probs:[[0.16086571 0.17013215 0.16588132 0.16710673 0.16797605 0.168038  ]] entropy:[1.7916083]
DEBUG:chainerrl.agents.a3c:t:18251 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09291466 -0.03690905 -0.0622119  -0.05485184 -0.04966311 -0.0492944 ]] probs:[[0.16086571 0.17013215 0.16588132 0.16710673 0.16797605 0.168038  ]] entropy:[1.7916083]
DEBUG:chainerrl.agents.a3c:t:18252 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09291464 -0.03690911 -0.06221196 -0.0548518  -0.04966307 -0.04929453]] prob

INFO: outdir:result global_step:37219 local_step:18265 R:0.5
INFO: statistics:[('average_value', 0.33027951732202737), ('average_entropy', 1.7860681928122835)]


DEBUG:chainerrl.agents.a3c:t:18266 r:0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.05034078 -0.16101749 -0.00079162 -0.00668753 -0.01242733 -0.04979932]] probs:[[0.18024266 0.14590354 0.17125808 0.17025132 0.16927692 0.16306746]] entropy:[1.78971]
DEBUG:chainerrl.agents.a3c:t:18267 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01340193 -0.18052061 -0.07272235 -0.05917706 -0.01180355 -0.08526405]] probs:[[0.18010394 0.14835553 0.16524181 0.16749528 0.17562108 0.16318233]] entropy:[1.7899058]
DEBUG:chainerrl.agents.a3c:t:18268 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00675327 -0.18279013 -0.08483026 -0.06622741 -0.01340255 -0.09301544]] probs:[[0.18003313 0.14894804 0.16427755 0.16736218 0.17644075 0.1629384 ]] entropy:[1.7899185]
DEBUG:chainerrl.agents.a3c:t:18269 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.00567757 -0.18296021 -0.08649786 -0.06714002 -0.01366906 -0.09411149]] probs:[[

DEBUG:chainerrl.agents.a3c:t:18295 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01177127 -0.14453365 -0.09923435 -0.07328498 -0.03428885 -0.09239743]] probs:[[0.17753984 0.15546687 0.16267136 0.16694781 0.17358674 0.16378734]] entropy:[1.790812]
DEBUG:chainerrl.agents.a3c:t:18296 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01177129 -0.14453368 -0.09923435 -0.07328498 -0.03428882 -0.09239738]] probs:[[0.17753984 0.15546685 0.16267136 0.16694781 0.17358674 0.16378735]] entropy:[1.790812]
DEBUG:chainerrl.agents.a3c:t:18297 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01177129 -0.14453365 -0.09923441 -0.07328494 -0.03428875 -0.09239742]] probs:[[0.17753984 0.15546687 0.16267134 0.16694783 0.17358676 0.16378734]] entropy:[1.790812]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4418988] v_loss:[[1.2557251e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.04120939830275738
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agen

DEBUG:chainerrl.agents.a3c:t:18323 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02363097 -0.1340146  -0.09305088 -0.07004178 -0.05158984 -0.10322649]] probs:[[0.17608754 0.15768473 0.16427822 0.16810192 0.17123254 0.16261506]] entropy:[1.7911166]
DEBUG:chainerrl.agents.a3c:t:18324 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01040734 -0.13894124 -0.09352497 -0.07192807 -0.04283087 -0.09996681]] probs:[[0.17786029 0.15640746 0.16367468 0.167248   0.17218593 0.16262369]] entropy:[1.7909014]
DEBUG:chainerrl.agents.a3c:t:18325 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.02837345 -0.13279468 -0.092916   -0.07023071 -0.05405379 -0.10489946]] probs:[[0.1754902  0.15808961 0.16452141 0.16829628 0.17104092 0.16256164]] entropy:[1.7911757]
DEBUG:chainerrl.agents.a3c:t:18326 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0326329  -0.13197376 -0.09275639 -0.07041382 -0.05641175 -0.10637175]] prob

DEBUG:chainerrl.agents.a3c:t:18352 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06118331 -0.10314853 -0.06970392 -0.04096638 -0.00476813 -0.06431862]] probs:[[0.1659545  0.1591343  0.16454649 0.16934374 0.17558599 0.165435  ]] entropy:[1.7913111]
DEBUG:chainerrl.agents.a3c:t:18353 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06117759 -0.10315645 -0.06971835 -0.04096752 -0.0047483  -0.06433616]] probs:[[0.16595584 0.15913339 0.16454448 0.16934393 0.17558987 0.16543248]] entropy:[1.7913108]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.531405] v_loss:[[9.5399504e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.12698468233883048
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:18354 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09615435 -0.11023164 -0.08857624 -0.05186226  0.01860035 -0.08106823]] probs:[[0.16192457 0.15966108 0.16315632 0.16925776 0.18161432 0.16438591]] entropy:[1.790818]
DEBUG:chainerrl.age

DEBUG:chainerrl.agents.a3c:t:18380 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05047495 -0.03189226 -0.11037246 -0.08329139 -0.03729684 -0.11074683]] probs:[[0.16997819 0.17316638 0.16009584 0.16449064 0.17223302 0.16003591]] entropy:[1.7912304]
DEBUG:chainerrl.agents.a3c:t:18381 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05049765 -0.03183987 -0.11044654 -0.08368254 -0.03742434 -0.11072887]] probs:[[0.16998965 0.17319106 0.1600984  0.16444115 0.17222658 0.16005321]] entropy:[1.7912298]
DEBUG:chainerrl.agents.a3c:t:18382 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05049929 -0.03185347 -0.1104349  -0.08360284 -0.03739773 -0.11073801]] probs:[[0.16998672 0.173186   0.1600978  0.16445169 0.17222847 0.16004927]] entropy:[1.79123]
DEBUG:chainerrl.agents.a3c:t:18383 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05046758 -0.0318974  -0.1103699  -0.08325501 -0.0372814  -0.11075111]] probs:

DEBUG:chainerrl.agents.a3c:t:18409 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0368411   0.01683151 -0.02561695 -0.08633531 -0.06446129 -0.07044855]] probs:[[0.16784589 0.17710076 0.16974044 0.15974072 0.1632734  0.16229875]] entropy:[1.7911686]
DEBUG:chainerrl.agents.a3c:pi_loss:[-0.98679984] v_loss:[[0.0026899]]
DEBUG:chainerrl.agents.a3c:grad norm:24.15094784728789
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:18410 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05341006  0.01395781 -0.0034462  -0.10084091 -0.11126529  0.04955364]] probs:[[0.16321637 0.17459075 0.17157847 0.15565561 0.15404142 0.18091738]] entropy:[1.790011]
DEBUG:chainerrl.agents.a3c:t:18411 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05319994  0.01408223 -0.00324577 -0.10063779 -0.11133249  0.04971325]] probs:[[0.16322774 0.17458794 0.17158873 0.15566535 0.15400943 0.18092084]] entropy:[1.790009]
DEBUG:chainerrl.agents.a

DEBUG:chainerrl.agents.a3c:t:18437 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01856621 -0.02949061 -0.06934891 -0.07371384 -0.05388928 -0.00657857]] probs:[[0.1758692  0.16761735 0.1610678  0.16036628 0.1635772  0.17150213]] entropy:[1.7911932]
DEBUG:chainerrl.agents.a3c:t:18438 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01856685 -0.02949179 -0.0693508  -0.07371263 -0.05388831 -0.00658143]] probs:[[0.17586942 0.16761725 0.16106759 0.16036656 0.16357745 0.17150176]] entropy:[1.7911934]
DEBUG:chainerrl.agents.a3c:t:18439 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.018568   -0.02949485 -0.06935551 -0.07370935 -0.05388637 -0.00658856]] probs:[[0.17586987 0.16761698 0.16106707 0.16036734 0.163578   0.17150079]] entropy:[1.7911932]
DEBUG:chainerrl.agents.a3c:t:18440 r:0.15 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.01856797 -0.02949509 -0.06935574 -0.07370913 -0.05388622 -0.00658906]] pro

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.608866] v_loss:[[0.00037465]]
DEBUG:chainerrl.agents.a3c:grad norm:0.7705943766647904
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:18466 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07010044 -0.08912788 -0.04536675 -0.0959738   0.31267542 -0.23617832]] probs:[[0.15888193 0.15588738 0.16286066 0.15482384 0.2329764  0.13456985]] entropy:[1.7757093]
DEBUG:chainerrl.agents.a3c:t:18467 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07134356 -0.089485   -0.04514649 -0.09563133  0.31205124 -0.23655961]] probs:[[0.1587418  0.15588796 0.16295533 0.15493277 0.23291506 0.1345671 ]] entropy:[1.7757325]
DEBUG:chainerrl.agents.a3c:t:18468 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07110965 -0.08941783 -0.04518792 -0.09569578  0.31216866 -0.23648788]] probs:[[0.15876816 0.15588786 0.16293752 0.15491226 0.23292659 0.13456762]] entropy:[1.7757281]
DEBUG:chainerrl.agents

DEBUG:chainerrl.agents.a3c:t:18494 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07412767 -0.05329834 -0.01716154 -0.06359041  0.1391356  -0.17310205]] probs:[[0.16041742 0.16379385 0.16982108 0.16211672 0.19855031 0.14530063]] entropy:[1.7872195]
DEBUG:chainerrl.agents.a3c:t:18495 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07412893 -0.05329803 -0.01716088 -0.06359016  0.13913472 -0.17310087]] probs:[[0.16041723 0.1637939  0.1698212  0.16211677 0.19855015 0.1453008 ]] entropy:[1.7872195]
DEBUG:chainerrl.agents.a3c:t:18496 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07412893 -0.05329803 -0.01716088 -0.06359016  0.13913472 -0.17310087]] probs:[[0.16041723 0.1637939  0.1698212  0.16211677 0.19855015 0.1453008 ]] entropy:[1.7872195]
DEBUG:chainerrl.agents.a3c:t:18497 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07412893 -0.05329803 -0.01716088 -0.06359016  0.13913472 -0.17310087]] prob

DEBUG:chainerrl.agents.a3c:grad norm:0.11356277018448048
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:18522 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07588048 -0.02696603 -0.01988657 -0.05677175  0.0430399  -0.11211336]] probs:[[0.1608309  0.16889344 0.17009336 0.16393372 0.18114068 0.15510786]] entropy:[1.7905623]
DEBUG:chainerrl.agents.a3c:t:18523 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07588051 -0.02696604 -0.0198866  -0.05677174  0.04303987 -0.11211336]] probs:[[0.1608309  0.16889344 0.17009336 0.16393374 0.18114068 0.15510786]] entropy:[1.7905623]
DEBUG:chainerrl.agents.a3c:t:18524 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07587953 -0.0269656  -0.01988588 -0.05677126  0.04303961 -0.112114  ]] probs:[[0.16083103 0.16889347 0.17009343 0.16393377 0.18114059 0.1551077 ]] entropy:[1.7905622]
DEBUG:chainerrl.agents.a3c:t:18525 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0

DEBUG:chainerrl.agents.a3c:t:18550 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15981376 -0.13016193 -0.12569073 -0.16543141  0.15631114  0.18929146]] probs:[[0.14601086 0.15040517 0.15107918 0.14519292 0.20029794 0.20701396]] entropy:[1.7796861]
DEBUG:chainerrl.agents.a3c:t:18551 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16000195 -0.13002256 -0.12575331 -0.1654543   0.15625863  0.18925242]] probs:[[0.14598891 0.15043183 0.15107545 0.1451951  0.20029499 0.20701371]] entropy:[1.7796865]
DEBUG:chainerrl.agents.a3c:t:18552 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16035463 -0.12974973 -0.12587436 -0.16549276  0.15616658  0.18917114]] probs:[[0.14594759 0.15048334 0.15106766 0.14519961 0.20029049 0.20701128]] entropy:[1.7796872]
DEBUG:chainerrl.agents.a3c:t:18553 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15981421 -0.13016202 -0.12569085 -0.16543128  0.15631098  0.18929121]] prob

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:18578 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1545813  -0.07575585  0.05223214 -0.13204202 -0.02963159  0.05475031]] probs:[[0.1492424  0.16148257 0.1835313  0.15264441 0.16910529 0.18399404]] entropy:[1.7884119]
DEBUG:chainerrl.agents.a3c:t:18579 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15459086 -0.07574245  0.05223621 -0.13203298 -0.02960255  0.05475325]] probs:[[0.14923972 0.16148339 0.18353051 0.15264452 0.16910878 0.18399304]] entropy:[1.7884117]
DEBUG:chainerrl.agents.a3c:t:18580 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15460035 -0.07575157  0.05225263 -0.13202418 -0.0295813   0.05476595]] probs:[[0.14923722 0.16148072 0.18353218 0.15264472 0.16911113 0.18399402]] entropy:[1.7884111]
DEBUG:chainerrl.agents.a3c:t:18581 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15439506 -0.07610737  0.05217875 -0.13222

DEBUG:chainerrl.agents.a3c:t:18607 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1524152  -0.06927573  0.015172   -0.11182727 -0.03831498  0.02567699]] probs:[[0.15091231 0.16399541 0.17844602 0.15716353 0.16915224 0.18033047]] entropy:[1.7897218]
DEBUG:chainerrl.agents.a3c:t:18608 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15177432 -0.07059842  0.01421423 -0.10843523 -0.03607792  0.02307261]] probs:[[0.15098597 0.1637536  0.17824793 0.15767343 0.16950515 0.17983393]] entropy:[1.7898004]
DEBUG:chainerrl.agents.a3c:t:18609 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15141612 -0.07133058  0.01374668 -0.10661124 -0.03482779  0.0215947 ]] probs:[[0.15102719 0.16361979 0.17814942 0.15794782 0.16970271 0.17955303]] entropy:[1.7898414]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.372273] v_loss:[[6.972774e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.14293610609046686
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.age

DEBUG:chainerrl.agents.a3c:t:18635 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12915058 -0.0683428  -0.01914596 -0.11349316  0.00747068 -0.0101778 ]] probs:[[0.15461944 0.16431326 0.17259909 0.15705945 0.1772548  0.17415395]] entropy:[1.7904147]
DEBUG:chainerrl.agents.a3c:t:18636 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13338903 -0.06783217 -0.01216087 -0.11029687  0.00449264  0.00304999]] probs:[[0.15351498 0.16391616 0.17330037 0.15710123 0.1762106  0.17595658]] entropy:[1.7902708]
DEBUG:chainerrl.agents.a3c:t:18637 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13146068 -0.06718675 -0.0130794  -0.11107073  0.00412341  0.00184938]] probs:[[0.1538351  0.16404738 0.17316806 0.157004   0.17617281 0.17577264]] entropy:[1.7903104]
DEBUG:chainerrl.agents.a3c:t:18638 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12410575 -0.06469414 -0.01734436 -0.11526522  0.00483412 -0.00437707]] prob

DEBUG:chainerrl.agents.a3c:t:18664 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04756086 -0.11041601  0.08389975 -0.23435706 -0.01514133 -0.01060698]] probs:[[0.16723019 0.15704244 0.19072486 0.13873631 0.17274055 0.1735256 ]] entropy:[1.7870848]
DEBUG:chainerrl.agents.a3c:t:18665 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04874482 -0.11034884  0.08451589 -0.23333463 -0.01369688 -0.00864652]] probs:[[0.16692176 0.15694904 0.1907161  0.13878632 0.17287575 0.17375104]] entropy:[1.7870762]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.6425374] v_loss:[[0.00058634]]
DEBUG:chainerrl.agents.a3c:grad norm:2.1182955809852753
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:18666 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04764987 -0.10378386  0.07641976 -0.19509695 -0.0325263  -0.02884991]] probs:[[0.16737434 0.15823779 0.1894836  0.14442867 0.16992489 0.17055073]] entropy:[1.7884152]
DEBUG:chainerrl.agent

DEBUG:chainerrl.agents.a3c:t:18692 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01119748 -0.11011601  0.02712253 -0.12883002 -0.03776842 -0.05197363]] probs:[[0.17337854 0.15704915 0.18015136 0.15413746 0.16883239 0.16645104]] entropy:[1.7903143]
DEBUG:chainerrl.agents.a3c:t:18693 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01096268 -0.11012243  0.02759187 -0.1287806  -0.03767338 -0.0521374 ]] probs:[[0.17339833 0.1570292  0.18021418 0.15412648 0.16882806 0.1664037 ]] entropy:[1.7903067]
DEBUG:chainerrl.agents.a3c:t:18694 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01520054 -0.10293544  0.02765269 -0.1302683  -0.02283607 -0.04924267]] probs:[[0.17211649 0.15765934 0.17965256 0.15340841 0.17080729 0.1663559 ]] entropy:[1.790337]
DEBUG:chainerrl.agents.a3c:t:18695 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.01738767 -0.09924314  0.02729389 -0.13090299 -0.01684951 -0.04795914]] probs

DEBUG:chainerrl.agents.a3c:t:18721 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04114053 -0.09447189  0.01653654 -0.09952427 -0.04238395 -0.06007433]] probs:[[0.16861334 0.15985656 0.1786244  0.15905094 0.16840383 0.1654509 ]] entropy:[1.7909979]
DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4335542] v_loss:[[3.468261e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.029738034994222944
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:18722 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04681034 -0.08798584  0.00856438 -0.09555604 -0.04003747 -0.05970767]] probs:[[0.16770077 0.16093583 0.17724907 0.15972212 0.16884042 0.16555175]] entropy:[1.7911628]
DEBUG:chainerrl.agents.a3c:t:18723 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.04717223 -0.08798047  0.00862514 -0.09544642 -0.04021366 -0.05982413]] probs:[[0.1676536  0.16094966 0.17727414 0.15975252 0.1688243  0.16554582]] entropy:[1.7911636]
DEBUG:chainerrl.a

DEBUG:chainerrl.agents.a3c:t:18749 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03882124 -0.06475475 -0.03181798 -0.07981057 -0.01654615 -0.05155095]] probs:[[0.16803494 0.16373323 0.16921587 0.16128656 0.17181994 0.16590947]] entropy:[1.7915401]
DEBUG:chainerrl.agents.a3c:t:18750 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0388744  -0.06479912 -0.03172277 -0.07976998 -0.01672518 -0.05163673]] probs:[[0.1680325  0.16373229 0.16923852 0.16129933 0.17179582 0.16590165]] entropy:[1.7915411]
DEBUG:chainerrl.agents.a3c:t:18751 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03858065 -0.06485406 -0.03184677 -0.07989509 -0.01679711 -0.05156247]] probs:[[0.16808201 0.16372342 0.16921766 0.16127928 0.1717836  0.16591409]] entropy:[1.7915404]
DEBUG:chainerrl.agents.a3c:t:18752 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.03883477 -0.06485524 -0.03171613 -0.07978217 -0.01687904 -0.05166729]] prob

DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3470036] v_loss:[[8.4295825e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.31313538279574715
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:18778 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07957942 -0.05836659 -0.03491381 -0.03955168 -0.02811829 -0.01239296]] probs:[[0.16050693 0.16394812 0.1678386  0.16706198 0.16898303 0.17166135]] entropy:[1.7915272]
DEBUG:chainerrl.agents.a3c:t:18779 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0795901  -0.05835987 -0.03492545 -0.03955264 -0.02810604 -0.01236922]] probs:[[0.16050467 0.16394866 0.16783606 0.16706125 0.16898452 0.17166483]] entropy:[1.791527]
DEBUG:chainerrl.agents.a3c:t:18780 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07958493 -0.05836354 -0.03491929 -0.03955222 -0.02811228 -0.01238127]] probs:[[0.1605058  0.16394834 0.1678374  0.16706161 0.16898377 0.17166308]] entropy:[1.791527]
DEBUG:chainerrl.age

DEBUG:chainerrl.agents.a3c:t:18806 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06585591 -0.04668933 -0.02475385 -0.03556897 -0.05330169 -0.05740101]] probs:[[0.16358088 0.16674641 0.17044449 0.16861103 0.16564746 0.1649698 ]] entropy:[1.7916651]
DEBUG:chainerrl.agents.a3c:t:18807 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06421044 -0.04664025 -0.0212755  -0.03431251 -0.05435066 -0.06030089]] probs:[[0.16377948 0.16668256 0.1709645  0.16875009 0.16540231 0.16442105]] entropy:[1.7916462]
DEBUG:chainerrl.agents.a3c:t:18808 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06464529 -0.04621405 -0.02026779 -0.03235037 -0.05383593 -0.05618722]] probs:[[0.1635012  0.16654268 0.17092037 0.16886765 0.16527814 0.16488998]] entropy:[1.7916456]
DEBUG:chainerrl.agents.a3c:t:18809 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06424998 -0.04660063 -0.02117747 -0.03412715 -0.05430318 -0.05990379]] prob

DEBUG:chainerrl.agents.a3c:grad norm:0.009721999500737342
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:18834 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07152475 -0.04853196 -0.03153933 -0.02912245 -0.0447722  -0.03632795]] probs:[[0.16206652 0.16583605 0.16867812 0.16908629 0.16646072 0.16787231]] entropy:[1.7916592]
DEBUG:chainerrl.agents.a3c:t:18835 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07152455 -0.04853228 -0.03153979 -0.02912329 -0.0447724  -0.03632972]] probs:[[0.16206665 0.1658361  0.16867815 0.16908625 0.1664608  0.16787212]] entropy:[1.7916594]
DEBUG:chainerrl.agents.a3c:t:18836 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07152605 -0.04853054 -0.03153757 -0.02911853 -0.04477162 -0.03632018]] probs:[[0.16206592 0.16583589 0.16867802 0.16908655 0.16646042 0.16787322]] entropy:[1.7916591]
DEBUG:chainerrl.agents.a3c:t:18837 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=

DEBUG:chainerrl.agents.a3c:t:18862 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09699073 -0.03799564 -0.04805299 -0.0525512   0.03578101 -0.0548422 ]] probs:[[0.15769376 0.16727681 0.1656029  0.16485964 0.18008459 0.16448238]] entropy:[1.7909586]
DEBUG:chainerrl.agents.a3c:t:18863 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09696078 -0.03795823 -0.04801825 -0.05252185  0.03574    -0.05486639]] probs:[[0.15769686 0.16728134 0.16560692 0.16486278 0.18007533 0.16447671]] entropy:[1.7909595]
DEBUG:chainerrl.agents.a3c:t:18864 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.097009   -0.03797384 -0.04809496 -0.0525656   0.03578441 -0.0548077 ]] probs:[[0.15769123 0.16728082 0.1655963  0.16485763 0.18008558 0.16448842]] entropy:[1.7909584]
DEBUG:chainerrl.agents.a3c:t:18865 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09698977 -0.03794423 -0.0480738  -0.05254005  0.03575253 -0.0548322 ]] prob

DEBUG:chainerrl.agents.a3c:grad norm:0.09014315205525661
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:18890 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11458696 -0.0511939   0.01619327 -0.04874458 -0.03140809 -0.02904544]] probs:[[0.15505646 0.16520421 0.17672053 0.16560934 0.16850546 0.16890405]] entropy:[1.7910131]
DEBUG:chainerrl.agents.a3c:t:18891 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11214361 -0.05408704  0.01787222 -0.04615917 -0.03109644 -0.02870964]] probs:[[0.1553213  0.16460563 0.17688711 0.16591579 0.16843385 0.16883634]] entropy:[1.7910192]
DEBUG:chainerrl.agents.a3c:t:18892 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11198308 -0.0542697   0.01798631 -0.04598513 -0.03107964 -0.02869514]] probs:[[0.1553386  0.16456747 0.17689858 0.1659365  0.16842839 0.16883048]] entropy:[1.7910194]
DEBUG:chainerrl.agents.a3c:t:18893 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0

DEBUG:chainerrl.agents.a3c:t:18918 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05677111 -0.07678246 -0.02929269 -0.05878155 -0.00895126 -0.0187667 ]] probs:[[0.16410239 0.16085114 0.1686742  0.1637728  0.1721404  0.17045905]] entropy:[1.7914679]
DEBUG:chainerrl.agents.a3c:t:18919 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05237228 -0.08054467 -0.0290411  -0.06081813 -0.01019338 -0.01850615]] probs:[[0.16488202 0.16030172 0.16877413 0.1634953  0.1719853  0.17056155]] entropy:[1.791454]
DEBUG:chainerrl.agents.a3c:t:18920 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.05382825 -0.0815579  -0.03155258 -0.06048279 -0.01256668 -0.02071337]] probs:[[0.16489846 0.1603887  0.1686129  0.16380477 0.17184474 0.17045046]] entropy:[1.7914721]
DEBUG:chainerrl.agents.a3c:t:18921 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0144773  -0.08226063  0.00783569 -0.05960853  0.01413961  0.0184921 ]] probs

DEBUG:chainerrl.agents.a3c:grad norm:0.12574903603194626
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:18946 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13354035 -0.0710221  -0.03527653 -0.02503889 -0.06819387 -0.03292207]] probs:[[0.15490048 0.16489372 0.17089455 0.1726531  0.16536073 0.17129739]] entropy:[1.7910962]
DEBUG:chainerrl.agents.a3c:t:18947 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13357131 -0.07104041 -0.03529472 -0.0250141  -0.06819652 -0.03294132]] probs:[[0.1548973  0.16489242 0.17089322 0.17265917 0.16536203 0.17129588]] entropy:[1.7910961]
DEBUG:chainerrl.agents.a3c:t:18948 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12508403 -0.03465095 -0.00746285 -0.01613329 -0.08632611 -0.02585726]] probs:[[0.15435888 0.16896868 0.17362563 0.17212674 0.16045895 0.17046109]] entropy:[1.7908858]
DEBUG:chainerrl.agents.a3c:t:18949 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0

DEBUG:chainerrl.agents.a3c:t:18974 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17374651  0.06625986 -0.14784278 -0.01156784  0.09175561 -0.11400025]] probs:[[0.14621544 0.18587744 0.15005246 0.17195965 0.19067746 0.15521751]] entropy:[1.7863883]
DEBUG:chainerrl.agents.a3c:t:18975 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17533462  0.06815226 -0.1488745  -0.01193187  0.08981021 -0.11079193]] probs:[[0.14597891 0.18622378 0.14989309 0.17189175 0.190301   0.15571149]] entropy:[1.7863889]
DEBUG:chainerrl.agents.a3c:t:18976 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17459871  0.06713638 -0.14832348 -0.01204524  0.0909389  -0.11183198]] probs:[[0.14608131 0.18602824 0.14997052 0.17186631 0.1905093  0.15554424]] entropy:[1.7863939]
DEBUG:chainerrl.agents.a3c:t:18977 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1748924   0.06751831 -0.14852674 -0.01204566  0.09052696 -0.11132878]] prob

DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19002 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10938389  0.0092051  -0.11980721 -0.04071937 -0.01402137  0.02300843]] probs:[[0.15556386 0.17515045 0.1539508  0.16662085 0.17112921 0.17758487]] entropy:[1.7902617]
DEBUG:chainerrl.agents.a3c:t:19003 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10937843  0.00920561 -0.11980227 -0.04072395 -0.01401829  0.02300819]] probs:[[0.15556449 0.17515029 0.15395133 0.16661985 0.1711295  0.17758457]] entropy:[1.7902619]
DEBUG:chainerrl.agents.a3c:t:19004 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10799614  0.00652217 -0.12169169 -0.0417926  -0.01659464  0.02146742]] probs:[[0.15600376 0.1749322  0.15388177 0.1666813  0.1709347  0.17756623]] entropy:[1.7903028]
DEBUG:chainerrl.agents.a3c:t:19005 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10713632  0.00500832 -0.1226342  -0.04248

DEBUG:chainerrl.agents.a3c:t:19031 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09696748 -0.00094866 -0.10376602 -0.02728181 -0.04992666  0.02275101]] probs:[[0.1576909  0.17358296 0.15662247 0.16907163 0.16528606 0.17774595]] entropy:[1.7906792]
DEBUG:chainerrl.agents.a3c:t:19032 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09670224 -0.00122041 -0.10368583 -0.02720274 -0.050465    0.02228219]] probs:[[0.15775667 0.17356212 0.1566588  0.16911066 0.16522217 0.1776896 ]] entropy:[1.7906888]
DEBUG:chainerrl.agents.a3c:t:19033 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09553435 -0.00239486 -0.10440871 -0.02752104 -0.05206252  0.02066613]] probs:[[0.15805751 0.17348628 0.15666106 0.16918153 0.1650801  0.17753352]] entropy:[1.7907157]
DEBUG:chainerrl.agents.a3c:pi_loss:[-0.10680953] v_loss:[[0.01708038]]
DEBUG:chainerrl.agents.a3c:grad norm:107.34168669433015
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agen

DEBUG:chainerrl.agents.a3c:t:19059 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11167119 -0.03165967  0.01566517 -0.01874168 -0.05774389 -0.00382174]] probs:[[0.15418367 0.1670271  0.17512165 0.16919875 0.16272666 0.17174211]] entropy:[1.790926]
DEBUG:chainerrl.agents.a3c:t:19060 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10146742 -0.03834575  0.0108939  -0.01305362 -0.04620297  0.00068492]] probs:[[0.15525815 0.16537422 0.17372099 0.16961023 0.16407993 0.1719565 ]] entropy:[1.7910784]
DEBUG:chainerrl.agents.a3c:t:19061 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08582199 -0.04530817  0.0134578  -0.01664442 -0.0575182   0.01523933]] probs:[[0.15742083 0.1639295  0.17385167 0.16869633 0.16194008 0.17416166]] entropy:[1.7910769]
DEBUG:chainerrl.agents.a3c:t:19062 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10026593 -0.04107826  0.01312387 -0.02048479 -0.05516098  0.0079317 ]] probs

DEBUG:chainerrl.agents.a3c:pi_loss:[-5.0227942] v_loss:[[0.20576024]]
DEBUG:chainerrl.agents.a3c:grad norm:238.64439130542465
DEBUG:chainerrl.agents.a3c:update


INFO: outdir:result global_step:38889 local_step:19087 R:1.2000000000000002
INFO: statistics:[('average_value', 0.2677636239991501), ('average_entropy', 1.7882076540507064)]
INFO: Starting new video recorder writing to /home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4
DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4
DEBUG: Starting ffmpeg with "ffmpeg -nostats -loglevel error -y -r 30 -f rawvideo -s:v 160x210 -pix_fmt rgb24 -i - -vf scale=trunc(iw/2)*2:trunc(ih/2)*2 -vcodec libx264 -pix_fmt yuv420p /home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4"


DEBUG:chainerrl.agents.a3c:t:19088 r:0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.12652935 -0.23084795 -0.0262224  -0.00246371 -0.07670669 -0.0404216 ]] probs:[[0.1961108  0.13718118 0.16833028 0.17237748 0.1600432  0.16595702]] entropy:[1.7862824]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19089 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.10102964 -0.25812936 -0.08737589 -0.04166374 -0.0881241  -0.07010612]] probs:[[0.19747514 0.13788965 0.16356446 0.17121488 0.16344212 0.16641371]] entropy:[1.7863095]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19090 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09670661 -0.26324046 -0.09640565 -0.04535137 -0.09190217 -0.07589401]] probs:[[0.19765747 0.13790824 0.16294672 0.17148186 0.1636822  0.16632354]] entropy:[1.7862663]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19091 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09574393 -0.26433146 -0.09793361 -0.04650911 -0.09326456 -0.07641307]] probs:[[0.19768412 0.13790913 0.1628766  0.17147155 0.16363886 0.16641979]] entropy:[1.7862598]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19092 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09410144 -0.26334262 -0.09815042 -0.04685989 -0.09490968 -0.07668126]] probs:[[0.1974776  0.13812804 0.16293858 0.17151381 0.16346747 0.16647455]] entropy:[1.7863336]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19093 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09386305 -0.2636896  -0.09836234 -0.0472368  -0.09547388 -0.07655391]] probs:[[0.19748287 0.13811673 0.16294724 0.17149465 0.16341859 0.1665399 ]] entropy:[1.7863303]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19094 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.094777   -0.26571682 -0.09886716 -0.04800957 -0.09550405 -0.07595772]] probs:[[0.19770685 0.13786729 0.16290076 0.17139979 0.16344953 0.1666758 ]] entropy:[1.7862473]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19095 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.09433263 -0.26631126 -0.09923827 -0.04865852 -0.09652483 -0.0757181 ]] probs:[[0.19771157 0.1378499  0.16291659 0.17136884 0.16335927 0.16679384]] entropy:[1.7862425]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3928775] v_loss:[[5.901202e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.3716245481199164
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19096 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04225788 -0.24036285 -0.08457796 -0.0504652  -0.10459962 -0.06311106]] probs:[[0.18834026 0.14197195 0.16590483 0.17166194 0.16261618 0.1695048 ]] entropy:[1.788331]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19097 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04226498 -0.24033137 -0.08455669 -0.05041121 -0.10453933 -0.06312827]] probs:[[0.1883368  0.14197281 0.16590415 0.17166685 0.16262186 0.16949758]] entropy:[1.7883316]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19098 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04226378 -0.24033284 -0.0845575  -0.05041352 -0.10454212 -0.06312787]] probs:[[0.18833682 0.14197278 0.16590422 0.17166668 0.1626216  0.16949786]] entropy:[1.7883316]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19099 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04226265 -0.2403343  -0.08455824 -0.05041576 -0.10454476 -0.06312749]] probs:[[0.18833686 0.14197277 0.16590433 0.17166652 0.1626214  0.16949815]] entropy:[1.7883316]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19100 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04226775 -0.24034871 -0.0845707  -0.05044479 -0.10457598 -0.06311601]] probs:[[0.18833993 0.14197232 0.16590412 0.17166346 0.16261815 0.16950199]] entropy:[1.7883312]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19101 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04226587 -0.24035156 -0.08457221 -0.05044894 -0.10458073 -0.063115  ]] probs:[[0.18834002 0.14197224 0.16590425 0.17166315 0.16261776 0.16950257]] entropy:[1.788331]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19102 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04225741 -0.2403413  -0.08456204 -0.05042636 -0.10455714 -0.06312516]] probs:[[0.18833701 0.14197262 0.1659047  0.17166573 0.16262037 0.16949956]] entropy:[1.7883315]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19103 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[ 0.04225642 -0.24034277 -0.08456273 -0.05042853 -0.10455957 -0.0631246 ]] probs:[[0.18833704 0.14197257 0.16590478 0.17166556 0.16262016 0.16949986]] entropy:[1.7883315]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4244179] v_loss:[[8.970116e-07]]
DEBUG:chainerrl.agents.a3c:grad norm:0.1229531057677493
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19104 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17599124 -0.14200826 -0.1272411  -0.04935364 -0.13154063  0.12459229]] probs:[[0.15115717 0.15638222 0.15870868 0.17156424 0.15802777 0.20415993]] entropy:[1.7862659]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19105 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17599174 -0.14202741 -0.12725575 -0.04938569 -0.13157561  0.12460466]] probs:[[0.1511592  0.1563814  0.15870854 0.17156112 0.15802443 0.2041653 ]] entropy:[1.7862648]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19106 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17599285 -0.14202885 -0.12725662 -0.0493876  -0.13157797  0.12460517]] probs:[[0.1511592  0.15638135 0.15870859 0.17156099 0.15802425 0.20416564]] entropy:[1.7862647]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19107 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17599608 -0.14201467 -0.1272445  -0.04936181 -0.13155094  0.12459453]] probs:[[0.15115719 0.156382   0.15870892 0.17156368 0.15802692 0.20416139]] entropy:[1.7862654]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19108 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17599668 -0.14201546 -0.1272449  -0.0493628  -0.13155223  0.12459482]] probs:[[0.15115717 0.15638193 0.15870893 0.1715636  0.15802678 0.20416155]] entropy:[1.7862656]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19109 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17599726 -0.14201617 -0.12724535 -0.04936377 -0.13155347  0.12459508]] probs:[[0.15115717 0.15638192 0.15870896 0.17156354 0.1580267  0.20416173]] entropy:[1.7862651]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19110 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17599781 -0.14201699 -0.12724572 -0.04936475 -0.13155466  0.12459534]] probs:[[0.15115717 0.1563819  0.15870899 0.17156346 0.1580266  0.20416191]] entropy:[1.7862653]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19111 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17599624 -0.14203341 -0.12725903 -0.04939355 -0.13158533  0.12460674]] probs:[[0.15115923 0.15638119 0.15870878 0.17156057 0.15802364 0.20416668]] entropy:[1.7862644]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5196054] v_loss:[[0.00011291]]
DEBUG:chainerrl.agents.a3c:grad norm:1.171859666206553
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19112 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.19880058 -0.14289832 -0.08916219 -0.01818219 -0.13741286  0.09342056]] probs:[[0.14762247 0.15610991 0.16472813 0.1768455  0.15696861 0.19772542]] entropy:[1.7869527]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19113 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.19881646 -0.14289638 -0.08915721 -0.0181806  -0.13741241  0.09341617]] probs:[[0.14762037 0.15611048 0.16472924 0.17684607 0.15696894 0.19772486]] entropy:[1.7869527]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19114 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1988165  -0.14289635 -0.08915718 -0.01818058 -0.1374124   0.09341619]] probs:[[0.14762035 0.15611048 0.16472924 0.17684607 0.15696894 0.19772486]] entropy:[1.7869527]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19115 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1988165  -0.14289635 -0.08915718 -0.01818058 -0.1374124   0.09341619]] probs:[[0.14762035 0.15611048 0.16472924 0.17684607 0.15696894 0.19772486]] entropy:[1.7869527]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19116 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.19879739 -0.14289868 -0.08916313 -0.01818244 -0.13741295  0.0934214 ]] probs:[[0.14762288 0.1561098  0.16472793 0.17684539 0.15696853 0.1977255 ]] entropy:[1.786953]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19117 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.19879733 -0.1428987  -0.08916316 -0.01818243 -0.13741294  0.09342144]] probs:[[0.14762288 0.1561098  0.16472791 0.17684539 0.15696853 0.1977255 ]] entropy:[1.786953]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19118 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.19881646 -0.14289638 -0.08915722 -0.01818061 -0.13741241  0.09341618]] probs:[[0.14762037 0.15611048 0.16472924 0.17684607 0.15696894 0.19772486]] entropy:[1.7869527]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19119 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1988165  -0.14289635 -0.08915718 -0.01818058 -0.1374124   0.09341619]] probs:[[0.14762035 0.15611048 0.16472924 0.17684607 0.15696894 0.19772486]] entropy:[1.7869527]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5352819] v_loss:[[0.00013416]]
DEBUG:chainerrl.agents.a3c:grad norm:0.2895561786860625
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19120 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18651554 -0.13390975 -0.08526399 -0.02511175 -0.12970899  0.06767723]] probs:[[0.14961997 0.15770155 0.16556273 0.17582732 0.15836543 0.19292304]] entropy:[1.7881824]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19121 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18649696 -0.13391195 -0.08526992 -0.02511357 -0.1297095   0.06768219]] probs:[[0.14962246 0.1577009  0.16556141 0.17582665 0.15836501 0.1929236 ]] entropy:[1.7881824]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19122 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18649688 -0.13391192 -0.08526988 -0.02511355 -0.12970954  0.06768224]] probs:[[0.14962246 0.1577009  0.16556141 0.17582664 0.15836501 0.1929236 ]] entropy:[1.7881825]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19123 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18651552 -0.13390978 -0.08526404 -0.02511174 -0.12970899  0.0676772 ]] probs:[[0.14961997 0.15770154 0.16556272 0.17582731 0.15836541 0.19292302]] entropy:[1.7881823]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19124 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18651554 -0.13390975 -0.08526399 -0.02511175 -0.12970899  0.06767723]] probs:[[0.14961997 0.15770155 0.16556273 0.17582732 0.15836543 0.19292304]] entropy:[1.7881824]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19125 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18649696 -0.13391195 -0.08526992 -0.02511357 -0.1297095   0.06768219]] probs:[[0.14962246 0.1577009  0.16556141 0.17582665 0.15836501 0.1929236 ]] entropy:[1.7881824]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19126 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18649688 -0.13391192 -0.08526988 -0.02511355 -0.12970954  0.06768224]] probs:[[0.14962246 0.1577009  0.16556141 0.17582664 0.15836501 0.1929236 ]] entropy:[1.7881825]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19127 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18649688 -0.13391192 -0.08526988 -0.02511355 -0.12970954  0.06768224]] probs:[[0.14962246 0.1577009  0.16556141 0.17582664 0.15836501 0.1929236 ]] entropy:[1.7881825]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5268729] v_loss:[[0.00011172]]
DEBUG:chainerrl.agents.a3c:grad norm:0.23557632958050984
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19128 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17548406 -0.12912174 -0.08466044 -0.02458678 -0.12613557  0.04723122]] probs:[[0.15138996 0.15857399 0.16578348 0.17604794 0.15904823 0.18915641]] entropy:[1.7889454]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19129 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.17548414 -0.1291218  -0.0846604  -0.02458678 -0.12613554  0.04723126]] probs:[[0.15138994 0.15857399 0.1657835  0.17604794 0.15904824 0.18915643]] entropy:[1.7889454]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19130 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13183303 -0.13199657 -0.10854395 -0.04835024 -0.1084668   0.03632121]] probs:[[0.15829733 0.15827145 0.1620272  0.17207973 0.1620397  0.18728459]] entropy:[1.7898878]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19131 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13075487 -0.13207793 -0.10876156 -0.0487413  -0.10863627  0.03661342]] probs:[[0.158455   0.1582455  0.16197856 0.17199825 0.16199885 0.18732385]] entropy:[1.78989]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19132 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12953795 -0.13217473 -0.10922115 -0.04910019 -0.10878474  0.03702848]] probs:[[0.15863284 0.1582151  0.16188872 0.17192015 0.16195938 0.18738377]] entropy:[1.7898891]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19133 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13142118 -0.1320424  -0.10860489 -0.04849062 -0.10856851  0.03644273]] probs:[[0.15835775 0.1582594  0.16201243 0.17205037 0.16201831 0.18730168]] entropy:[1.7898883]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19134 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1319745  -0.13202299 -0.10842678 -0.04825465 -0.10855421  0.03636865]] probs:[[0.15827437 0.1582667  0.1620456  0.17209555 0.16202496 0.1872928 ]] entropy:[1.7898852]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19135 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13182902 -0.13065167 -0.1089458  -0.04882006 -0.10899789  0.03736033]] probs:[[0.15827006 0.1584565  0.16193354 0.17196858 0.1619251  0.18744625]] entropy:[1.7898747]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.453229] v_loss:[[3.478001e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.17629432156644062
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19136 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.27448553 -0.25121737  0.02120292  0.07674078  0.02643473 -0.09389204]] probs:[[0.13628748 0.13949583 0.18317738 0.19363846 0.18413824 0.16326262]] entropy:[1.7826735]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19137 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.27457458 -0.25117314  0.02125878  0.07677893  0.02637747 -0.09381887]] probs:[[0.13627358 0.13950017 0.18318522 0.19364333 0.1841253  0.16327243]] entropy:[1.7826717]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19138 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.27453056 -0.25126946  0.02126974  0.07679792  0.02640188 -0.09388775]] probs:[[0.13628072 0.13948792 0.18318878 0.19364864 0.18413134 0.16326256]] entropy:[1.7826688]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19139 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2745469  -0.2512447   0.0212789   0.07680169  0.02638847 -0.0938521 ]] probs:[[0.13627754 0.1394904  0.18318917 0.19364801 0.18412758 0.16326724]] entropy:[1.7826691]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19140 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.27327606 -0.2515114   0.02111533  0.07650697  0.02641517 -0.09356894]] probs:[[0.13643716 0.13943923 0.18314086 0.19357155 0.18411405 0.1632971 ]] entropy:[1.7827101]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19141 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.31406948 -0.25017473  0.04547006  0.09990149  0.00883398 -0.08254718]] probs:[[0.1306658  0.13928714 0.1872009  0.19767292 0.1804667  0.1647065 ]] entropy:[1.7806691]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19142 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.30616447 -0.25014806  0.04053825  0.0949421   0.01259879 -0.0856088 ]] probs:[[0.13179255 0.13938577 0.18640687 0.19682905 0.18127085 0.16431491]] entropy:[1.7811182]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19143 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.31252533 -0.25018686  0.04451321  0.09899368  0.00956062 -0.08311627]] probs:[[0.13088353 0.13930228 0.18704447 0.19751742 0.1806197  0.1646327 ]] entropy:[1.7807562]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.3783851] v_loss:[[3.3392593e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.7146829457141051
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19144 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2476583  -0.22465888  0.00597211  0.05147588  0.01669438 -0.09509624]] probs:[[0.14029074 0.14355473 0.18079202 0.18920879 0.18274096 0.16341272]] entropy:[1.7850447]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19145 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24765861 -0.22466098  0.00596995  0.05147434  0.01669801 -0.0950974 ]] probs:[[0.14029078 0.14355452 0.18079175 0.18920861 0.18274175 0.16341263]] entropy:[1.7850449]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19146 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24765967 -0.22466286  0.00596741  0.05147339  0.01670093 -0.09509915]] probs:[[0.14029074 0.14355436 0.18079144 0.18920858 0.1827424  0.16341247]] entropy:[1.7850447]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19147 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24765691 -0.22465904  0.00597177  0.05147465  0.01669438 -0.0950959 ]] probs:[[0.14029095 0.14355473 0.18079199 0.18920858 0.18274099 0.1634128 ]] entropy:[1.7850449]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19148 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24765342 -0.2246617   0.00596807  0.0514673   0.01670275 -0.0950958 ]] probs:[[0.14029148 0.1435544  0.18079138 0.18920726 0.18274258 0.16341287]] entropy:[1.7850449]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19149 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24765193 -0.22466154  0.00596797  0.0514639   0.01670824 -0.09509507]] probs:[[0.1402916  0.14355433 0.18079126 0.1892065  0.18274346 0.16341288]] entropy:[1.7850449]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19150 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24766198 -0.22466294  0.00596522  0.05147709  0.01669888 -0.09510186]] probs:[[0.14029054 0.14355448 0.1807912  0.18920946 0.1827422  0.16341217]] entropy:[1.7850446]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19151 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24772868 -0.2247027   0.00592902  0.05150568  0.01676277 -0.09515007]] probs:[[0.14028291 0.14355054 0.1807869  0.18921721 0.18275614 0.16340631]] entropy:[1.7850406]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4258231] v_loss:[[6.146233e-08]]
DEBUG:chainerrl.agents.a3c:grad norm:0.22392933891468522
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19152 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.22608952 -0.20230384 -0.00669654  0.02904271  0.0056536  -0.09425794]] probs:[[0.14364234 0.14709993 0.17888075 0.18538943 0.18110366 0.16388386]] entropy:[1.7868111]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19153 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.22395393 -0.20392339 -0.0059981   0.02933759  0.00613534 -0.09436645]] probs:[[0.14390364 0.14681518 0.17894882 0.18538514 0.1811333  0.16381396]] entropy:[1.7868061]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19154 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.23296969 -0.19708462 -0.00885184  0.02826238  0.00405923 -0.09401762]] probs:[[0.14279939 0.1480168  0.17867322 0.18542914 0.18099505 0.16408636]] entropy:[1.7868177]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19155 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24908143 -0.18629636 -0.01087659  0.02845711 -0.00071196 -0.09372411]] probs:[[0.14077266 0.14989442 0.17863613 0.18580258 0.18046117 0.16443305]] entropy:[1.7867118]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19156 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.23286512 -0.20024548 -0.01017429  0.02453229  0.00039101 -0.09362341]] probs:[[0.14309719 0.14784195 0.17879054 0.18510468 0.18068953 0.16447619]] entropy:[1.7868994]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19157 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24834603 -0.18674433 -0.01074486  0.02854082 -0.0004144  -0.0937734 ]] probs:[[0.14085916 0.14980917 0.17863804 0.18579565 0.18049304 0.16440505]] entropy:[1.786715]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19158 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2254005  -0.20427847 -0.00825664  0.02546607  0.00292659 -0.09400962]] probs:[[0.14396946 0.14704274 0.17888534 0.18502071 0.18089709 0.16418472]] entropy:[1.786913]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19159 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.24867424 -0.18638422 -0.01097171  0.02830689 -0.00055216 -0.09381095]] probs:[[0.14082803 0.14987917 0.17861667 0.18577209 0.18048751 0.16441648]] entropy:[1.7867218]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4380307] v_loss:[[1.0219395e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.18681930733016675
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19160 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2251864  -0.17335689 -0.01889802  0.01328919 -0.02012671 -0.08924171]] probs:[[0.14440781 0.15208976 0.17749274 0.18329868 0.1772748  0.16543627]] entropy:[1.7880622]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19161 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.21990873 -0.17596167 -0.02170506  0.00873374 -0.02047925 -0.0898871 ]] probs:[[0.14533637 0.15186591 0.17719567 0.18267222 0.17741302 0.16551678]] entropy:[1.7882406]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19162 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.22480494 -0.17302872 -0.01862441  0.01396505 -0.01930506 -0.08930757]] probs:[[0.14440337 0.15207696 0.17746812 0.18334699 0.17734738 0.16535719]] entropy:[1.7880522]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19163 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.20024902 -0.19011317 -0.01334599  0.01534015 -0.01297524 -0.08984039]] probs:[[0.14751478 0.14901757 0.17783059 0.18300574 0.17789653 0.16473481]] entropy:[1.788122]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19164 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1998163  -0.19021244 -0.01417587  0.01494998 -0.01294721 -0.09033756]] probs:[[0.14761506 0.14903955 0.17772694 0.18297951 0.17794545 0.16469358]] entropy:[1.7881421]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19165 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.20018494 -0.19016354 -0.01346876  0.0152985  -0.01295649 -0.08994077]] probs:[[0.14753023 0.14901613 0.17781599 0.18300556 0.1779071  0.16472498]] entropy:[1.7881237]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19166 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.20022309 -0.19021885 -0.01335392  0.01537462 -0.01288211 -0.08994891]] probs:[[0.14751983 0.14900304 0.17783065 0.18301356 0.17791457 0.1647183 ]] entropy:[1.7881188]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19167 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.2001997  -0.19021954 -0.01338397  0.01534609 -0.01288072 -0.08996389]] probs:[[0.14752467 0.14900436 0.177827   0.18301007 0.17791651 0.16471739]] entropy:[1.7881199]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.421732] v_loss:[[1.636435e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.13045006641922371
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19168 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18254116 -0.1764592  -0.02132262  0.00053244 -0.02169326 -0.08877806]] probs:[[0.15026858 0.1511853  0.17655684 0.18045796 0.17649141 0.16503993]] entropy:[1.7890472]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19169 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18292691 -0.17645033 -0.02121066  0.0007168  -0.02162744 -0.08886746]] probs:[[0.15021165 0.15118766 0.17657778 0.18049245 0.1765042  0.16502626]] entropy:[1.7890368]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19170 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18288173 -0.17643963 -0.02125869  0.00069631 -0.02166286 -0.08885643]] probs:[[0.15021965 0.1511905  0.17657074 0.18049023 0.1764994  0.16502944]] entropy:[1.7890387]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19171 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18249416 -0.17632514 -0.02157388  0.00040629 -0.02179083 -0.08876899]] probs:[[0.15028399 0.15121396 0.17652227 0.18044522 0.17648397 0.16505058]] entropy:[1.7890551]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19172 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18211165 -0.17603697 -0.02228015 -0.00074922 -0.02164338 -0.08922627]] probs:[[0.1503838  0.15130012 0.1764473  0.18028757 0.17655969 0.16502155]] entropy:[1.7890859]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19173 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1822971  -0.17635703 -0.02258764 -0.00019498 -0.02164361 -0.08957855]] probs:[[0.15036927 0.15126513 0.17640871 0.18040353 0.17657532 0.16497807]] entropy:[1.7890728]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19174 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18200748 -0.17637277 -0.02329311 -0.00054741 -0.02164065 -0.08998223]] probs:[[0.15044485 0.15129495 0.17632183 0.18037836 0.17661344 0.1649466 ]] entropy:[1.7890878]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19175 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.18154308 -0.1756255  -0.02462751 -0.00290888 -0.02168466 -0.09044975]] probs:[[0.15059942 0.15149325 0.17618579 0.18005416 0.17670505 0.16496228]] entropy:[1.7891505]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4269547] v_loss:[[2.2903558e-07]]
DEBUG:chainerrl.agents.a3c:grad norm:0.11351370210825926
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19176 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16338128 -0.1590805  -0.04347626 -0.02800361 -0.02806363 -0.09476452]] probs:[[0.1540216  0.15468545 0.1736424  0.17635    0.1763394  0.1649611 ]] entropy:[1.7901217]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19177 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16463365 -0.15754338 -0.04289472 -0.02987438 -0.02736989 -0.09488929]] probs:[[0.1538414  0.15493606 0.17375758 0.17603476 0.1764762  0.16495399]] entropy:[1.7901306]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19178 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16461127 -0.15746339 -0.04321327 -0.03020803 -0.02719778 -0.0952644 ]] probs:[[0.1538648  0.15496854 0.17372477 0.17599887 0.17652947 0.16491352]] entropy:[1.7901347]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19179 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16518338 -0.15821213 -0.0412401  -0.02771745 -0.02776352 -0.09424604]] probs:[[0.15367751 0.15475257 0.17395552 0.17632382 0.1763157  0.16497494]] entropy:[1.790089]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19180 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16512324 -0.15895244 -0.03964083 -0.02576152 -0.02831916 -0.09305549]] probs:[[0.15359198 0.15454268 0.17412649 0.17656009 0.17610909 0.16506961]] entropy:[1.7900579]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19181 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16570139 -0.15994646 -0.03642286 -0.02205822 -0.02939547 -0.09111313]] probs:[[0.15333371 0.15421867 0.17449485 0.17701949 0.1757254  0.16520794]] entropy:[1.7899901]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19182 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16559571 -0.15985028 -0.0373507  -0.022928   -0.0288951  -0.0919271 ]] probs:[[0.1534007  0.1542846  0.17439075 0.17692417 0.17587158 0.16512819]] entropy:[1.7900027]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19183 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.16702935 -0.16195053 -0.03191611 -0.01616307 -0.0296224  -0.08914647]] probs:[[0.15288438 0.15366283 0.17500162 0.17778026 0.17540348 0.16526741]] entropy:[1.7898536]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4283415] v_loss:[[1.7553461e-07]]
DEBUG:chainerrl.agents.a3c:grad norm:0.2363407942633994
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19184 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15045679 -0.14405511 -0.04965024 -0.04067628 -0.03987052 -0.09316576]] probs:[[0.1561423  0.15714508 0.17270316 0.17425998 0.17440043 0.16534907]] entropy:[1.7906823]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19185 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15080947 -0.1441971  -0.04941656 -0.04035491 -0.03962613 -0.09350326]] probs:[[0.15608634 0.15712185 0.17274253 0.17431496 0.17444205 0.16529231]] entropy:[1.7906711]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19186 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15128767 -0.14390343 -0.04927056 -0.04038274 -0.03956651 -0.0937467 ]] probs:[[0.15601762 0.15717396 0.17277429 0.17431672 0.17445907 0.16525833]] entropy:[1.7906672]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19187 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15132234 -0.14369161 -0.04929059 -0.04055867 -0.03969854 -0.09363147]] probs:[[0.15601382 0.15720886 0.1727726  0.17428786 0.17443782 0.16527908]] entropy:[1.7906716]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19188 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15137574 -0.14361803 -0.04926701 -0.04060009 -0.03972696 -0.09360506]] probs:[[0.15600556 0.15722051 0.17277676 0.17428072 0.17443295 0.16528352]] entropy:[1.7906721]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19189 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15127239 -0.14381903 -0.04926344 -0.04043803 -0.03962753 -0.09365589]] probs:[[0.1560182  0.15718539 0.17277353 0.17430505 0.17444639 0.16527143]] entropy:[1.7906691]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19190 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15098932 -0.1444235  -0.04932516 -0.04001387 -0.03928224 -0.09392466]] probs:[[0.15605795 0.15708597 0.17275797 0.17437407 0.1745017  0.16522233]] entropy:[1.7906606]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19191 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.15080608 -0.1440504  -0.04943328 -0.04037897 -0.03964943 -0.09337589]] probs:[[0.15608162 0.15713963 0.17273384 0.17430493 0.17443213 0.16530782]] entropy:[1.790673]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4319276] v_loss:[[6.0593504e-09]]
DEBUG:chainerrl.agents.a3c:grad norm:0.0471255197375534
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19192 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.14049305 -0.13278034 -0.05249419 -0.05158248 -0.04402417 -0.09399174]] probs:[[0.15768923 0.15891016 0.17219459 0.17235164 0.17365927 0.16519515]] entropy:[1.7909923]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19193 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.14052483 -0.13225904 -0.05236745 -0.05171566 -0.04399995 -0.09368546]] probs:[[0.15766348 0.1589721  0.17219375 0.17230603 0.17364062 0.16522403]] entropy:[1.7909962]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19194 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.14017119 -0.132171   -0.05253746 -0.05187113 -0.04394334 -0.0934315 ]] probs:[[0.15770891 0.15897568 0.17215319 0.17226794 0.17363907 0.16525516]] entropy:[1.7910018]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19195 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.14044204 -0.13208434 -0.05236055 -0.05178542 -0.04398398 -0.09351144]] probs:[[0.15766683 0.15899009 0.17218433 0.1722834  0.17363271 0.16524261]] entropy:[1.7909987]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19196 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.14013983 -0.1320867  -0.0525292  -0.05189909 -0.04393485 -0.09335037]] probs:[[0.15770917 0.15898435 0.1721495  0.172258   0.17363538 0.16526365]] entropy:[1.7910031]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19197 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.14005856 -0.1320823  -0.05257384 -0.05193456 -0.04392221 -0.09329967]] probs:[[0.15772036 0.15898341 0.17214003 0.1722501  0.17363578 0.16527033]] entropy:[1.7910041]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19198 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.14041361 -0.1320439  -0.05236365 -0.05179816 -0.04397505 -0.09346462]] probs:[[0.15766858 0.15899375 0.1721808  0.17227821 0.17363124 0.16524747]] entropy:[1.7909995]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19199 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.14012913 -0.13205948 -0.05252653 -0.05190357 -0.04392799 -0.09332057]] probs:[[0.15770899 0.15898679 0.17214791 0.17225519 0.17363451 0.16526662]] entropy:[1.7910032]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4343195] v_loss:[[2.6979997e-08]]
DEBUG:chainerrl.agents.a3c:grad norm:0.03980897901094271
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19200 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13071004 -0.12454822 -0.05806006 -0.05761103 -0.05032049 -0.09140976]] probs:[[0.15920617 0.1601902  0.171203   0.17127989 0.17253318 0.16558759]] entropy:[1.7912323]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19201 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13046321 -0.12453368 -0.05818159 -0.0577295  -0.0503013  -0.09123008]] probs:[[0.15924011 0.16018713 0.17117643 0.17125383 0.17253068 0.16561177]] entropy:[1.7912354]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19202 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13040693 -0.12453977 -0.05820961 -0.05773697 -0.05028835 -0.09120318]] probs:[[0.15924771 0.1601848  0.17117018 0.1712511  0.17253144 0.16561481]] entropy:[1.791236]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19203 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13039993 -0.12452713 -0.05821143 -0.05775352 -0.05029163 -0.09118133]] probs:[[0.15924832 0.16018632 0.17116934 0.17124772 0.17253034 0.16561791]] entropy:[1.7912362]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19204 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13039447 -0.12453368 -0.05821403 -0.05773936 -0.05028328 -0.091188  ]] probs:[[0.15924886 0.16018493 0.17116852 0.17124979 0.17253141 0.16561647]] entropy:[1.7912362]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19205 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13039424 -0.12453214 -0.05821354 -0.0577381  -0.05028203 -0.0911855 ]] probs:[[0.15924872 0.16018498 0.17116842 0.1712498  0.17253143 0.16561668]] entropy:[1.7912362]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19206 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1303973  -0.12452015 -0.05820992 -0.05775025 -0.05028713 -0.0911704 ]] probs:[[0.15924796 0.16018665 0.17116874 0.17124745 0.17253026 0.16561891]] entropy:[1.7912363]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19207 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.13039829 -0.12451056 -0.05820525 -0.05775088 -0.0502893  -0.09116291]] probs:[[0.15924735 0.16018772 0.17116904 0.17124684 0.17252938 0.16561966]] entropy:[1.7912364]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4346251] v_loss:[[3.0609797e-08]]
DEBUG:chainerrl.agents.a3c:grad norm:0.017976233164867326
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19208 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12348145 -0.11895269 -0.06179863 -0.06175086 -0.05539947 -0.09073171]] probs:[[0.16036989 0.16109781 0.1705734  0.17058155 0.17166843 0.1657089 ]] entropy:[1.7913767]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19209 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12350351 -0.11856351 -0.06162033 -0.06192409 -0.05564502 -0.09050371]] probs:[[0.16035742 0.16115154 0.17059433 0.17054252 0.17161673 0.16573747]] entropy:[1.7913803]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19210 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12364056 -0.11585962 -0.0603619  -0.06307815 -0.05737932 -0.08896403]] probs:[[0.16027287 0.1615248  0.17074248 0.17027932 0.17125249 0.16592805]] entropy:[1.7914027]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19211 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12519568 -0.09840076 -0.05256587 -0.0709041  -0.06699059 -0.07931241]] probs:[[0.15961267 0.1639473  0.17163667 0.16851784 0.16917863 0.16710684]] entropy:[1.7914826]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19212 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12641825 -0.05155564 -0.02546147 -0.08871753 -0.0892167  -0.04960877]] probs:[[0.15772562 0.16998659 0.17448063 0.1637855  0.16370377 0.17031786]] entropy:[1.7912108]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19213 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12570459 -0.02198156 -0.01009515 -0.09684349 -0.09744576 -0.0271378 ]] probs:[[0.15641172 0.17350647 0.17558114 0.16099171 0.16089478 0.17261413]] entropy:[1.7907609]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19214 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1257387  -0.01622605 -0.00686341 -0.09841071 -0.09937147 -0.02178273]] probs:[[0.15610516 0.17417186 0.17581023 0.16043003 0.16027597 0.17320673]] entropy:[1.7906358]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19215 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.12570487 -0.01545017 -0.00658096 -0.09861975 -0.09955717 -0.02108724]] probs:[[0.15607184 0.17426395 0.1758164  0.16035682 0.16020659 0.17328438]] entropy:[1.7906206]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4384097] v_loss:[[3.0537925e-07]]
DEBUG:chainerrl.agents.a3c:grad norm:0.013138965112871315
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19216 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.11909367 -0.01575979 -0.01091295 -0.10363373 -0.10517056 -0.02255761]] probs:[[0.15737969 0.17451227 0.17536016 0.15983166 0.15958622 0.17333   ]] entropy:[1.790665]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19217 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.1168737  -0.01824443 -0.01887905 -0.10013603 -0.09817392 -0.02846958]] probs:[[0.15785824 0.17422137 0.17411083 0.16052265 0.16083792 0.172449  ]] entropy:[1.790873]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19218 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10789074 -0.03208625 -0.04778646 -0.08918504 -0.0777231  -0.047437  ]] probs:[[0.15993536 0.17253052 0.16984291 0.16295521 0.16483374 0.16990228]] entropy:[1.7914068]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19219 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.10111943 -0.04540851 -0.07022391 -0.07677004 -0.05964832 -0.06277944]] probs:[[0.16142713 0.1706756  0.16649234 0.16540603 0.16826245 0.16773643]] entropy:[1.7916126]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19220 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09894423 -0.04806454 -0.07527161 -0.07335919 -0.05469191 -0.06634495]] probs:[[0.16180055 0.17024593 0.16567649 0.16599362 0.16912138 0.16716203]] entropy:[1.7916266]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19221 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09862123 -0.04847609 -0.07599741 -0.07284388 -0.0539905  -0.06689987]] probs:[[0.16185711 0.17018041 0.16556068 0.1660836  0.16924454 0.16707374]] entropy:[1.7916278]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19222 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09740257 -0.04700048 -0.07565175 -0.07163925 -0.0515208  -0.06761511]] probs:[[0.16189174 0.17026055 0.16545159 0.1661168  0.16949266 0.16678663]] entropy:[1.7916228]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19223 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09739825 -0.04701016 -0.07566586 -0.07163139 -0.05151113 -0.06762447]] probs:[[0.16189276 0.17025924 0.16544957 0.16611843 0.16949463 0.16678539]] entropy:[1.7916229]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4369664] v_loss:[[1.6693213e-07]]
DEBUG:chainerrl.agents.a3c:grad norm:0.007244622146944392
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19224 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09267393 -0.04760551 -0.07785676 -0.07228496 -0.05394533 -0.06828649]] probs:[[0.16271272 0.17021368 0.16514161 0.16606432 0.16913798 0.16672966]] entropy:[1.7916484]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19225 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09267306 -0.04760447 -0.07785636 -0.07228326 -0.05394282 -0.06828701]] probs:[[0.1627127  0.17021368 0.16514152 0.16606444 0.16913822 0.16672939]] entropy:[1.7916484]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19226 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09267306 -0.04760423 -0.07785615 -0.07228288 -0.05394248 -0.06828655]] probs:[[0.16271266 0.1702137  0.16514152 0.16606447 0.16913825 0.16672945]] entropy:[1.7916485]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19227 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09191209 -0.0466494  -0.07759766 -0.07159667 -0.05244667 -0.06867446]] probs:[[0.16273373 0.17026873 0.16507992 0.16607355 0.1692845  0.16655956]] entropy:[1.7916448]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19228 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09191287 -0.04665017 -0.07759779 -0.07159735 -0.05244815 -0.06867389]] probs:[[0.1627337  0.17026871 0.16508    0.16607353 0.16928436 0.16655976]] entropy:[1.7916449]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19229 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09191296 -0.04665012 -0.07759775 -0.07159728 -0.0524481  -0.0686738 ]] probs:[[0.16273367 0.17026871 0.16508    0.16607353 0.16928434 0.16655976]] entropy:[1.7916449]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19230 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09267485 -0.04760573 -0.07785627 -0.0722841  -0.05394536 -0.0682847 ]] probs:[[0.16271254 0.17021361 0.16514166 0.16606443 0.16913792 0.16672991]] entropy:[1.7916486]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19231 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09267327 -0.04760359 -0.07785543 -0.07228185 -0.05394157 -0.06828535]] probs:[[0.16271251 0.17021368 0.16514151 0.1660645  0.16913828 0.16672951]] entropy:[1.7916484]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4366976] v_loss:[[1.3341933e-07]]
DEBUG:chainerrl.agents.a3c:grad norm:0.005503635888009555
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19232 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08760569 -0.10133405 -0.07318549  0.0257879  -0.04351909 -0.11626225]] probs:[[0.1629252  0.1607038  0.16529164 0.18248807 0.1702687  0.15832259]] entropy:[1.7906243]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19233 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08750879 -0.10105554 -0.07311884  0.02569502 -0.04346506 -0.11605227]] probs:[[0.16292518 0.16073295 0.16528662 0.18245341 0.17026138 0.15834047]] entropy:[1.7906294]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19234 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08750904 -0.10105462 -0.07311866  0.02569353 -0.04346649 -0.11605079]] probs:[[0.16292515 0.16073312 0.16528666 0.18245316 0.17026116 0.1583407 ]] entropy:[1.7906294]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19235 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08760945 -0.1013337  -0.07318483  0.02578018 -0.04352931 -0.11625595]] probs:[[0.162925   0.16070426 0.16529217 0.18248713 0.1702674  0.15832399]] entropy:[1.7906244]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19236 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08760803 -0.10133789 -0.07318611  0.0257878  -0.04352199 -0.11626346]] probs:[[0.16292512 0.16070347 0.16529185 0.18248838 0.17026852 0.15832269]] entropy:[1.7906243]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19237 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08922418 -0.10482616 -0.07540059  0.0283615  -0.04440244 -0.11770093]] probs:[[0.16284038 0.16031948 0.16510706 0.18315928 0.17030522 0.15826862]] entropy:[1.7905406]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19238 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08930387 -0.1045855  -0.07525776  0.0283852  -0.04442585 -0.11731505]] probs:[[0.16280939 0.16034031 0.16511236 0.18314335 0.17028238 0.15831219]] entropy:[1.7905447]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19239 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08881474 -0.10560613 -0.07539011  0.03011283 -0.04298816 -0.11876464]] probs:[[0.16285215 0.16014047 0.16505311 0.18341847 0.17048874 0.15804705]] entropy:[1.7904929]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4897883] v_loss:[[3.8766055e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.06382952146025123
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19240 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08239855 -0.09835208 -0.07183959  0.01167304 -0.05387405 -0.10824099]] probs:[[0.16401911 0.16142319 0.16576016 0.1801977  0.16876505 0.15983476]] entropy:[1.7909625]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19241 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08238271 -0.09824736 -0.07170504  0.01198753 -0.05359609 -0.10815145]] probs:[[0.16399553 0.16141433 0.165756   0.18022561 0.16878502 0.15982357]] entropy:[1.7909586]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19242 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08241147 -0.09796724 -0.07141461  0.01257011 -0.05314465 -0.10782774]] probs:[[0.16393808 0.16140763 0.16575083 0.18027264 0.16880693 0.1598239 ]] entropy:[1.7909538]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19243 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08250566 -0.09763331 -0.07124335  0.01249253 -0.05320378 -0.10736406]] probs:[[0.16390346 0.16144265 0.16575982 0.18023756 0.1687772  0.15987931]] entropy:[1.7909598]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19244 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08267322 -0.09654823 -0.07034433  0.01365177 -0.05223866 -0.10602025]] probs:[[0.16373128 0.1614752  0.1657624  0.18028727 0.16879098 0.15995292]] entropy:[1.7909567]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19245 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08251955 -0.0966804  -0.07026171  0.01374443 -0.05207606 -0.10582744]] probs:[[0.16374129 0.16143891 0.16576076 0.1802873  0.1688028  0.15996897]] entropy:[1.7909565]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19246 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08282542 -0.09543232 -0.06949338  0.01478125 -0.05134325 -0.10478538]] probs:[[0.16356747 0.16151834 0.16576277 0.18033789 0.16879885 0.16001469]] entropy:[1.7909535]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19247 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08285666 -0.09508527 -0.06921663  0.01516528 -0.05103575 -0.10438313]] probs:[[0.16351618 0.16152878 0.16576183 0.18035622 0.1688031  0.16003387]] entropy:[1.7909523]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4927021] v_loss:[[4.3688633e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.05826840154861615
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19248 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07990733 -0.0874838  -0.07064918  0.00754656 -0.05332961 -0.10355725]] probs:[[0.16402298 0.16278496 0.16554858 0.17901336 0.16844077 0.16018936]] entropy:[1.7911056]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19249 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07990208 -0.08746927 -0.07064224  0.00753521 -0.05332085 -0.10355004]] probs:[[0.16402303 0.16278651 0.1655489  0.17901044 0.16844143 0.16018972]] entropy:[1.7911059]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19250 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07974273 -0.08774339 -0.07064863  0.00757305 -0.0532461  -0.10348447]] probs:[[0.16404746 0.16274022 0.16554613 0.17901535 0.16845226 0.16019857]] entropy:[1.7911052]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19251 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07991029 -0.08753181 -0.07066508  0.00757509 -0.05334502 -0.10356961]] probs:[[0.1640242  0.16277882 0.16554765 0.17902032 0.16843994 0.16018903]] entropy:[1.791105]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19252 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07988009 -0.08757195 -0.07066286  0.0075757  -0.05332791 -0.10355442]] probs:[[0.16402847 0.16277163 0.16554734 0.17901969 0.16844212 0.16019082]] entropy:[1.791105]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19253 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07991849 -0.08752291 -0.07066622  0.00757678 -0.05334979 -0.10357422]] probs:[[0.16402309 0.16278051 0.16554771 0.17902087 0.16843937 0.16018853]] entropy:[1.7911052]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19254 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.079918   -0.08752374 -0.07066615  0.00757688 -0.05334949 -0.10357393]] probs:[[0.16402313 0.16278034 0.16554768 0.17902085 0.16843939 0.16018854]] entropy:[1.7911049]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19255 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07993607 -0.08754141 -0.07067525  0.00761351 -0.05329863 -0.10362501]] probs:[[0.16402024 0.16277754 0.16554625 0.17902748 0.16844803 0.16018043]] entropy:[1.7911041]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5572455] v_loss:[[0.00017017]]
DEBUG:chainerrl.agents.a3c:grad norm:0.25275751553191955
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19256 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07124379 -0.07913032 -0.08240253  0.00253883 -0.04697122 -0.10882655]] probs:[[0.16541763 0.16411819 0.16358204 0.17808412 0.16948186 0.15931615]] entropy:[1.7911359]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19257 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07132705 -0.07907502 -0.08240882  0.00244373 -0.04698621 -0.10877877]] probs:[[0.16540676 0.16413015 0.16358387 0.1780703  0.1694823  0.15932657]] entropy:[1.7911375]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19258 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07136201 -0.07906346 -0.0824149   0.00242212 -0.04697109 -0.10877908]] probs:[[0.16540201 0.16413307 0.16358392 0.17806756 0.16948593 0.1593275 ]] entropy:[1.7911373]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19259 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07136307 -0.07906836 -0.08241639  0.00242891 -0.04696218 -0.10878745]] probs:[[0.16540182 0.16413224 0.16358364 0.17806876 0.1694874  0.15932615]] entropy:[1.7911375]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19260 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07108077 -0.07974002 -0.08251382  0.00359132 -0.04595505 -0.1098464 ]] probs:[[0.16542704 0.16400075 0.16354647 0.17825273 0.16963618 0.15913686]] entropy:[1.7911114]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19261 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07127542 -0.07928146 -0.08245595  0.00278145 -0.04665332 -0.10911886]] probs:[[0.16541046 0.16409145 0.16357137 0.17812523 0.16953376 0.15926771]] entropy:[1.7911295]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19262 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07120819 -0.07944927 -0.082477    0.00307286 -0.04639969 -0.10938285]] probs:[[0.1654161  0.1640585  0.16356254 0.17817126 0.16957116 0.15922043]] entropy:[1.7911229]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19263 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07130069 -0.07924282 -0.08244959  0.00270793 -0.04671078 -0.10905596]] probs:[[0.16540787 0.16409938 0.163574   0.17811385 0.16952565 0.15927927]] entropy:[1.7911309]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4985558] v_loss:[[4.7085497e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.1350749813188471
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19264 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06788416 -0.0715761  -0.08732244 -0.00721536 -0.05473469 -0.09796467]] probs:[[0.16602428 0.16541247 0.16282822 0.1764086  0.16822185 0.16110456]] entropy:[1.7913285]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19265 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0678857  -0.07157265 -0.08732188 -0.00722079 -0.05473919 -0.09795972]] probs:[[0.16602413 0.16541313 0.16282842 0.17640774 0.16822118 0.16110545]] entropy:[1.7913287]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19266 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0678906  -0.07156021 -0.08732001 -0.00724097 -0.05475678 -0.09794115]] probs:[[0.16602364 0.16541553 0.16282904 0.17640454 0.16821855 0.16110876]] entropy:[1.791329]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19267 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06789081 -0.07156017 -0.08731975 -0.00724103 -0.05475678 -0.0979409 ]] probs:[[0.16602358 0.1654155  0.16282906 0.17640449 0.16821852 0.16110876]] entropy:[1.791329]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19268 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06789082 -0.07156017 -0.08731975 -0.00724105 -0.05475678 -0.0979409 ]] probs:[[0.16602358 0.1654155  0.16282906 0.17640449 0.16821852 0.16110878]] entropy:[1.791329]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19269 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0678893  -0.07156033 -0.08732136 -0.00724066 -0.0547566  -0.09794285]] probs:[[0.1660239  0.16541554 0.16282886 0.17640463 0.16821861 0.16110852]] entropy:[1.791329]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19270 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06789099 -0.07156014 -0.08731957 -0.00724112 -0.05475678 -0.09794071]] probs:[[0.16602357 0.16541553 0.16282909 0.17640449 0.16821854 0.16110882]] entropy:[1.791329]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19271 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06789094 -0.07156014 -0.08731952 -0.00724117 -0.05475685 -0.09794071]] probs:[[0.16602357 0.16541551 0.1628291  0.17640448 0.16821852 0.1611088 ]] entropy:[1.791329]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.5086279] v_loss:[[6.236445e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.12712509562757152
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19272 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06655144 -0.06993542 -0.08335166 -0.01911147 -0.05759403 -0.09040638]] probs:[[0.16627951 0.16571778 0.16350931 0.1743579  0.16777563 0.16235985]] entropy:[1.7914917]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19273 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06655239 -0.06993394 -0.08334825 -0.01911315 -0.05759689 -0.09040428]] probs:[[0.16627932 0.16571799 0.16350983 0.17435758 0.16777512 0.16236016]] entropy:[1.7914919]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19274 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06655944 -0.06992278 -0.08332291 -0.0191257  -0.05761833 -0.09038819]] probs:[[0.16627787 0.16571957 0.16351372 0.1743551  0.16777125 0.16236252]] entropy:[1.791492]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19275 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06660282 -0.06984317 -0.08312995 -0.0192235  -0.05777113 -0.09026907]] probs:[[0.16626829 0.16573039 0.16354294 0.17433557 0.16774322 0.16237955]] entropy:[1.791494]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19276 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06688891 -0.06919555 -0.08172057 -0.01999304 -0.05883237 -0.0893923 ]] probs:[[0.16620064 0.16581772 0.1637538  0.1741804  0.16754505 0.16250233]] entropy:[1.7915095]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19277 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.06832526 -0.06540538 -0.07346958 -0.02462846 -0.06551461 -0.08336487]] probs:[[0.16582796 0.16631286 0.16497707 0.17323475 0.1662947  0.16335262]] entropy:[1.791587]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19278 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07584322 -0.05073041 -0.04387778 -0.03671196 -0.08992188 -0.06504627]] probs:[[0.1640771  0.16824971 0.16940662 0.17062493 0.16178331 0.16585824]] entropy:[1.7915883]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19279 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08089503 -0.04007744 -0.0209263  -0.04490841 -0.10613987 -0.04996628]] probs:[[0.16269182 0.1694699  0.17274672 0.16865316 0.1586361  0.16780229]] entropy:[1.7913663]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4468827] v_loss:[[2.7962942e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.007861313324066851
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19280 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08294639 -0.03214993 -0.01420242 -0.05159329 -0.11261214 -0.04488971]] probs:[[0.16221443 0.17066722 0.17375791 0.16738091 0.1574729  0.16850674]] entropy:[1.7912332]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19281 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08320042 -0.0318371  -0.01345202 -0.05183561 -0.11312169 -0.04434976]] probs:[[0.1621549  0.17070132 0.1738687  0.16732146 0.15737489 0.16857871]] entropy:[1.7912197]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19282 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08323555 -0.03179467 -0.01334962 -0.0518696  -0.11319126 -0.04427711]] probs:[[0.16214678 0.17070602 0.17388391 0.16731328 0.1573616  0.16858844]] entropy:[1.7912179]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19283 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08324087 -0.03178892 -0.01333613 -0.0518747  -0.11320016 -0.0442677 ]] probs:[[0.16214561 0.17070667 0.17388593 0.1673121  0.1573599  0.1685897 ]] entropy:[1.7912177]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19284 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08324167 -0.03178811 -0.01333426 -0.05187533 -0.11320136 -0.04426658]] probs:[[0.16214547 0.1707068  0.17388624 0.16731198 0.15735967 0.16858988]] entropy:[1.7912178]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19285 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08324963 -0.03178627 -0.01333763 -0.05187634 -0.11319588 -0.04426977]] probs:[[0.16214438 0.17070733 0.17388588 0.16731203 0.15736075 0.16858955]] entropy:[1.7912176]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19286 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08325559 -0.03178495 -0.01334037 -0.05187691 -0.11319165 -0.04427245]] probs:[[0.1621436  0.17070776 0.1738856  0.16731212 0.1573616  0.1685893 ]] entropy:[1.7912178]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19287 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08325724 -0.03178458 -0.01334122 -0.0518771  -0.11319047 -0.04427344]] probs:[[0.16214338 0.17070787 0.1738855  0.16731215 0.15736184 0.16858917]] entropy:[1.7912176]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4701505] v_loss:[[1.7110327e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.0347814079823376
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19288 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08298077 -0.03146319 -0.01688929 -0.05191417 -0.10519984 -0.04832457]] probs:[[0.16217874 0.17075276 0.17325951 0.16729616 0.15861502 0.16789776]] entropy:[1.7913166]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19289 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08298228 -0.03146289 -0.01689032 -0.05191423 -0.10519877 -0.04832579]] probs:[[0.16217856 0.17075288 0.1732594  0.16729622 0.15861526 0.16789764]] entropy:[1.7913166]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19290 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08298472 -0.03146245 -0.01689262 -0.05191432 -0.1051966  -0.04832865]] probs:[[0.16217831 0.1707531  0.17325917 0.16729636 0.15861574 0.16789731]] entropy:[1.7913167]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19291 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08298459 -0.03146299 -0.01689999 -0.05191265 -0.10519445 -0.04834081]] probs:[[0.16217878 0.1707535  0.17325836 0.16729711 0.15861653 0.16789573]] entropy:[1.791317]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19292 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08298581 -0.03146261 -0.01689928 -0.05191307 -0.1051939  -0.04833933]] probs:[[0.16217855 0.17075351 0.17325845 0.16729699 0.15861657 0.16789593]] entropy:[1.7913169]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19293 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08298651 -0.03146245 -0.01689993 -0.05191306 -0.10519329 -0.04834008]] probs:[[0.16217846 0.17075357 0.17325836 0.16729702 0.15861669 0.16789584]] entropy:[1.7913166]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19294 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08299064 -0.03146101 -0.01689499 -0.05191494 -0.10519242 -0.04833077]] probs:[[0.1621775  0.17075351 0.17325892 0.1672964  0.15861654 0.1678971 ]] entropy:[1.7913167]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19295 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08299159 -0.03146123 -0.01690128 -0.05191379 -0.10519007 -0.04834073]] probs:[[0.16217774 0.1707539  0.17325824 0.167297   0.15861732 0.16789584]] entropy:[1.7913169]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4653413] v_loss:[[1.2731993e-05]]
DEBUG:chainerrl.agents.a3c:grad norm:0.03337608090853719
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19296 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07799223 -0.03412428 -0.02257637 -0.05196401 -0.09780852 -0.0511692 ]] probs:[[0.162979   0.1702877  0.17226556 0.16727674 0.15978116 0.16740975]] entropy:[1.7914394]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19297 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07799405 -0.03412384 -0.02257743 -0.05196416 -0.09780721 -0.05117034]] probs:[[0.1629788  0.17028786 0.17226548 0.16727681 0.15978146 0.16740966]] entropy:[1.7914393]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19298 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07847923 -0.03124601 -0.02086857 -0.04757695 -0.09556451 -0.0487566 ]] probs:[[0.16254131 0.17040287 0.17218041 0.16764262 0.15978783 0.16744497]] entropy:[1.7914275]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19299 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07879096 -0.0306114  -0.01973498 -0.04704275 -0.09476793 -0.04774126]] probs:[[0.16238676 0.17040202 0.17226548 0.16762497 0.15981293 0.1675079 ]] entropy:[1.7914219]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19300 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08002976 -0.02691197 -0.01606027 -0.04204466 -0.09132141 -0.04342488]] probs:[[0.16167144 0.17049125 0.17235143 0.16793068 0.15985619 0.16769905]] entropy:[1.7913953]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19301 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08012339 -0.02665693 -0.01577048 -0.04170504 -0.09107368 -0.04308229]] probs:[[0.16161874 0.1704951  0.17236133 0.16794868 0.15985861 0.16771753]] entropy:[1.7913932]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19302 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08166058 -0.0216774  -0.00995555 -0.03515641 -0.08598828 -0.03644085]] probs:[[0.16062455 0.17055413 0.1725651  0.16827066 0.1599309  0.16805466]] entropy:[1.7913492]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19303 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08199851 -0.02035339 -0.00864778 -0.03324634 -0.08459009 -0.03492328]] probs:[[0.1603781  0.17057571 0.17258413 0.1683906  0.15996301 0.16810846]] entropy:[1.7913382]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4614724] v_loss:[[9.437022e-06]]
DEBUG:chainerrl.agents.a3c:grad norm:0.07607431508788895
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19304 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07876784 -0.02503163 -0.01295775 -0.0343029  -0.07778979 -0.03458795]] probs:[[0.16090502 0.16978797 0.17185038 0.16822109 0.16106246 0.16817315]] entropy:[1.7914407]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19305 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07878564 -0.02502769 -0.01279591 -0.03439751 -0.07775616 -0.03442774]] probs:[[0.16089536 0.16978149 0.17187096 0.1681981  0.1610611  0.168193  ]] entropy:[1.7914395]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19306 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07881843 -0.02492726 -0.01250793 -0.03437626 -0.07770368 -0.03403482]] probs:[[0.16086768 0.16977488 0.1718965  0.16817823 0.1610471  0.16823564]] entropy:[1.7914374]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19307 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07880937 -0.02496712 -0.01260532 -0.03439589 -0.07771371 -0.03418097]] probs:[[0.16087742 0.16977686 0.17188863 0.1681836  0.16105378 0.16821975]] entropy:[1.7914383]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19308 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.07815234 -0.0259911  -0.01499429 -0.03494368 -0.07780197 -0.03779186]] probs:[[0.16117509 0.1698053  0.17168292 0.16829188 0.16123156 0.16781324]] entropy:[1.7914624]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19309 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0779421  -0.02613132 -0.01532748 -0.03497782 -0.07775796 -0.03838576]] probs:[[0.16123243 0.16980618 0.1716507  0.16831063 0.16126212 0.167738  ]] entropy:[1.7914667]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19310 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0779083  -0.02615376 -0.01538091 -0.03498334 -0.07775088 -0.03848086]] probs:[[0.16124162 0.16980633 0.1716455  0.16831361 0.161267   0.16772595]] entropy:[1.7914673]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19311 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0779024  -0.02615768 -0.01539017 -0.03498435 -0.07774975 -0.03849748]] probs:[[0.16124322 0.16980635 0.17164461 0.16831411 0.16126783 0.16772383]] entropy:[1.7914674]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.7961811] v_loss:[[0.00130522]]
DEBUG:chainerrl.agents.a3c:grad norm:9.30474201219987
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19312 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08486766 -0.02083276 -0.03966012 -0.0189922  -0.06920422 -0.03594524]] probs:[[0.16009237 0.17067921 0.16749583 0.17099364 0.1626197  0.16811922]] entropy:[1.7914666]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19313 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08486751 -0.0208328  -0.03966031 -0.01899226 -0.06920416 -0.03594565]] probs:[[0.1600924  0.17067923 0.16749582 0.17099366 0.16261972 0.16811916]] entropy:[1.7914665]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19314 r:0.0 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08486754 -0.02083275 -0.03966036 -0.01899225 -0.06920417 -0.03594567]] probs:[[0.1600924  0.17067923 0.1674958  0.17099366 0.16261972 0.16811916]] entropy:[1.7914665]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19315 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08486754 -0.02083278 -0.03966036 -0.01899227 -0.06920417 -0.03594566]] probs:[[0.1600924  0.17067923 0.16749582 0.17099366 0.16261972 0.16811916]] entropy:[1.7914665]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19316 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08486751 -0.0208328  -0.03966034 -0.01899224 -0.06920423 -0.03594566]] probs:[[0.1600924  0.17067923 0.16749582 0.17099366 0.16261972 0.16811916]] entropy:[1.7914665]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19317 r:0.0 a:1 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08486752 -0.0208328  -0.03966032 -0.01899226 -0.06920417 -0.03594564]] probs:[[0.1600924  0.17067923 0.16749582 0.17099366 0.16261972 0.16811916]] entropy:[1.7914665]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19318 r:0.0 a:5 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08487071 -0.02083191 -0.03966348 -0.01899117 -0.06920197 -0.03594833]] probs:[[0.16009203 0.17067951 0.16749541 0.17099397 0.16262022 0.16811885]] entropy:[1.7914667]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19319 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08487171 -0.02083195 -0.03966292 -0.01899235 -0.06920121 -0.03594826]] probs:[[0.16009188 0.17067952 0.16749553 0.17099379 0.16262037 0.16811888]] entropy:[1.7914667]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.4253368] v_loss:[[9.99485e-07]]
DEBUG:chainerrl.agents.a3c:grad norm:0.010331227007940268
DEBUG:chainerrl.agents.a3c:update
DEBUG:chainerrl.agents.a3c:t:19320 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08142922 -0.0220579  -0.04294021 -0.02061858 -0.06473914 -0.03711202]] probs:[[0.16063611 0.1704621  0.16693936 0.17070763 0.16333964 0.16791515]] entropy:[1.7915192]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19321 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08143949 -0.02205834 -0.04293666 -0.0206291  -0.0647312  -0.03711121]] probs:[[0.1606347  0.17046227 0.1669402  0.17070608 0.16334118 0.16791554]] entropy:[1.7915193]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19322 r:0.0 a:0 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08144695 -0.02205853 -0.04293451 -0.02063622 -0.06472531 -0.03711106]] probs:[[0.16063367 0.17046241 0.16694075 0.17070505 0.16334233 0.16791575]] entropy:[1.7915192]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19323 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08145618 -0.02205754 -0.04293677 -0.02064003 -0.06471854 -0.03711412]] probs:[[0.16063249 0.1704629  0.16694067 0.17070472 0.16334373 0.16791555]] entropy:[1.7915192]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19324 r:0.05 a:2 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08747166 -0.02133077 -0.04589841 -0.02179322 -0.06015584 -0.04032045]] probs:[[0.1598802  0.17081237 0.16666704 0.17073339 0.16430765 0.1675993 ]] entropy:[1.7914996]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19325 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.08796529 -0.02132439 -0.0459927  -0.02200073 -0.05971745 -0.04050612]] probs:[[0.15981539 0.17082852 0.16666602 0.17071302 0.1643942  0.16758296]] entropy:[1.7914984]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19326 r:0.0 a:4 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.0908429  -0.02117709 -0.04723999 -0.02257232 -0.05711843 -0.04209162]] probs:[[0.15944815 0.1709523  0.16655436 0.17071396 0.16491716 0.16741405]] entropy:[1.7914864]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:t:19327 r:0.0 a:3 pout:SoftmaxDistribution(beta=1.0, min_prob=0.0) logits:[[-0.09130958 -0.021124   -0.04760187 -0.02251048 -0.05668168 -0.04248112]] probs:[[0.159391   0.17097987 0.16651212 0.17074299 0.16500705 0.16736695]] entropy:[1.7914835]


DEBUG: Capturing video frame: path=/home/yashima/ML/RL/chainer_book/A3C/result/openaigym.video.0.25760.video000027.mp4


DEBUG:chainerrl.agents.a3c:pi_loss:[-1.0878015] v_loss:[[0.00229733]]
