# 벽돌 깨기 게임
- 중요한 포인트 4가지
    1. No-operation ==> Breakout 실행 환경을 새로 시작했을 때, 이후 30단계는 아무행동도 취하지 않는다. 이유는 게임을 시작한 후 초기 상태를 다양하게 만들어 특정한 상태에만 특화된 학습이 일어나지 않도록 하기 위해서이다.
    2. Episodic Life ==> 이 게임은 목숨이 5개이다. 게임 중에 실패를 허용하는 것은 학습하기 어려우므로 한번만 실패해도 게임이 종료되도록 한다. 또한 매번 게임을 완전히 다시 시작하면 초기 상태만 학습이 되므로 다양한 상태를 학습할 수 없다. 따라서 목숨이 날아가더라도 초기화되신 블록이 부서진 채로 다시 시작하게 된다.
    3. Max and Skip ==> 이 게임은 60Hz로 진행된다. 속도가 너무 빠르기 때문에 4프레임마다 행동을 판단하고 행동은 4프레임 동안 지속시킨다. 따라서 에이전트의 행동은 15Hz가 된다. 아타리 게임 중에서는 홀수 프레임과 짝수 프레임에 출력되는 내용이 서로 다른 게임이 있으므로 여기서도 3번째 프레임과 4번째 프레임 이미지의 최댓 값으로 만든 이미지를 판단에 사용한다.
    4. Wrap frame ==> 이 게임의 이미지는 (210, 160, 3)으로 되어 있다. 이 이미지를 논문의 구현에 맞춰 가로세로 84픽셀 그레이스케일로 변환한다.    

In [83]:
import numpy as np
from collections import deque
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import gym
from gym import spaces
from gym.spaces.box import Box

In [84]:
import cv2
cv2.ocl.setUseOpenCL(False)

class NoopResetEnv(gym.Wrapper):
    def __init__(self, env, noop_max=30):
        '''첫 번째 트릭 No-Operation. 초기화 후 일정 단계에 이를때까지 아무 행동도 하지않고
        게임 초기 상태를 다양하게 하여 특정 시작 상태만 학습하는 것을 방지한다'''

        gym.Wrapper.__init__(self, env)
        self.noop_max = noop_max
        self.override_num_noops = None
        self.noop_action = 0
        assert env.unwrapped.get_action_meanings()[0] == 'NOOP'

    def reset(self, **kwargs):
        """ Do no-op action for a number of steps in [1, noop_max]."""
        self.env.reset(**kwargs)
        if self.override_num_noops is not None:
            noops = self.override_num_noops
        else:
            noops = self.unwrapped.np_random.randint(
                1, self.noop_max + 1)  # pylint: disable=E1101
        assert noops > 0
        obs = None
        for _ in range(noops):
            obs, _, done, _ = self.env.step(self.noop_action)
            if done:
                obs = self.env.reset(**kwargs)
        return obs

    def step(self, ac):
        return self.env.step(ac)


class EpisodicLifeEnv(gym.Wrapper):
    def __init__(self, env):
        '''두 번째 트릭 Episodic Life. 한번 실패를 게임 종료로 간주하나, 다음 게임을 같은 블록 상태로 시작'''
        gym.Wrapper.__init__(self, env)
        self.lives = 0
        self.was_real_done = True

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self.was_real_done = done
        # check current lives, make loss of life terminal,
        # then update lives to handle bonus lives
        lives = self.env.unwrapped.ale.lives()
        if lives < self.lives and lives > 0:
            # for Qbert sometimes we stay in lives == 0 condtion for a few frames
            # so its important to keep lives > 0, so that we only reset once
            # the environment advertises done.
            done = True
        self.lives = lives
        return obs, reward, done, info

    def reset(self, **kwargs):
        '''5번 실패하면 게임을 완전히 다시 시작'''
        if self.was_real_done:
            obs = self.env.reset(**kwargs)
        else:
            # no-op step to advance from terminal/lost life state
            obs, _, _, _ = self.env.step(0)
        self.lives = self.env.unwrapped.ale.lives()
        return obs


class MaxAndSkipEnv(gym.Wrapper):
    def __init__(self, env, skip=4):
        '''세 번째 트릭 Max and Skip. 4프레임 동안 같은 행동을 지속하되, 3번째와 4번째 프레임의 최댓값 이미지를 관측 obs로 삼는다'''
        gym.Wrapper.__init__(self, env)
        # most recent raw observations (for max pooling across time steps)
        self._obs_buffer = np.zeros(
            (2,)+env.observation_space.shape, dtype=np.uint8)
        self._skip = skip

    def step(self, action):
        """Repeat action, sum reward, and max over last observations."""
        total_reward = 0.0
        done = None
        for i in range(self._skip):
            obs, reward, done, info = self.env.step(action)
            if i == self._skip - 2:
                self._obs_buffer[0] = obs
            if i == self._skip - 1:
                self._obs_buffer[1] = obs
            total_reward += reward
            if done:
                break
        # Note that the observation on the done=True frame
        # doesn't matter
        max_frame = self._obs_buffer.max(axis=0)

        return max_frame, total_reward, done, info

    def reset(self, **kwargs):
        return self.env.reset(**kwargs)


class WarpFrame(gym.ObservationWrapper):
    def __init__(self, env):
        '''네 번째 트릭 Warp frame. DQN 네이처 논문 구현과 같이 84*84 흑백 이미지를 사용'''
        gym.ObservationWrapper.__init__(self, env)
        self.width = 84
        self.height = 84
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(self.height, self.width, 1), dtype=np.uint8)

    def observation(self, frame):
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        frame = cv2.resize(frame, (self.width, self.height),
                           interpolation=cv2.INTER_AREA)
        return frame[:, :, None]


class WrapPyTorch(gym.ObservationWrapper):
    def __init__(self, env=None):
        '''인덱스 순서를 파이토치 미니배치와 같이 조정하는 래퍼'''
        super(WrapPyTorch, self).__init__(env)
        obs_shape = self.observation_space.shape
        self.observation_space = Box(
            self.observation_space.low[0, 0, 0],
            self.observation_space.high[0, 0, 0],
            [obs_shape[2], obs_shape[1], obs_shape[0]],
            dtype=self.observation_space.dtype)

    def observation(self, observation):
        return observation.transpose(2, 0, 1)

In [85]:
# 실행환경 생성함수
# 병렬 실행환경
from stable_baselines3.common.vec_env import DummyVecEnv
def make_env(env_id, seed, rank):
    def _thunk():
        env = gym.make(env_id)
        env = NoopResetEnv(env, noop_max=30)
        env = MaxAndSkipEnv(env, skip=4)
        env.seed(seed + rank) # 난수 시드 설정
        env = EpisodicLifeEnv(env)
        env = WarpFrame(env)
        env = WrapPyTorch(env)
        return env
    return _thunk

In [86]:
ENV_NAME = "BreakoutNoFrameskip-v4"
# v0는 2~4개 프레임을 자동으로 생략하므로 이 기능이 없는 버전을 사용한다.
NUM_SKIP_FRAME = 4 # 생략할 프레임 수
NUM_STACK_FRAME = 4 # 하나의 상태로 사용할 프레임 수
NOOP_MAX = 30 # 초기화후 No-operation을 적용할 최초 프레임 수의 최댓값
NUM_PROCESSES = 16 # 병렬로 실행할 프로세스 수
NUM_ADVANCED_STEP = 5 # Advanced 학습할 단계 수
GAMMA = 0.99 # 시간 학습률

TOTAL_FRAMES = 10e6 # 학습에 사용되는 프레임 수
NUM_UPDATES = int(TOTAL_FRAMES / NUM_ADVANCED_STEP / NUM_PROCESSES) # 신경망의 수정 총 횟수, 약 125,000이 됨

# A2C 손실 함수를 계산하기 위한 상수
value_loss_coef = 0.5
entropy_coef = 0.01
max_grad_norm = 0.5

# 최적화 기법 RMSprop에 대한 설정
lr = 7e-4
eps = 1e-5
alpha = 0.99

In [87]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)

cuda


In [88]:
# 메모리 클래스 정의
class RolloutStorage(object):
    def __init__(self, num_steps, num_processes, obs_shape):
        self.observations = torch.zeros(num_steps + 1, num_processes, *obs_shape).to(device)
        self.masks = torch.ones(num_steps + 1, num_processes, 1).to(device) # 해당 단계가 에피소드의 끝인지 여부를 표시하기 위한 변수 다음 단계가 존재하면 1 아니면 0값을 가짐
        self.rewards = torch.zeros(num_steps, num_processes, 1).to(device)
        self.actions = torch.zeros(num_steps, num_processes, 1).long().to(device) # long() 형으로 변환
        # 할인 총 보상 저장
        self.returns = torch.zeros(num_steps + 1, num_processes, 1).to(device)
        self.index = 0 # insert할 인덱스
        
    def insert(self, current_obs, action, reward, mask):
        # 이 함수에서 아래의 코드는 각 단계에서 얻은 값들을 복사하여 저장을 한다.
        # 예를 들어 actions의 shape : (num_steps, num_processes, 1)인데 이를 (index, num_processes, 1)처럼 접근하여 저장을 하겠다는 의미이다.
        self.observations[self.index + 1].copy_(current_obs) # 현재 상태에서 어떤 행동을 취했을 때 다음 상태 저장
        self.masks[self.index + 1].copy_(mask) # 에피소드가 끝났는지 확인
        self.rewards[self.index].copy_(reward) # 현재 상태에서 얻음 보상
        self.actions[self.index].copy_(action) # 현재 상태에서 취한 행동
        
        self.index = (self.index + 1) % NUM_ADVANCED_STEP
        
    def after_update(self): 
        # Advantage만큼 학습 단계가 진행되면 가장 새로운 transition을 index0에 저장
        self.observations[0].copy_(self.observations[-1]) # (5, 1, 2, 3, 4)
        self.masks[0].copy_(self.masks[-1]) # (5, 1, 2, 3, 4)
        
    def compute_returns(self, next_value):
        # 주의 : 5번쩨 단계 부터 거슬러 올라오며 계산
        # 주의 : 5번째 단계가 Advantage1, 4번째 단계는 Advantage2가 됨
        self.returns[-1] = next_value
        for ad_step in reversed(range(self.rewards.size(0))):
            self.returns[ad_step] = self.returns[ad_step + 1] * GAMMA * self.masks[ad_step + 1] + self.rewards[ad_step]

In [105]:
# A2C 신경망 구성
def init(module, gain):
    # 정확한 해를 찾기 위해 Orthogonal Matrix를 생성한다.
    # Orthogonal Matrix는 자기자신 행렬과 전치행렬을 곱했을 때 단위 행렬이 되는 행렬을 의미한다.
    nn.init.orthogonal_(module.weight.data, gain=gain)
    nn.init.constant_(module.bias.data, 0)
    return module

class Flatten(nn.Module):
    '''합성곱층의 출력 이미지를 1차원으로 변환하는 층'''
    def forward(self, x):
        return x.reshape(x.size(0), -1)

class Net(nn.Module):
    def __init__(self, n_out):
        super(Net, self).__init__()
        
        # 결합 가중치 초기화함수
        def init_(module): return init(module, gain=nn.init.calculate_gain('relu'))
        
        # 합성곱층을 정의
        self.conv = nn.Sequential(
            # 이미지 크기의 변화1 : (84, 84) ==> (20, 20)처럼 나오는 이유는 input 크기가 84인데 kernel_size가 8이고 stride가 4이므로 
            # 4x + 8 = 84를 구하면 x는 19가 나온다. 따라서 처음 값 1번을 더하면 20이 나온다. 출력인 32는 fileter수와 동일하다.
            # 이미지 크기의 변화2 : (20, 20) ==> (9, 9)
            # 앞서 계산 한것 처럼 2x + 4 = 20이므로 x = 8이 나오고 처음 횟수 1번을 더하면 9가 나온다. fileter의 수는 64이다.
            # 이미지 크기의 변화3: (9, 9) ==> (7, 7) 
            # 앞서 계산한 것처럼 x + 3 = 9이므로 x = 6이 되고 처음 횟수 1번을 더하면 7이 나온다. fileter의 수는 64이다.
            init_(nn.Conv2d(NUM_STACK_FRAME, 32, kernel_size=8, stride=4)), # 이미지 크기의 변화 (84*84 => 20 * 20)
            nn.ReLU(),
            init_(nn.Conv2d(32, 64, kernel_size=4, stride=2)),
            nn.ReLU(),
            init_(nn.Conv2d(64, 64, kernel_size=3, stride=1)),
            nn.ReLU(),
            Flatten(),
            init_(nn.Linear(64 * 7 * 7, 512)), # filter의 수는 64이고 이미지의 크기는 7*7이다.
            nn.ReLU())
        
        # 결합 가중치 초기화 함수
        def init_(module): return init(module, gain=1.0)
        
        # Critic을 정의
        self.critic = init_(nn.Linear(512, 1)) # 출력은 상태가치이므로 1개
        
        # 결합 가중치 초기화 함수
        def init_(module): return init(module, gain=0.01)
        
        # Actor를 정의
        self.actor = init_(nn.Linear(512, n_out)) # 출력이 행동이므로 출력 수는 행동의 가짓수
        
        self.train()
    
    def forward(self, x):
        inputs = x / 255.0
        conv_output = self.conv(inputs)
        critic_output = self.critic(conv_output)
        actor_output = self.actor(conv_output)
        
        return critic_output, actor_output
    
    def act(self, x):
        value, actor_output = self(x)
        probs = F.softmax(actor_output, dim=1)
        action = probs.multinomial(num_samples=1)
        return action
    
    def get_value(self, x):
        value, _ = self(x)
        return value
    
    def evaluate_actions(self, x, actions):
        value, actor_output = self(x)
        
        log_probs = F.log_softmax(actor_output, dim=1)
        action_log_probs = log_probs.gather(1, actions)
        
        probs = F.softmax(actor_output, dim=1)
        dist_entropy = -(log_probs * probs).sum(-1).mean()
        
        return value, action_log_probs, dist_entropy

In [106]:
class Brain(object):
    def __init__(self, actor_critic):
        self.actor_critic = actor_critic
        self.optimizer = optim.RMSprop(actor_critic.parameters(), lr=lr, eps=eps, alpha=alpha)
        
    def update(self, rollouts):
        obs_shape = rollouts.observations.size()[2:]
        num_steps = NUM_ADVANCED_STEP
        num_processes = NUM_PROCESSES
        
        # Tensor의 크기에 *을 붙이면 unpack되어 크기를 얻을 수 있다.
        values, action_log_probs, dist_entropy = self.actor_critic.evaluate_actions(
            rollouts.observations[:-1].reshape(-1, *obs_shape),
            rollouts.actions.reshape(-1, 1))
        
        values = values.reshape(num_steps, num_processes, 1) # torch.Size([5, 16, 1])
        action_log_probs = action_log_probs.reshape(num_steps, num_processes, 1)
        
        advantages = rollouts.returns[:-1] - values
        value_loss = advantages.pow(2).mean()
        
        # Advantages는 detach()해서 정수로 취급한다.
        action_gain = (advantages.detach() * action_log_probs).mean()
        total_loss = (value_loss * value_loss_coef - action_gain - dist_entropy * entropy_coef)
        nn.utils.clip_grad_norm_(self.actor_critic.parameters(), max_grad_norm)
        # 한번에 결합 가중치가 너무 크게 변화하지 않도록 경사의 최댓값ㅇ르 0.5로 제한한다.
        
        self.optimizer.step() # 결합 가중치 수정

In [109]:
class Environment:
    def run(self):
        seed_num = 1
        torch.manual_seed(seed_num)
        if use_cuda:
            torch.cuda.manual_seed(seed_num)
            
        # 실행환경 구축
        torch.set_num_threads(seed_num)
        envs = [make_env(ENV_NAME, seed_num, i) for i in range(NUM_PROCESSES)]
        envs = DummyVecEnv(envs) # 멀티프로세스 실행환경
        
        # 모든 에이전트가 공유하는 두뇌 역할 클래스 Brain객체 생성
        n_out = envs.action_space.n # 행동의 가짓수는 4개
        actor_critic = Net(n_out).to(device) # GPU사용
        global_brain = Brain(actor_critic)
        
        # 정보 저장용 변수 생성
        obs_shape = envs.observation_space.shape # (1, 84, 84)
        obs_shape = (obs_shape[0] * NUM_STACK_FRAME, *obs_shape[1:]) # (4, 84, 84)
        current_obs = torch.zeros(NUM_PROCESSES, *obs_shape).to(device) # (16, 4, 84, 84)
        rollouts = RolloutStorage(NUM_ADVANCED_STEP, NUM_PROCESSES, obs_shape) # rollout객체 생성
        episode_rewards = torch.zeros([NUM_PROCESSES, 1])
        final_rewards = torch.zeros([NUM_PROCESSES, 1])
        
        # 초기 상태로 시작
        obs = envs.reset()
        obs = torch.from_numpy(obs).float() # torch.Size([16, 1, 84, 84])
        current_obs[:, -1:] = obs # 제일 마지막 4번째 프레임에 가장 최근 관측 결과인 obs를 넣음
        
        # Advanted 핛브에 사용할 객체인 rollouts에 첫번째 상태로 현재 상태를 저장
        rollouts.observations[0].copy_(current_obs)
        
        #주 반복문
        for j in tqdm(range(NUM_UPDATES)):
            for step in range(NUM_ADVANCED_STEP):
                # 행동을 결정
                with torch.no_grad():
                    action = actor_critic.act(rollouts.observations[step])
                cpu_actions = action.squeeze(1).cpu().numpy()
                
                # 1단계를 병렬로 실행, 반환 값 obs의 크기는 (16, 1, 84, 84)
                obs, reward, done, info = envs.step(cpu_actions)
                
                # 보상을 텐서로 변환한 다음 에피소드 총 보상에 더함
                # 크기가 (16,) ==> (16, 1)로 변경
                reward = np.expand_dims(np.stack(reward), 1)
                reward = torch.from_numpy(reward).float()
                episode_rewards += reward
                
                # 각 프로세스 마다 done이 True면 0, False면 1
                masks = torch.FloatTensor([[0.0] if done_ else [1.0] for done_ in done])
                
                # 마지막 에피소드의 총 보상을 업데이트
                final_rewards *= masks
                final_rewards += (1 - masks) * episode_rewards
                
                # episode의 총 보상을 업데이트
                episode_rewards *= masks 
                
                masks = masks.to(device) # masks를 GPU로 보냄
                
                # masks의 크기를 torch.Size([16, 1]) ==> torch.Size([16, 1, 1, 1])로 변경하고 곱함
                current_obs *= masks.unsqueeze(2).unsqueeze(2)
                
                # 프레임을 모음
                # torch.Size([16, 1, 84, 84])
                obs = torch.from_numpy(obs).float()
                current_obs[:, :-1] = current_obs[:, 1:] # 0~2번째 프레임을 1~3번째 프레임으로 덮어씀
                current_obs[:, -1:] = obs # 4번째 프레임에 가장 최근 obs를 저장
                
                # 메모리 객체에 현 단계의 transition을 저장
                rollouts.insert(current_obs, action.data, reward, masks)
            # Advaned 학습의 for 문 끝
            
            # advanced학습 대상 단계 중 마지막 단계의 상태에서 예상되는 상태 가치를 계산
            with torch.no_grad():
                next_value = actor_critic.get_value(rollouts.observations[-1]).detach()
            # 모든 단계의 할인 총 보상을 계산하고, rollouts의 변수 returns를 업데이트
            rollouts.compute_returns(next_value)
            
            # 신경망 수정 및 rollout 업데이트
            global_brain.update(rollouts)
            rollouts.after_update()
            
            if j % 100 == 0:
                print("finished frames {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}".format(
                j*NUM_PROCESSES*NUM_ADVANCED_STEP,
                final_rewards.mean(),
                final_rewards.median(),
                final_rewards.min(),
                final_rewards.max()))
                
            if j % 12500 == 0:
                torch.save(global_brain.actor_critic.state_dict(), 'weight_'+str(j)+'.pth')
        
        torch.save(global_brain.actor_critic.state_dict(), 'weight_end.pth')

In [110]:
breakout_env = Environment()
breakout_env.run()

  0%|          | 2/125000 [00:00<4:28:00,  7.77it/s]

finished frames 0, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


  0%|          | 102/125000 [00:11<3:31:13,  9.86it/s]

finished frames 8000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  0%|          | 202/125000 [00:21<3:51:10,  9.00it/s]

finished frames 16000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


  0%|          | 302/125000 [00:32<3:50:00,  9.04it/s]

finished frames 24000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


  0%|          | 402/125000 [00:43<3:54:13,  8.87it/s]

finished frames 32000, mean/median reward 0.4/0.0, min/max reward 0.0/4.0


  0%|          | 502/125000 [00:54<4:13:03,  8.20it/s]

finished frames 40000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


  0%|          | 602/125000 [01:05<3:29:51,  9.88it/s]

finished frames 48000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  1%|          | 702/125000 [01:15<4:10:29,  8.27it/s]

finished frames 56000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  1%|          | 802/125000 [01:26<3:59:11,  8.65it/s]

finished frames 64000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


  1%|          | 902/125000 [01:37<3:31:18,  9.79it/s]

finished frames 72000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  1%|          | 1002/125000 [01:48<3:46:23,  9.13it/s]

finished frames 80000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


  1%|          | 1103/125000 [01:58<3:30:27,  9.81it/s]

finished frames 88000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


  1%|          | 1202/125000 [02:09<4:12:39,  8.17it/s]

finished frames 96000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  1%|          | 1302/125000 [02:20<3:42:32,  9.26it/s]

finished frames 104000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


  1%|          | 1402/125000 [02:31<3:52:38,  8.85it/s]

finished frames 112000, mean/median reward 0.4/0.0, min/max reward 0.0/5.0


  1%|          | 1502/125000 [02:42<3:53:34,  8.81it/s]

finished frames 120000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


  1%|▏         | 1602/125000 [02:53<3:46:47,  9.07it/s]

finished frames 128000, mean/median reward 0.5/0.0, min/max reward 0.0/4.0


  1%|▏         | 1702/125000 [03:04<3:50:59,  8.90it/s]

finished frames 136000, mean/median reward 0.8/0.0, min/max reward 0.0/4.0


  1%|▏         | 1802/125000 [03:14<3:29:16,  9.81it/s]

finished frames 144000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  2%|▏         | 1902/125000 [03:25<3:47:35,  9.01it/s]

finished frames 152000, mean/median reward 0.4/0.0, min/max reward 0.0/1.0


  2%|▏         | 2002/125000 [03:36<3:53:43,  8.77it/s]

finished frames 160000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


  2%|▏         | 2102/125000 [03:47<3:45:18,  9.09it/s]

finished frames 168000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  2%|▏         | 2202/125000 [03:58<3:50:53,  8.86it/s]

finished frames 176000, mean/median reward 0.6/0.0, min/max reward 0.0/3.0


  2%|▏         | 2302/125000 [04:08<3:34:22,  9.54it/s]

finished frames 184000, mean/median reward 0.6/0.0, min/max reward 0.0/4.0


  2%|▏         | 2402/125000 [04:19<3:38:22,  9.36it/s]

finished frames 192000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


  2%|▏         | 2502/125000 [04:30<3:40:18,  9.27it/s]

finished frames 200000, mean/median reward 0.5/0.0, min/max reward 0.0/4.0


  2%|▏         | 2602/125000 [04:41<3:17:55, 10.31it/s]

finished frames 208000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  2%|▏         | 2702/125000 [04:52<3:50:16,  8.85it/s]

finished frames 216000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  2%|▏         | 2802/125000 [05:02<3:49:06,  8.89it/s]

finished frames 224000, mean/median reward 0.2/0.0, min/max reward 0.0/3.0


  2%|▏         | 2902/125000 [05:13<3:21:53, 10.08it/s]

finished frames 232000, mean/median reward 0.6/0.0, min/max reward 0.0/4.0


  2%|▏         | 3002/125000 [05:24<3:57:36,  8.56it/s]

finished frames 240000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  2%|▏         | 3103/125000 [05:34<3:27:41,  9.78it/s]

finished frames 248000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  3%|▎         | 3202/125000 [05:45<3:35:40,  9.41it/s]

finished frames 256000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


  3%|▎         | 3302/125000 [05:56<4:08:13,  8.17it/s]

finished frames 264000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


  3%|▎         | 3403/125000 [06:06<3:34:19,  9.46it/s]

finished frames 272000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


  3%|▎         | 3502/125000 [06:17<3:22:38,  9.99it/s]

finished frames 280000, mean/median reward 0.8/0.0, min/max reward 0.0/4.0


  3%|▎         | 3602/125000 [06:28<3:44:31,  9.01it/s]

finished frames 288000, mean/median reward 0.4/0.0, min/max reward 0.0/1.0


  3%|▎         | 3702/125000 [06:38<3:47:11,  8.90it/s]

finished frames 296000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


  3%|▎         | 3802/125000 [06:49<3:30:30,  9.60it/s]

finished frames 304000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  3%|▎         | 3902/125000 [06:59<3:42:06,  9.09it/s]

finished frames 312000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


  3%|▎         | 4002/125000 [07:10<3:49:01,  8.81it/s]

finished frames 320000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


  3%|▎         | 4101/125000 [07:21<3:19:21, 10.11it/s]

finished frames 328000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


  3%|▎         | 4202/125000 [07:32<3:40:07,  9.15it/s]

finished frames 336000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


  3%|▎         | 4302/125000 [07:42<3:48:35,  8.80it/s]

finished frames 344000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  4%|▎         | 4401/125000 [07:53<3:20:21, 10.03it/s]

finished frames 352000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


  4%|▎         | 4502/125000 [08:04<3:50:41,  8.71it/s]

finished frames 360000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


  4%|▎         | 4602/125000 [08:14<3:42:54,  9.00it/s]

finished frames 368000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


  4%|▍         | 4702/125000 [08:25<3:24:13,  9.82it/s]

finished frames 376000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  4%|▍         | 4802/125000 [08:35<3:44:20,  8.93it/s]

finished frames 384000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


  4%|▍         | 4902/125000 [08:46<3:51:29,  8.65it/s]

finished frames 392000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


  4%|▍         | 5002/125000 [08:57<3:34:19,  9.33it/s]

finished frames 400000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


  4%|▍         | 5102/125000 [09:08<3:55:44,  8.48it/s]

finished frames 408000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  4%|▍         | 5202/125000 [09:18<3:20:54,  9.94it/s]

finished frames 416000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


  4%|▍         | 5302/125000 [09:29<3:22:09,  9.87it/s]

finished frames 424000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


  4%|▍         | 5402/125000 [09:40<3:41:16,  9.01it/s]

finished frames 432000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


  4%|▍         | 5503/125000 [09:51<3:11:05, 10.42it/s]

finished frames 440000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


  4%|▍         | 5602/125000 [10:01<3:49:12,  8.68it/s]

finished frames 448000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  5%|▍         | 5702/125000 [10:12<3:56:06,  8.42it/s]

finished frames 456000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


  5%|▍         | 5801/125000 [10:23<3:09:37, 10.48it/s]

finished frames 464000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  5%|▍         | 5902/125000 [10:33<3:39:19,  9.05it/s]

finished frames 472000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  5%|▍         | 6002/125000 [10:44<3:50:35,  8.60it/s]

finished frames 480000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


  5%|▍         | 6101/125000 [10:55<3:14:25, 10.19it/s]

finished frames 488000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


  5%|▍         | 6202/125000 [11:05<3:35:00,  9.21it/s]

finished frames 496000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


  5%|▌         | 6302/125000 [11:16<3:50:34,  8.58it/s]

finished frames 504000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  5%|▌         | 6401/125000 [11:27<3:08:29, 10.49it/s]

finished frames 512000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  5%|▌         | 6502/125000 [11:37<3:42:54,  8.86it/s]

finished frames 520000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


  5%|▌         | 6602/125000 [11:48<3:42:22,  8.87it/s]

finished frames 528000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  5%|▌         | 6702/125000 [11:59<3:30:22,  9.37it/s]

finished frames 536000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


  5%|▌         | 6802/125000 [12:09<3:34:08,  9.20it/s]

finished frames 544000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  6%|▌         | 6902/125000 [12:20<3:50:21,  8.54it/s]

finished frames 552000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  6%|▌         | 7002/125000 [12:31<3:19:01,  9.88it/s]

finished frames 560000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  6%|▌         | 7102/125000 [12:41<3:33:01,  9.22it/s]

finished frames 568000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  6%|▌         | 7202/125000 [12:52<4:03:17,  8.07it/s]

finished frames 576000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


  6%|▌         | 7301/125000 [13:03<3:18:21,  9.89it/s]

finished frames 584000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  6%|▌         | 7402/125000 [13:14<3:45:04,  8.71it/s]

finished frames 592000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


  6%|▌         | 7503/125000 [13:25<3:16:05,  9.99it/s]

finished frames 600000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


  6%|▌         | 7602/125000 [13:35<3:36:42,  9.03it/s]

finished frames 608000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


  6%|▌         | 7702/125000 [13:46<3:35:52,  9.06it/s]

finished frames 616000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


  6%|▌         | 7803/125000 [13:57<3:11:23, 10.21it/s]

finished frames 624000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


  6%|▋         | 7902/125000 [14:07<3:29:31,  9.31it/s]

finished frames 632000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


  6%|▋         | 8002/125000 [14:18<3:39:40,  8.88it/s]

finished frames 640000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


  6%|▋         | 8103/125000 [14:29<2:58:46, 10.90it/s]

finished frames 648000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  7%|▋         | 8202/125000 [14:39<3:38:24,  8.91it/s]

finished frames 656000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  7%|▋         | 8302/125000 [14:50<3:46:59,  8.57it/s]

finished frames 664000, mean/median reward 0.8/1.0, min/max reward 0.0/2.0


  7%|▋         | 8402/125000 [15:00<3:31:18,  9.20it/s]

finished frames 672000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


  7%|▋         | 8502/125000 [15:11<3:16:16,  9.89it/s]

finished frames 680000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


  7%|▋         | 8602/125000 [15:22<3:38:17,  8.89it/s]

finished frames 688000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


  7%|▋         | 8702/125000 [15:32<3:19:03,  9.74it/s]

finished frames 696000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


  7%|▋         | 8802/125000 [15:43<3:20:35,  9.65it/s]

finished frames 704000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  7%|▋         | 8902/125000 [15:54<3:33:42,  9.05it/s]

finished frames 712000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  7%|▋         | 9003/125000 [16:04<3:17:26,  9.79it/s]

finished frames 720000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


  7%|▋         | 9101/125000 [16:15<3:09:31, 10.19it/s]

finished frames 728000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


  7%|▋         | 9202/125000 [16:26<3:37:14,  8.88it/s]

finished frames 736000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  7%|▋         | 9303/125000 [16:36<3:20:25,  9.62it/s]

finished frames 744000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  8%|▊         | 9402/125000 [16:47<3:07:01, 10.30it/s]

finished frames 752000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


  8%|▊         | 9502/125000 [16:57<3:26:18,  9.33it/s]

finished frames 760000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


  8%|▊         | 9602/125000 [17:08<3:39:35,  8.76it/s]

finished frames 768000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


  8%|▊         | 9701/125000 [17:19<3:08:16, 10.21it/s]

finished frames 776000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


  8%|▊         | 9802/125000 [17:30<3:35:02,  8.93it/s]

finished frames 784000, mean/median reward 0.5/0.0, min/max reward 0.0/4.0


  8%|▊         | 9902/125000 [17:40<3:31:25,  9.07it/s]

finished frames 792000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  8%|▊         | 10002/125000 [17:51<3:17:03,  9.73it/s]

finished frames 800000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


  8%|▊         | 10102/125000 [18:01<3:35:23,  8.89it/s]

finished frames 808000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


  8%|▊         | 10202/125000 [18:12<3:46:45,  8.44it/s]

finished frames 816000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


  8%|▊         | 10302/125000 [18:23<3:17:04,  9.70it/s]

finished frames 824000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


  8%|▊         | 10402/125000 [18:34<3:39:22,  8.71it/s]

finished frames 832000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  8%|▊         | 10502/125000 [18:44<3:43:03,  8.56it/s]

finished frames 840000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


  8%|▊         | 10602/125000 [18:55<3:07:39, 10.16it/s]

finished frames 848000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  9%|▊         | 10702/125000 [19:05<3:29:19,  9.10it/s]

finished frames 856000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


  9%|▊         | 10802/125000 [19:16<3:42:44,  8.54it/s]

finished frames 864000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  9%|▊         | 10902/125000 [19:27<3:15:53,  9.71it/s]

finished frames 872000, mean/median reward 0.4/0.0, min/max reward 0.0/5.0


  9%|▉         | 11002/125000 [19:38<3:23:29,  9.34it/s]

finished frames 880000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  9%|▉         | 11102/125000 [19:48<3:35:11,  8.82it/s]

finished frames 888000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


  9%|▉         | 11202/125000 [19:59<3:10:40,  9.95it/s]

finished frames 896000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


  9%|▉         | 11302/125000 [20:10<3:29:11,  9.06it/s]

finished frames 904000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


  9%|▉         | 11402/125000 [20:20<3:40:09,  8.60it/s]

finished frames 912000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  9%|▉         | 11501/125000 [20:31<2:55:30, 10.78it/s]

finished frames 920000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


  9%|▉         | 11602/125000 [20:41<3:30:55,  8.96it/s]

finished frames 928000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


  9%|▉         | 11702/125000 [20:52<3:34:23,  8.81it/s]

finished frames 936000, mean/median reward 0.6/0.0, min/max reward 0.0/3.0


  9%|▉         | 11801/125000 [21:03<3:02:44, 10.32it/s]

finished frames 944000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 10%|▉         | 11902/125000 [21:13<3:32:14,  8.88it/s]

finished frames 952000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 10%|▉         | 12002/125000 [21:24<3:39:44,  8.57it/s]

finished frames 960000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 10%|▉         | 12102/125000 [21:35<3:11:11,  9.84it/s]

finished frames 968000, mean/median reward 0.4/0.0, min/max reward 0.0/4.0


 10%|▉         | 12202/125000 [21:46<3:27:36,  9.06it/s]

finished frames 976000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 10%|▉         | 12303/125000 [21:56<3:19:33,  9.41it/s]

finished frames 984000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 10%|▉         | 12402/125000 [22:07<3:14:05,  9.67it/s]

finished frames 992000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 10%|█         | 12502/125000 [22:18<3:42:19,  8.43it/s]

finished frames 1000000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 10%|█         | 12602/125000 [22:28<3:23:42,  9.20it/s]

finished frames 1008000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 10%|█         | 12701/125000 [22:39<3:09:45,  9.86it/s]

finished frames 1016000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 10%|█         | 12802/125000 [22:50<3:25:19,  9.11it/s]

finished frames 1024000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 10%|█         | 12903/125000 [23:00<3:23:16,  9.19it/s]

finished frames 1032000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 10%|█         | 13001/125000 [23:11<3:11:07,  9.77it/s]

finished frames 1040000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 10%|█         | 13102/125000 [23:22<3:38:39,  8.53it/s]

finished frames 1048000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 11%|█         | 13203/125000 [23:33<2:58:53, 10.42it/s]

finished frames 1056000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 11%|█         | 13302/125000 [23:43<3:12:43,  9.66it/s]

finished frames 1064000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 11%|█         | 13402/125000 [23:54<3:32:46,  8.74it/s]

finished frames 1072000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 11%|█         | 13503/125000 [24:05<3:07:57,  9.89it/s]

finished frames 1080000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 11%|█         | 13602/125000 [24:15<3:18:06,  9.37it/s]

finished frames 1088000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 11%|█         | 13702/125000 [24:26<3:23:36,  9.11it/s]

finished frames 1096000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 11%|█         | 13803/125000 [24:37<3:02:48, 10.14it/s]

finished frames 1104000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 11%|█         | 13902/125000 [24:47<3:25:44,  9.00it/s]

finished frames 1112000, mean/median reward 0.6/0.0, min/max reward 0.0/3.0


 11%|█         | 14002/125000 [24:58<3:47:01,  8.15it/s]

finished frames 1120000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 11%|█▏        | 14103/125000 [25:09<2:59:04, 10.32it/s]

finished frames 1128000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 11%|█▏        | 14202/125000 [25:19<3:20:17,  9.22it/s]

finished frames 1136000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 11%|█▏        | 14302/125000 [25:30<3:40:33,  8.36it/s]

finished frames 1144000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 12%|█▏        | 14403/125000 [25:41<2:57:45, 10.37it/s]

finished frames 1152000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 12%|█▏        | 14502/125000 [25:51<3:24:03,  9.02it/s]

finished frames 1160000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 12%|█▏        | 14602/125000 [26:02<3:28:21,  8.83it/s]

finished frames 1168000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 12%|█▏        | 14703/125000 [26:13<3:04:03,  9.99it/s]

finished frames 1176000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 12%|█▏        | 14802/125000 [26:23<3:22:35,  9.07it/s]

finished frames 1184000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 12%|█▏        | 14902/125000 [26:34<3:25:53,  8.91it/s]

finished frames 1192000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 12%|█▏        | 15003/125000 [26:45<2:57:41, 10.32it/s]

finished frames 1200000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 12%|█▏        | 15102/125000 [26:55<3:08:06,  9.74it/s]

finished frames 1208000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 12%|█▏        | 15202/125000 [27:06<3:28:35,  8.77it/s]

finished frames 1216000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 12%|█▏        | 15302/125000 [27:16<3:23:10,  9.00it/s]

finished frames 1224000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 12%|█▏        | 15402/125000 [27:27<3:12:22,  9.49it/s]

finished frames 1232000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 12%|█▏        | 15502/125000 [27:38<3:17:13,  9.25it/s]

finished frames 1240000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 12%|█▏        | 15603/125000 [27:49<2:58:19, 10.22it/s]

finished frames 1248000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 13%|█▎        | 15702/125000 [27:59<3:13:45,  9.40it/s]

finished frames 1256000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 13%|█▎        | 15802/125000 [28:10<3:43:41,  8.14it/s]

finished frames 1264000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 13%|█▎        | 15901/125000 [28:21<3:02:29,  9.96it/s]

finished frames 1272000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 13%|█▎        | 16002/125000 [28:31<3:21:17,  9.03it/s]

finished frames 1280000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 13%|█▎        | 16102/125000 [28:42<3:29:11,  8.68it/s]

finished frames 1288000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 13%|█▎        | 16202/125000 [28:53<3:00:16, 10.06it/s]

finished frames 1296000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 13%|█▎        | 16302/125000 [29:03<3:28:51,  8.67it/s]

finished frames 1304000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 13%|█▎        | 16402/125000 [29:14<3:20:05,  9.05it/s]

finished frames 1312000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 13%|█▎        | 16502/125000 [29:25<3:12:10,  9.41it/s]

finished frames 1320000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 13%|█▎        | 16602/125000 [29:36<3:15:35,  9.24it/s]

finished frames 1328000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 13%|█▎        | 16702/125000 [29:46<3:14:29,  9.28it/s]

finished frames 1336000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 13%|█▎        | 16802/125000 [29:57<3:03:29,  9.83it/s]

finished frames 1344000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 14%|█▎        | 16902/125000 [30:08<3:21:42,  8.93it/s]

finished frames 1352000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 14%|█▎        | 17002/125000 [30:18<3:20:45,  8.97it/s]

finished frames 1360000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 14%|█▎        | 17102/125000 [30:29<3:16:07,  9.17it/s]

finished frames 1368000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 14%|█▍        | 17202/125000 [30:40<3:20:14,  8.97it/s]

finished frames 1376000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 14%|█▍        | 17302/125000 [30:50<3:25:23,  8.74it/s]

finished frames 1384000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 14%|█▍        | 17402/125000 [31:01<3:08:36,  9.51it/s]

finished frames 1392000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 14%|█▍        | 17502/125000 [31:12<3:22:55,  8.83it/s]

finished frames 1400000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 14%|█▍        | 17603/125000 [31:22<3:06:34,  9.59it/s]

finished frames 1408000, mean/median reward 1.1/1.0, min/max reward 0.0/6.0


 14%|█▍        | 17702/125000 [31:33<3:16:32,  9.10it/s]

finished frames 1416000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 14%|█▍        | 17802/125000 [31:44<3:12:13,  9.29it/s]

finished frames 1424000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 14%|█▍        | 17903/125000 [31:54<3:04:45,  9.66it/s]

finished frames 1432000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 14%|█▍        | 18001/125000 [32:05<2:56:29, 10.10it/s]

finished frames 1440000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 14%|█▍        | 18102/125000 [32:16<3:24:39,  8.71it/s]

finished frames 1448000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 15%|█▍        | 18202/125000 [32:26<3:19:03,  8.94it/s]

finished frames 1456000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 15%|█▍        | 18302/125000 [32:37<3:05:09,  9.60it/s]

finished frames 1464000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 15%|█▍        | 18402/125000 [32:48<3:14:29,  9.13it/s]

finished frames 1472000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 15%|█▍        | 18503/125000 [32:58<3:02:54,  9.70it/s]

finished frames 1480000, mean/median reward 0.4/0.0, min/max reward 0.0/4.0


 15%|█▍        | 18602/125000 [33:09<3:01:06,  9.79it/s]

finished frames 1488000, mean/median reward 0.4/0.0, min/max reward 0.0/1.0


 15%|█▍        | 18702/125000 [33:20<3:27:28,  8.54it/s]

finished frames 1496000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 15%|█▌        | 18802/125000 [33:30<3:00:32,  9.80it/s]

finished frames 1504000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 15%|█▌        | 18902/125000 [33:41<3:13:45,  9.13it/s]

finished frames 1512000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 15%|█▌        | 19002/125000 [33:52<3:19:10,  8.87it/s]

finished frames 1520000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 15%|█▌        | 19102/125000 [34:02<2:59:13,  9.85it/s]

finished frames 1528000, mean/median reward 0.2/0.0, min/max reward 0.0/3.0


 15%|█▌        | 19202/125000 [34:13<3:10:39,  9.25it/s]

finished frames 1536000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 15%|█▌        | 19302/125000 [34:24<3:11:59,  9.18it/s]

finished frames 1544000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 16%|█▌        | 19403/125000 [34:34<3:10:46,  9.23it/s]

finished frames 1552000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 16%|█▌        | 19502/125000 [34:45<3:04:05,  9.55it/s]

finished frames 1560000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 16%|█▌        | 19602/125000 [34:56<3:21:41,  8.71it/s]

finished frames 1568000, mean/median reward 0.5/0.0, min/max reward 0.0/1.0


 16%|█▌        | 19703/125000 [35:06<3:12:37,  9.11it/s]

finished frames 1576000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 16%|█▌        | 19802/125000 [35:17<3:01:39,  9.65it/s]

finished frames 1584000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 16%|█▌        | 19902/125000 [35:28<3:17:13,  8.88it/s]

finished frames 1592000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 16%|█▌        | 20002/125000 [35:38<3:08:26,  9.29it/s]

finished frames 1600000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 16%|█▌        | 20101/125000 [35:49<2:49:20, 10.32it/s]

finished frames 1608000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 16%|█▌        | 20202/125000 [36:00<3:22:38,  8.62it/s]

finished frames 1616000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 16%|█▌        | 20302/125000 [36:10<3:02:15,  9.57it/s]

finished frames 1624000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 16%|█▋        | 20402/125000 [36:21<2:58:39,  9.76it/s]

finished frames 1632000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 16%|█▋        | 20502/125000 [36:32<3:21:39,  8.64it/s]

finished frames 1640000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 16%|█▋        | 20602/125000 [36:43<2:57:41,  9.79it/s]

finished frames 1648000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 17%|█▋        | 20702/125000 [36:53<3:21:02,  8.65it/s]

finished frames 1656000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 17%|█▋        | 20802/125000 [37:04<3:04:52,  9.39it/s]

finished frames 1664000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 17%|█▋        | 20902/125000 [37:15<3:02:30,  9.51it/s]

finished frames 1672000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 17%|█▋        | 21002/125000 [37:25<3:11:34,  9.05it/s]

finished frames 1680000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 17%|█▋        | 21102/125000 [37:36<3:10:29,  9.09it/s]

finished frames 1688000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 17%|█▋        | 21203/125000 [37:47<2:47:09, 10.35it/s]

finished frames 1696000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 17%|█▋        | 21302/125000 [37:57<3:02:51,  9.45it/s]

finished frames 1704000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 17%|█▋        | 21402/125000 [38:08<3:08:21,  9.17it/s]

finished frames 1712000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 17%|█▋        | 21502/125000 [38:19<2:43:48, 10.53it/s]

finished frames 1720000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 17%|█▋        | 21602/125000 [38:29<3:08:48,  9.13it/s]

finished frames 1728000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 17%|█▋        | 21702/125000 [38:40<3:11:15,  9.00it/s]

finished frames 1736000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 17%|█▋        | 21801/125000 [38:51<2:49:28, 10.15it/s]

finished frames 1744000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 18%|█▊        | 21902/125000 [39:01<3:21:03,  8.55it/s]

finished frames 1752000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 18%|█▊        | 22002/125000 [39:12<3:09:12,  9.07it/s]

finished frames 1760000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 18%|█▊        | 22102/125000 [39:23<2:51:12, 10.02it/s]

finished frames 1768000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 18%|█▊        | 22202/125000 [39:34<3:04:40,  9.28it/s]

finished frames 1776000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 18%|█▊        | 22302/125000 [39:44<3:10:53,  8.97it/s]

finished frames 1784000, mean/median reward 0.4/0.0, min/max reward 0.0/5.0


 18%|█▊        | 22401/125000 [39:55<2:53:51,  9.84it/s]

finished frames 1792000, mean/median reward 0.4/0.0, min/max reward 0.0/4.0


 18%|█▊        | 22502/125000 [40:06<3:18:44,  8.60it/s]

finished frames 1800000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 18%|█▊        | 22603/125000 [40:17<2:50:42, 10.00it/s]

finished frames 1808000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 18%|█▊        | 22702/125000 [40:27<3:07:50,  9.08it/s]

finished frames 1816000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 18%|█▊        | 22802/125000 [40:38<3:14:52,  8.74it/s]

finished frames 1824000, mean/median reward 0.4/0.0, min/max reward 0.0/1.0


 18%|█▊        | 22902/125000 [40:49<2:49:11, 10.06it/s]

finished frames 1832000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 18%|█▊        | 23002/125000 [40:59<3:08:51,  9.00it/s]

finished frames 1840000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 18%|█▊        | 23102/125000 [41:10<3:23:47,  8.33it/s]

finished frames 1848000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 19%|█▊        | 23202/125000 [41:21<3:19:17,  8.51it/s]

finished frames 1856000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 19%|█▊        | 23302/125000 [41:32<3:09:26,  8.95it/s]

finished frames 1864000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 19%|█▊        | 23402/125000 [41:43<2:44:03, 10.32it/s]

finished frames 1872000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 19%|█▉        | 23502/125000 [41:54<3:08:45,  8.96it/s]

finished frames 1880000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 19%|█▉        | 23602/125000 [42:04<3:04:47,  9.15it/s]

finished frames 1888000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 19%|█▉        | 23701/125000 [42:15<2:49:59,  9.93it/s]

finished frames 1896000, mean/median reward 0.7/0.0, min/max reward 0.0/3.0


 19%|█▉        | 23802/125000 [42:26<3:02:01,  9.27it/s]

finished frames 1904000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 19%|█▉        | 23903/125000 [42:36<2:57:29,  9.49it/s]

finished frames 1912000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 19%|█▉        | 24002/125000 [42:47<2:59:31,  9.38it/s]

finished frames 1920000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 19%|█▉        | 24102/125000 [42:58<3:09:30,  8.87it/s]

finished frames 1928000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 19%|█▉        | 24203/125000 [43:08<2:55:26,  9.58it/s]

finished frames 1936000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 19%|█▉        | 24302/125000 [43:19<3:03:24,  9.15it/s]

finished frames 1944000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 20%|█▉        | 24402/125000 [43:30<3:11:29,  8.76it/s]

finished frames 1952000, mean/median reward 0.2/0.0, min/max reward 0.0/3.0


 20%|█▉        | 24502/125000 [43:40<3:05:18,  9.04it/s]

finished frames 1960000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 20%|█▉        | 24602/125000 [43:51<2:49:03,  9.90it/s]

finished frames 1968000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 20%|█▉        | 24702/125000 [44:02<3:01:55,  9.19it/s]

finished frames 1976000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 20%|█▉        | 24802/125000 [44:12<3:02:17,  9.16it/s]

finished frames 1984000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 20%|█▉        | 24901/125000 [44:23<2:49:47,  9.83it/s]

finished frames 1992000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 20%|██        | 25002/125000 [44:34<3:08:47,  8.83it/s]

finished frames 2000000, mean/median reward 0.6/0.0, min/max reward 0.0/4.0


 20%|██        | 25103/125000 [44:45<2:41:47, 10.29it/s]

finished frames 2008000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 20%|██        | 25202/125000 [44:55<3:01:17,  9.17it/s]

finished frames 2016000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 20%|██        | 25302/125000 [45:06<3:01:06,  9.18it/s]

finished frames 2024000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 20%|██        | 25403/125000 [45:17<2:47:03,  9.94it/s]

finished frames 2032000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 20%|██        | 25502/125000 [45:27<2:58:25,  9.29it/s]

finished frames 2040000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 20%|██        | 25602/125000 [45:38<3:10:01,  8.72it/s]

finished frames 2048000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 21%|██        | 25702/125000 [45:49<2:48:54,  9.80it/s]

finished frames 2056000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 21%|██        | 25802/125000 [45:59<3:01:53,  9.09it/s]

finished frames 2064000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 21%|██        | 25902/125000 [46:10<3:12:19,  8.59it/s]

finished frames 2072000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 21%|██        | 26002/125000 [46:21<2:37:14, 10.49it/s]

finished frames 2080000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 21%|██        | 26102/125000 [46:31<3:01:47,  9.07it/s]

finished frames 2088000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 21%|██        | 26202/125000 [46:42<3:02:25,  9.03it/s]

finished frames 2096000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 21%|██        | 26301/125000 [46:53<2:41:27, 10.19it/s]

finished frames 2104000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 21%|██        | 26402/125000 [47:03<2:58:41,  9.20it/s]

finished frames 2112000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 21%|██        | 26502/125000 [47:14<3:14:04,  8.46it/s]

finished frames 2120000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 21%|██▏       | 26602/125000 [47:25<2:59:20,  9.14it/s]

finished frames 2128000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 21%|██▏       | 26702/125000 [47:36<2:56:21,  9.29it/s]

finished frames 2136000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 21%|██▏       | 26802/125000 [47:46<2:53:43,  9.42it/s]

finished frames 2144000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 22%|██▏       | 26902/125000 [47:57<2:52:48,  9.46it/s]

finished frames 2152000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 22%|██▏       | 27002/125000 [48:08<2:57:54,  9.18it/s]

finished frames 2160000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 22%|██▏       | 27102/125000 [48:18<2:45:50,  9.84it/s]

finished frames 2168000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 22%|██▏       | 27202/125000 [48:29<2:54:33,  9.34it/s]

finished frames 2176000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 22%|██▏       | 27302/125000 [48:40<2:59:51,  9.05it/s]

finished frames 2184000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 22%|██▏       | 27402/125000 [48:51<2:42:38, 10.00it/s]

finished frames 2192000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 22%|██▏       | 27502/125000 [49:01<3:03:52,  8.84it/s]

finished frames 2200000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 22%|██▏       | 27602/125000 [49:12<2:59:41,  9.03it/s]

finished frames 2208000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 22%|██▏       | 27702/125000 [49:23<2:40:17, 10.12it/s]

finished frames 2216000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 22%|██▏       | 27802/125000 [49:34<3:01:47,  8.91it/s]

finished frames 2224000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 22%|██▏       | 27903/125000 [49:44<2:42:50,  9.94it/s]

finished frames 2232000, mean/median reward 0.2/0.0, min/max reward 0.0/3.0


 22%|██▏       | 28001/125000 [49:55<2:42:51,  9.93it/s]

finished frames 2240000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 22%|██▏       | 28102/125000 [50:06<3:05:09,  8.72it/s]

finished frames 2248000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 23%|██▎       | 28203/125000 [50:16<2:53:51,  9.28it/s]

finished frames 2256000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 23%|██▎       | 28302/125000 [50:27<2:53:03,  9.31it/s]

finished frames 2264000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 23%|██▎       | 28402/125000 [50:38<3:04:34,  8.72it/s]

finished frames 2272000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 23%|██▎       | 28503/125000 [50:48<2:43:20,  9.85it/s]

finished frames 2280000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 23%|██▎       | 28601/125000 [50:59<2:42:59,  9.86it/s]

finished frames 2288000, mean/median reward 0.6/0.0, min/max reward 0.0/3.0


 23%|██▎       | 28702/125000 [51:10<3:12:21,  8.34it/s]

finished frames 2296000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 23%|██▎       | 28801/125000 [51:20<2:43:46,  9.79it/s]

finished frames 2304000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 23%|██▎       | 28902/125000 [51:31<3:13:13,  8.29it/s]

finished frames 2312000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 23%|██▎       | 29002/125000 [51:42<2:56:32,  9.06it/s]

finished frames 2320000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 23%|██▎       | 29102/125000 [51:52<2:45:52,  9.64it/s]

finished frames 2328000, mean/median reward 0.6/0.0, min/max reward 0.0/3.0


 23%|██▎       | 29202/125000 [52:03<2:45:31,  9.65it/s]

finished frames 2336000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 23%|██▎       | 29302/125000 [52:14<3:03:35,  8.69it/s]

finished frames 2344000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 24%|██▎       | 29402/125000 [52:25<2:38:18, 10.06it/s]

finished frames 2352000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 24%|██▎       | 29502/125000 [52:35<2:57:02,  8.99it/s]

finished frames 2360000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 24%|██▎       | 29602/125000 [52:46<2:56:30,  9.01it/s]

finished frames 2368000, mean/median reward 0.4/0.0, min/max reward 0.0/1.0


 24%|██▍       | 29702/125000 [52:57<2:42:43,  9.76it/s]

finished frames 2376000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 24%|██▍       | 29802/125000 [53:07<2:58:30,  8.89it/s]

finished frames 2384000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 24%|██▍       | 29903/125000 [53:18<2:53:30,  9.13it/s]

finished frames 2392000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 24%|██▍       | 30002/125000 [53:29<2:45:54,  9.54it/s]

finished frames 2400000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 24%|██▍       | 30102/125000 [53:40<3:02:16,  8.68it/s]

finished frames 2408000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 24%|██▍       | 30203/125000 [53:50<2:43:02,  9.69it/s]

finished frames 2416000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 24%|██▍       | 30301/125000 [54:01<2:34:56, 10.19it/s]

finished frames 2424000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 24%|██▍       | 30402/125000 [54:11<2:49:41,  9.29it/s]

finished frames 2432000, mean/median reward 0.7/0.0, min/max reward 0.0/3.0


 24%|██▍       | 30502/125000 [54:22<2:58:12,  8.84it/s]

finished frames 2440000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 24%|██▍       | 30601/125000 [54:33<2:34:28, 10.18it/s]

finished frames 2448000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 25%|██▍       | 30702/125000 [54:44<2:57:13,  8.87it/s]

finished frames 2456000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 25%|██▍       | 30803/125000 [54:54<2:59:43,  8.74it/s]

finished frames 2464000, mean/median reward 0.6/0.0, min/max reward 0.0/4.0


 25%|██▍       | 30901/125000 [55:05<2:38:30,  9.89it/s]

finished frames 2472000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 25%|██▍       | 31002/125000 [55:16<2:58:26,  8.78it/s]

finished frames 2480000, mean/median reward 0.6/0.0, min/max reward 0.0/4.0


 25%|██▍       | 31102/125000 [55:26<2:41:14,  9.71it/s]

finished frames 2488000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 25%|██▍       | 31202/125000 [55:37<3:06:32,  8.38it/s]

finished frames 2496000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 25%|██▌       | 31302/125000 [55:48<2:56:41,  8.84it/s]

finished frames 2504000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 25%|██▌       | 31403/125000 [55:59<2:23:50, 10.85it/s]

finished frames 2512000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 25%|██▌       | 31502/125000 [56:09<2:46:08,  9.38it/s]

finished frames 2520000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 25%|██▌       | 31602/125000 [56:20<2:54:18,  8.93it/s]

finished frames 2528000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 25%|██▌       | 31701/125000 [56:31<2:26:53, 10.59it/s]

finished frames 2536000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 25%|██▌       | 31802/125000 [56:41<2:48:13,  9.23it/s]

finished frames 2544000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 26%|██▌       | 31902/125000 [56:52<3:00:22,  8.60it/s]

finished frames 2552000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 26%|██▌       | 32001/125000 [57:03<2:40:02,  9.68it/s]

finished frames 2560000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 26%|██▌       | 32102/125000 [57:14<2:48:53,  9.17it/s]

finished frames 2568000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 26%|██▌       | 32202/125000 [57:24<2:47:05,  9.26it/s]

finished frames 2576000, mean/median reward 0.7/1.0, min/max reward 0.0/2.0


 26%|██▌       | 32302/125000 [57:35<2:48:43,  9.16it/s]

finished frames 2584000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 26%|██▌       | 32402/125000 [57:46<2:53:55,  8.87it/s]

finished frames 2592000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 26%|██▌       | 32502/125000 [57:56<2:33:41, 10.03it/s]

finished frames 2600000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 26%|██▌       | 32602/125000 [58:07<2:49:19,  9.09it/s]

finished frames 2608000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 26%|██▌       | 32702/125000 [58:18<3:05:11,  8.31it/s]

finished frames 2616000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 26%|██▌       | 32803/125000 [58:29<2:22:42, 10.77it/s]

finished frames 2624000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 26%|██▋       | 32902/125000 [58:39<2:52:24,  8.90it/s]

finished frames 2632000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 26%|██▋       | 33002/125000 [58:50<2:47:18,  9.16it/s]

finished frames 2640000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 26%|██▋       | 33102/125000 [59:01<2:30:16, 10.19it/s]

finished frames 2648000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 27%|██▋       | 33202/125000 [59:11<2:55:32,  8.72it/s]

finished frames 2656000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 27%|██▋       | 33302/125000 [59:22<3:08:22,  8.11it/s]

finished frames 2664000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 27%|██▋       | 33402/125000 [59:33<2:35:38,  9.81it/s]

finished frames 2672000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 27%|██▋       | 33502/125000 [59:43<2:46:06,  9.18it/s]

finished frames 2680000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 27%|██▋       | 33602/125000 [59:54<2:51:12,  8.90it/s]

finished frames 2688000, mean/median reward 0.8/0.0, min/max reward 0.0/2.0


 27%|██▋       | 33701/125000 [1:00:05<2:25:05, 10.49it/s]

finished frames 2696000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 27%|██▋       | 33802/125000 [1:00:15<2:48:15,  9.03it/s]

finished frames 2704000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 27%|██▋       | 33902/125000 [1:00:26<2:55:04,  8.67it/s]

finished frames 2712000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 27%|██▋       | 34002/125000 [1:00:37<2:29:55, 10.12it/s]

finished frames 2720000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 27%|██▋       | 34102/125000 [1:00:47<2:45:59,  9.13it/s]

finished frames 2728000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 27%|██▋       | 34202/125000 [1:00:58<2:52:27,  8.77it/s]

finished frames 2736000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 27%|██▋       | 34302/125000 [1:01:09<2:28:58, 10.15it/s]

finished frames 2744000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 28%|██▊       | 34402/125000 [1:01:20<2:58:02,  8.48it/s]

finished frames 2752000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 28%|██▊       | 34502/125000 [1:01:30<2:38:39,  9.51it/s]

finished frames 2760000, mean/median reward 0.2/0.0, min/max reward 0.0/3.0


 28%|██▊       | 34602/125000 [1:01:41<2:51:41,  8.77it/s]

finished frames 2768000, mean/median reward 0.4/0.0, min/max reward 0.0/1.0


 28%|██▊       | 34702/125000 [1:01:52<2:47:54,  8.96it/s]

finished frames 2776000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 28%|██▊       | 34803/125000 [1:02:03<2:25:46, 10.31it/s]

finished frames 2784000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 28%|██▊       | 34902/125000 [1:02:13<2:37:22,  9.54it/s]

finished frames 2792000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 28%|██▊       | 35002/125000 [1:02:24<2:51:29,  8.75it/s]

finished frames 2800000, mean/median reward 0.3/0.0, min/max reward 0.0/4.0


 28%|██▊       | 35102/125000 [1:02:34<2:39:11,  9.41it/s]

finished frames 2808000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 28%|██▊       | 35202/125000 [1:02:45<2:42:51,  9.19it/s]

finished frames 2816000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 28%|██▊       | 35302/125000 [1:02:56<2:47:59,  8.90it/s]

finished frames 2824000, mean/median reward 0.6/0.0, min/max reward 0.0/4.0


 28%|██▊       | 35403/125000 [1:03:07<2:24:42, 10.32it/s]

finished frames 2832000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 28%|██▊       | 35502/125000 [1:03:17<2:58:24,  8.36it/s]

finished frames 2840000, mean/median reward 0.3/0.0, min/max reward 0.0/3.0


 28%|██▊       | 35602/125000 [1:03:28<2:52:57,  8.61it/s]

finished frames 2848000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 29%|██▊       | 35702/125000 [1:03:39<2:28:34, 10.02it/s]

finished frames 2856000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 29%|██▊       | 35802/125000 [1:03:50<2:53:40,  8.56it/s]

finished frames 2864000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 29%|██▊       | 35903/125000 [1:04:00<2:34:36,  9.60it/s]

finished frames 2872000, mean/median reward 0.3/0.0, min/max reward 0.0/3.0


 29%|██▉       | 36001/125000 [1:04:11<2:29:34,  9.92it/s]

finished frames 2880000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 29%|██▉       | 36102/125000 [1:04:21<2:47:43,  8.83it/s]

finished frames 2888000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 29%|██▉       | 36202/125000 [1:04:32<2:50:38,  8.67it/s]

finished frames 2896000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 29%|██▉       | 36302/125000 [1:04:43<2:43:11,  9.06it/s]

finished frames 2904000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 29%|██▉       | 36402/125000 [1:04:54<2:54:16,  8.47it/s]

finished frames 2912000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 29%|██▉       | 36502/125000 [1:05:04<2:33:01,  9.64it/s]

finished frames 2920000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 29%|██▉       | 36602/125000 [1:05:15<2:53:06,  8.51it/s]

finished frames 2928000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 29%|██▉       | 36702/125000 [1:05:26<2:47:04,  8.81it/s]

finished frames 2936000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 29%|██▉       | 36802/125000 [1:05:37<2:25:41, 10.09it/s]

finished frames 2944000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 30%|██▉       | 36902/125000 [1:05:47<2:37:55,  9.30it/s]

finished frames 2952000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 30%|██▉       | 37002/125000 [1:05:58<2:45:44,  8.85it/s]

finished frames 2960000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 30%|██▉       | 37102/125000 [1:06:08<2:29:50,  9.78it/s]

finished frames 2968000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 30%|██▉       | 37202/125000 [1:06:19<2:35:28,  9.41it/s]

finished frames 2976000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 30%|██▉       | 37302/125000 [1:06:30<2:39:30,  9.16it/s]

finished frames 2984000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 30%|██▉       | 37402/125000 [1:06:40<2:30:22,  9.71it/s]

finished frames 2992000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 30%|███       | 37502/125000 [1:06:51<2:41:44,  9.02it/s]

finished frames 3000000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 30%|███       | 37602/125000 [1:07:02<2:54:46,  8.33it/s]

finished frames 3008000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 30%|███       | 37702/125000 [1:07:12<2:30:41,  9.66it/s]

finished frames 3016000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 30%|███       | 37802/125000 [1:07:23<2:45:51,  8.76it/s]

finished frames 3024000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 30%|███       | 37902/125000 [1:07:34<2:40:03,  9.07it/s]

finished frames 3032000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 30%|███       | 38002/125000 [1:07:44<2:28:15,  9.78it/s]

finished frames 3040000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 30%|███       | 38102/125000 [1:07:55<2:27:28,  9.82it/s]

finished frames 3048000, mean/median reward 0.4/0.0, min/max reward 0.0/1.0


 31%|███       | 38202/125000 [1:08:06<2:41:12,  8.97it/s]

finished frames 3056000, mean/median reward 0.6/0.0, min/max reward 0.0/6.0


 31%|███       | 38303/125000 [1:08:17<2:21:01, 10.25it/s]

finished frames 3064000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 31%|███       | 38402/125000 [1:08:27<2:32:17,  9.48it/s]

finished frames 3072000, mean/median reward 0.3/0.0, min/max reward 0.0/3.0


 31%|███       | 38502/125000 [1:08:38<2:46:19,  8.67it/s]

finished frames 3080000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 31%|███       | 38603/125000 [1:08:49<2:22:06, 10.13it/s]

finished frames 3088000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 31%|███       | 38702/125000 [1:08:59<2:42:40,  8.84it/s]

finished frames 3096000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 31%|███       | 38802/125000 [1:09:10<2:52:34,  8.32it/s]

finished frames 3104000, mean/median reward 0.2/0.0, min/max reward 0.0/3.0


 31%|███       | 38902/125000 [1:09:21<2:25:00,  9.90it/s]

finished frames 3112000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 31%|███       | 39002/125000 [1:09:32<2:47:21,  8.56it/s]

finished frames 3120000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 31%|███▏      | 39103/125000 [1:09:42<2:31:33,  9.45it/s]

finished frames 3128000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 31%|███▏      | 39201/125000 [1:09:53<2:24:39,  9.89it/s]

finished frames 3136000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 31%|███▏      | 39302/125000 [1:10:04<2:37:57,  9.04it/s]

finished frames 3144000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 32%|███▏      | 39403/125000 [1:10:15<2:20:59, 10.12it/s]

finished frames 3152000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 32%|███▏      | 39502/125000 [1:10:25<2:40:24,  8.88it/s]

finished frames 3160000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 32%|███▏      | 39602/125000 [1:10:36<2:37:50,  9.02it/s]

finished frames 3168000, mean/median reward 0.6/0.0, min/max reward 0.0/5.0


 32%|███▏      | 39702/125000 [1:10:47<2:13:25, 10.66it/s]

finished frames 3176000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 32%|███▏      | 39802/125000 [1:10:57<2:34:34,  9.19it/s]

finished frames 3184000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 32%|███▏      | 39902/125000 [1:11:08<2:42:15,  8.74it/s]

finished frames 3192000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 32%|███▏      | 40002/125000 [1:11:19<2:22:03,  9.97it/s]

finished frames 3200000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 32%|███▏      | 40102/125000 [1:11:29<2:38:54,  8.90it/s]

finished frames 3208000, mean/median reward 0.2/0.0, min/max reward 0.0/3.0


 32%|███▏      | 40202/125000 [1:11:40<2:39:33,  8.86it/s]

finished frames 3216000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 32%|███▏      | 40301/125000 [1:11:51<2:24:07,  9.79it/s]

finished frames 3224000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 32%|███▏      | 40402/125000 [1:12:02<2:44:46,  8.56it/s]

finished frames 3232000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 32%|███▏      | 40502/125000 [1:12:13<2:22:37,  9.87it/s]

finished frames 3240000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 32%|███▏      | 40602/125000 [1:12:23<2:36:56,  8.96it/s]

finished frames 3248000, mean/median reward 0.9/0.0, min/max reward 0.0/4.0


 33%|███▎      | 40702/125000 [1:12:34<2:43:33,  8.59it/s]

finished frames 3256000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 33%|███▎      | 40802/125000 [1:12:45<2:16:21, 10.29it/s]

finished frames 3264000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 33%|███▎      | 40902/125000 [1:12:55<2:28:54,  9.41it/s]

finished frames 3272000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 33%|███▎      | 41002/125000 [1:13:06<2:42:43,  8.60it/s]

finished frames 3280000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 33%|███▎      | 41102/125000 [1:13:17<2:34:12,  9.07it/s]

finished frames 3288000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 33%|███▎      | 41202/125000 [1:13:27<2:30:01,  9.31it/s]

finished frames 3296000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 33%|███▎      | 41302/125000 [1:13:38<2:36:20,  8.92it/s]

finished frames 3304000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 33%|███▎      | 41402/125000 [1:13:49<2:20:07,  9.94it/s]

finished frames 3312000, mean/median reward 0.7/0.0, min/max reward 0.0/4.0


 33%|███▎      | 41502/125000 [1:14:00<2:36:14,  8.91it/s]

finished frames 3320000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 33%|███▎      | 41602/125000 [1:14:10<2:38:38,  8.76it/s]

finished frames 3328000, mean/median reward 0.4/0.0, min/max reward 0.0/4.0


 33%|███▎      | 41702/125000 [1:14:21<2:16:36, 10.16it/s]

finished frames 3336000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 33%|███▎      | 41802/125000 [1:14:32<2:32:19,  9.10it/s]

finished frames 3344000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 34%|███▎      | 41902/125000 [1:14:42<2:30:31,  9.20it/s]

finished frames 3352000, mean/median reward 0.8/0.0, min/max reward 0.0/4.0


 34%|███▎      | 42002/125000 [1:14:53<2:27:31,  9.38it/s]

finished frames 3360000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 34%|███▎      | 42102/125000 [1:15:04<2:32:31,  9.06it/s]

finished frames 3368000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 34%|███▍      | 42203/125000 [1:15:14<2:15:48, 10.16it/s]

finished frames 3376000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 34%|███▍      | 42302/125000 [1:15:25<2:29:32,  9.22it/s]

finished frames 3384000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 34%|███▍      | 42402/125000 [1:15:36<2:31:23,  9.09it/s]

finished frames 3392000, mean/median reward 0.3/0.0, min/max reward 0.0/3.0


 34%|███▍      | 42503/125000 [1:15:47<2:12:12, 10.40it/s]

finished frames 3400000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 34%|███▍      | 42602/125000 [1:15:57<2:29:56,  9.16it/s]

finished frames 3408000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 34%|███▍      | 42702/125000 [1:16:08<2:34:18,  8.89it/s]

finished frames 3416000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 34%|███▍      | 42803/125000 [1:16:19<2:14:46, 10.16it/s]

finished frames 3424000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 34%|███▍      | 42902/125000 [1:16:29<2:28:09,  9.24it/s]

finished frames 3432000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 34%|███▍      | 43002/125000 [1:16:40<2:34:07,  8.87it/s]

finished frames 3440000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 34%|███▍      | 43102/125000 [1:16:51<2:12:19, 10.31it/s]

finished frames 3448000, mean/median reward 0.3/0.0, min/max reward 0.0/3.0


 35%|███▍      | 43202/125000 [1:17:01<2:28:19,  9.19it/s]

finished frames 3456000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 35%|███▍      | 43302/125000 [1:17:12<2:30:49,  9.03it/s]

finished frames 3464000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 35%|███▍      | 43401/125000 [1:17:23<2:17:44,  9.87it/s]

finished frames 3472000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 35%|███▍      | 43502/125000 [1:17:34<2:35:57,  8.71it/s]

finished frames 3480000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 35%|███▍      | 43603/125000 [1:17:45<2:16:00,  9.97it/s]

finished frames 3488000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 35%|███▍      | 43702/125000 [1:17:55<2:25:01,  9.34it/s]

finished frames 3496000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 35%|███▌      | 43802/125000 [1:18:06<2:44:21,  8.23it/s]

finished frames 3504000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 35%|███▌      | 43902/125000 [1:18:17<2:14:00, 10.09it/s]

finished frames 3512000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 35%|███▌      | 44002/125000 [1:18:27<2:32:34,  8.85it/s]

finished frames 3520000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 35%|███▌      | 44102/125000 [1:18:38<2:40:32,  8.40it/s]

finished frames 3528000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 35%|███▌      | 44203/125000 [1:18:49<2:10:59, 10.28it/s]

finished frames 3536000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 35%|███▌      | 44302/125000 [1:18:59<2:28:12,  9.08it/s]

finished frames 3544000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 36%|███▌      | 44402/125000 [1:19:10<2:38:23,  8.48it/s]

finished frames 3552000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 36%|███▌      | 44501/125000 [1:19:21<2:14:22,  9.98it/s]

finished frames 3560000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 36%|███▌      | 44602/125000 [1:19:32<2:29:57,  8.94it/s]

finished frames 3568000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 36%|███▌      | 44703/125000 [1:19:42<2:20:21,  9.54it/s]

finished frames 3576000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 36%|███▌      | 44802/125000 [1:19:53<2:20:19,  9.53it/s]

finished frames 3584000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 36%|███▌      | 44902/125000 [1:20:04<2:34:25,  8.64it/s]

finished frames 3592000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 36%|███▌      | 45003/125000 [1:20:15<2:11:47, 10.12it/s]

finished frames 3600000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 36%|███▌      | 45102/125000 [1:20:25<2:36:50,  8.49it/s]

finished frames 3608000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 36%|███▌      | 45202/125000 [1:20:36<2:29:05,  8.92it/s]

finished frames 3616000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 36%|███▌      | 45303/125000 [1:20:47<2:08:26, 10.34it/s]

finished frames 3624000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 36%|███▋      | 45402/125000 [1:20:57<2:32:30,  8.70it/s]

finished frames 3632000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 36%|███▋      | 45502/125000 [1:21:08<2:31:36,  8.74it/s]

finished frames 3640000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 36%|███▋      | 45601/125000 [1:21:19<2:07:41, 10.36it/s]

finished frames 3648000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 37%|███▋      | 45702/125000 [1:21:30<2:52:29,  7.66it/s]

finished frames 3656000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 37%|███▋      | 45803/125000 [1:21:40<2:23:05,  9.22it/s]

finished frames 3664000, mean/median reward 0.3/0.0, min/max reward 0.0/5.0


 37%|███▋      | 45902/125000 [1:21:51<2:23:51,  9.16it/s]

finished frames 3672000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 37%|███▋      | 46002/125000 [1:22:02<2:23:35,  9.17it/s]

finished frames 3680000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 37%|███▋      | 46102/125000 [1:22:12<2:19:20,  9.44it/s]

finished frames 3688000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 37%|███▋      | 46202/125000 [1:22:23<2:28:19,  8.85it/s]

finished frames 3696000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 37%|███▋      | 46302/125000 [1:22:34<2:27:25,  8.90it/s]

finished frames 3704000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 37%|███▋      | 46401/125000 [1:22:45<2:02:19, 10.71it/s]

finished frames 3712000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 37%|███▋      | 46502/125000 [1:22:55<2:27:57,  8.84it/s]

finished frames 3720000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 37%|███▋      | 46602/125000 [1:23:06<2:36:08,  8.37it/s]

finished frames 3728000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 37%|███▋      | 46702/125000 [1:23:17<2:33:18,  8.51it/s]

finished frames 3736000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 37%|███▋      | 46802/125000 [1:23:28<2:32:02,  8.57it/s]

finished frames 3744000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 38%|███▊      | 46902/125000 [1:23:38<2:24:45,  8.99it/s]

finished frames 3752000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 38%|███▊      | 47001/125000 [1:23:49<2:11:58,  9.85it/s]

finished frames 3760000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 38%|███▊      | 47102/125000 [1:24:00<2:22:34,  9.11it/s]

finished frames 3768000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 38%|███▊      | 47203/125000 [1:24:10<2:10:09,  9.96it/s]

finished frames 3776000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 38%|███▊      | 47301/125000 [1:24:21<2:10:54,  9.89it/s]

finished frames 3784000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 38%|███▊      | 47402/125000 [1:24:32<2:25:10,  8.91it/s]

finished frames 3792000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 38%|███▊      | 47501/125000 [1:24:43<2:03:04, 10.49it/s]

finished frames 3800000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 38%|███▊      | 47602/125000 [1:24:53<2:23:35,  8.98it/s]

finished frames 3808000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 38%|███▊      | 47702/125000 [1:25:04<2:20:14,  9.19it/s]

finished frames 3816000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 38%|███▊      | 47802/125000 [1:25:15<2:29:47,  8.59it/s]

finished frames 3824000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 38%|███▊      | 47902/125000 [1:25:26<2:23:48,  8.94it/s]

finished frames 3832000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 38%|███▊      | 48002/125000 [1:25:37<2:00:37, 10.64it/s]

finished frames 3840000, mean/median reward 1.1/1.0, min/max reward 0.0/6.0


 38%|███▊      | 48102/125000 [1:25:47<2:21:18,  9.07it/s]

finished frames 3848000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 39%|███▊      | 48202/125000 [1:25:58<2:36:28,  8.18it/s]

finished frames 3856000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 39%|███▊      | 48301/125000 [1:26:09<2:04:36, 10.26it/s]

finished frames 3864000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 39%|███▊      | 48402/125000 [1:26:20<2:22:21,  8.97it/s]

finished frames 3872000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 39%|███▉      | 48502/125000 [1:26:30<2:25:57,  8.74it/s]

finished frames 3880000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 39%|███▉      | 48602/125000 [1:26:41<2:08:10,  9.93it/s]

finished frames 3888000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 39%|███▉      | 48702/125000 [1:26:52<2:26:10,  8.70it/s]

finished frames 3896000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 39%|███▉      | 48803/125000 [1:27:02<2:10:22,  9.74it/s]

finished frames 3904000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 39%|███▉      | 48901/125000 [1:27:13<2:02:48, 10.33it/s]

finished frames 3912000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 39%|███▉      | 49002/125000 [1:27:24<2:35:57,  8.12it/s]

finished frames 3920000, mean/median reward 0.8/0.0, min/max reward 0.0/3.0


 39%|███▉      | 49102/125000 [1:27:34<2:18:53,  9.11it/s]

finished frames 3928000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 39%|███▉      | 49202/125000 [1:27:45<2:13:37,  9.45it/s]

finished frames 3936000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 39%|███▉      | 49302/125000 [1:27:56<2:20:38,  8.97it/s]

finished frames 3944000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 40%|███▉      | 49403/125000 [1:28:07<1:56:56, 10.77it/s]

finished frames 3952000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 40%|███▉      | 49502/125000 [1:28:17<2:14:40,  9.34it/s]

finished frames 3960000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 40%|███▉      | 49602/125000 [1:28:28<2:23:48,  8.74it/s]

finished frames 3968000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 40%|███▉      | 49702/125000 [1:28:39<2:00:24, 10.42it/s]

finished frames 3976000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 40%|███▉      | 49802/125000 [1:28:49<2:17:22,  9.12it/s]

finished frames 3984000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 40%|███▉      | 49902/125000 [1:29:00<2:19:03,  9.00it/s]

finished frames 3992000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 40%|████      | 50002/125000 [1:29:11<2:18:35,  9.02it/s]

finished frames 4000000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 40%|████      | 50102/125000 [1:29:22<2:23:39,  8.69it/s]

finished frames 4008000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 40%|████      | 50203/125000 [1:29:33<2:01:32, 10.26it/s]

finished frames 4016000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 40%|████      | 50302/125000 [1:29:43<2:22:59,  8.71it/s]

finished frames 4024000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 40%|████      | 50402/125000 [1:29:54<2:19:06,  8.94it/s]

finished frames 4032000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 40%|████      | 50502/125000 [1:30:05<2:09:05,  9.62it/s]

finished frames 4040000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 40%|████      | 50602/125000 [1:30:15<2:13:28,  9.29it/s]

finished frames 4048000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 41%|████      | 50703/125000 [1:30:26<2:07:49,  9.69it/s]

finished frames 4056000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 41%|████      | 50802/125000 [1:30:37<2:12:30,  9.33it/s]

finished frames 4064000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 41%|████      | 50902/125000 [1:30:48<2:16:28,  9.05it/s]

finished frames 4072000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 41%|████      | 51002/125000 [1:30:58<2:08:49,  9.57it/s]

finished frames 4080000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 41%|████      | 51102/125000 [1:31:09<2:11:13,  9.39it/s]

finished frames 4088000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 41%|████      | 51202/125000 [1:31:20<2:22:08,  8.65it/s]

finished frames 4096000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 41%|████      | 51302/125000 [1:31:31<2:04:08,  9.89it/s]

finished frames 4104000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 41%|████      | 51402/125000 [1:31:41<2:10:52,  9.37it/s]

finished frames 4112000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 41%|████      | 51502/125000 [1:31:52<2:15:46,  9.02it/s]

finished frames 4120000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 41%|████▏     | 51601/125000 [1:32:03<1:56:47, 10.47it/s]

finished frames 4128000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 41%|████▏     | 51702/125000 [1:32:14<2:19:32,  8.75it/s]

finished frames 4136000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 41%|████▏     | 51803/125000 [1:32:24<2:03:35,  9.87it/s]

finished frames 4144000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 42%|████▏     | 51902/125000 [1:32:35<2:12:29,  9.20it/s]

finished frames 4152000, mean/median reward 0.2/0.0, min/max reward 0.0/3.0


 42%|████▏     | 52002/125000 [1:32:46<2:17:20,  8.86it/s]

finished frames 4160000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 42%|████▏     | 52102/125000 [1:32:56<2:03:16,  9.86it/s]

finished frames 4168000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 42%|████▏     | 52201/125000 [1:33:07<2:04:11,  9.77it/s]

finished frames 4176000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 42%|████▏     | 52302/125000 [1:33:18<2:13:21,  9.09it/s]

finished frames 4184000, mean/median reward 0.4/0.0, min/max reward 0.0/1.0


 42%|████▏     | 52402/125000 [1:33:29<1:58:06, 10.24it/s]

finished frames 4192000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 42%|████▏     | 52502/125000 [1:33:39<2:10:40,  9.25it/s]

finished frames 4200000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 42%|████▏     | 52602/125000 [1:33:50<2:20:20,  8.60it/s]

finished frames 4208000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 42%|████▏     | 52702/125000 [1:34:01<2:06:23,  9.53it/s]

finished frames 4216000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 42%|████▏     | 52802/125000 [1:34:12<2:13:50,  8.99it/s]

finished frames 4224000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 42%|████▏     | 52903/125000 [1:34:22<2:15:46,  8.85it/s]

finished frames 4232000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 42%|████▏     | 53002/125000 [1:34:33<2:06:52,  9.46it/s]

finished frames 4240000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 42%|████▏     | 53102/125000 [1:34:44<2:19:19,  8.60it/s]

finished frames 4248000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 43%|████▎     | 53203/125000 [1:34:55<1:53:52, 10.51it/s]

finished frames 4256000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 43%|████▎     | 53302/125000 [1:35:05<2:16:48,  8.73it/s]

finished frames 4264000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 43%|████▎     | 53402/125000 [1:35:16<2:23:31,  8.31it/s]

finished frames 4272000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 43%|████▎     | 53502/125000 [1:35:27<2:01:54,  9.77it/s]

finished frames 4280000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 43%|████▎     | 53602/125000 [1:35:37<2:10:30,  9.12it/s]

finished frames 4288000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 43%|████▎     | 53702/125000 [1:35:48<2:17:54,  8.62it/s]

finished frames 4296000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 43%|████▎     | 53801/125000 [1:35:59<1:55:19, 10.29it/s]

finished frames 4304000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 43%|████▎     | 53902/125000 [1:36:09<2:16:51,  8.66it/s]

finished frames 4312000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 43%|████▎     | 54002/125000 [1:36:20<2:12:33,  8.93it/s]

finished frames 4320000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 43%|████▎     | 54101/125000 [1:36:31<1:54:56, 10.28it/s]

finished frames 4328000, mean/median reward 0.6/0.0, min/max reward 0.0/3.0


 43%|████▎     | 54202/125000 [1:36:41<2:06:53,  9.30it/s]

finished frames 4336000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 43%|████▎     | 54302/125000 [1:36:52<2:08:49,  9.15it/s]

finished frames 4344000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 44%|████▎     | 54402/125000 [1:37:03<2:13:46,  8.80it/s]

finished frames 4352000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 44%|████▎     | 54502/125000 [1:37:14<2:18:01,  8.51it/s]

finished frames 4360000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 44%|████▎     | 54602/125000 [1:37:25<1:48:39, 10.80it/s]

finished frames 4368000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 44%|████▍     | 54702/125000 [1:37:35<2:03:22,  9.50it/s]

finished frames 4376000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 44%|████▍     | 54802/125000 [1:37:46<2:12:53,  8.80it/s]

finished frames 4384000, mean/median reward 0.6/0.0, min/max reward 0.0/3.0


 44%|████▍     | 54902/125000 [1:37:57<2:00:10,  9.72it/s]

finished frames 4392000, mean/median reward 0.6/0.0, min/max reward 0.0/3.0


 44%|████▍     | 55002/125000 [1:38:07<2:06:38,  9.21it/s]

finished frames 4400000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 44%|████▍     | 55102/125000 [1:38:18<2:08:20,  9.08it/s]

finished frames 4408000, mean/median reward 0.4/0.0, min/max reward 0.0/1.0


 44%|████▍     | 55201/125000 [1:38:29<1:58:40,  9.80it/s]

finished frames 4416000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 44%|████▍     | 55302/125000 [1:38:40<2:05:46,  9.24it/s]

finished frames 4424000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 44%|████▍     | 55403/125000 [1:38:51<1:50:00, 10.54it/s]

finished frames 4432000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 44%|████▍     | 55502/125000 [1:39:01<2:07:58,  9.05it/s]

finished frames 4440000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 44%|████▍     | 55602/125000 [1:39:12<2:09:48,  8.91it/s]

finished frames 4448000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 45%|████▍     | 55702/125000 [1:39:23<1:54:48, 10.06it/s]

finished frames 4456000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 45%|████▍     | 55802/125000 [1:39:33<2:06:44,  9.10it/s]

finished frames 4464000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 45%|████▍     | 55902/125000 [1:39:44<2:11:30,  8.76it/s]

finished frames 4472000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 45%|████▍     | 56002/125000 [1:39:55<2:00:24,  9.55it/s]

finished frames 4480000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 45%|████▍     | 56102/125000 [1:40:06<2:06:43,  9.06it/s]

finished frames 4488000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 45%|████▍     | 56202/125000 [1:40:16<2:01:31,  9.44it/s]

finished frames 4496000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 45%|████▌     | 56302/125000 [1:40:27<2:15:36,  8.44it/s]

finished frames 4504000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 45%|████▌     | 56402/125000 [1:40:38<2:07:07,  8.99it/s]

finished frames 4512000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 45%|████▌     | 56502/125000 [1:40:49<1:59:22,  9.56it/s]

finished frames 4520000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 45%|████▌     | 56602/125000 [1:40:59<2:13:22,  8.55it/s]

finished frames 4528000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 45%|████▌     | 56703/125000 [1:41:10<1:56:39,  9.76it/s]

finished frames 4536000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 45%|████▌     | 56802/125000 [1:41:21<1:57:43,  9.66it/s]

finished frames 4544000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 46%|████▌     | 56902/125000 [1:41:32<2:02:15,  9.28it/s]

finished frames 4552000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 46%|████▌     | 57002/125000 [1:41:42<1:53:00, 10.03it/s]

finished frames 4560000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 46%|████▌     | 57102/125000 [1:41:53<2:10:58,  8.64it/s]

finished frames 4568000, mean/median reward 0.8/0.0, min/max reward 0.0/4.0


 46%|████▌     | 57202/125000 [1:42:04<2:07:51,  8.84it/s]

finished frames 4576000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 46%|████▌     | 57302/125000 [1:42:15<1:49:04, 10.34it/s]

finished frames 4584000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 46%|████▌     | 57402/125000 [1:42:25<2:02:29,  9.20it/s]

finished frames 4592000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 46%|████▌     | 57502/125000 [1:42:36<2:05:46,  8.94it/s]

finished frames 4600000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 46%|████▌     | 57602/125000 [1:42:47<1:51:49, 10.05it/s]

finished frames 4608000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 46%|████▌     | 57702/125000 [1:42:58<2:05:24,  8.94it/s]

finished frames 4616000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 46%|████▌     | 57803/125000 [1:43:08<2:00:28,  9.30it/s]

finished frames 4624000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 46%|████▋     | 57902/125000 [1:43:19<1:55:42,  9.66it/s]

finished frames 4632000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 46%|████▋     | 58002/125000 [1:43:30<2:10:48,  8.54it/s]

finished frames 4640000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 46%|████▋     | 58102/125000 [1:43:40<1:58:59,  9.37it/s]

finished frames 4648000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 47%|████▋     | 58202/125000 [1:43:51<2:00:29,  9.24it/s]

finished frames 4656000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 47%|████▋     | 58302/125000 [1:44:02<2:05:13,  8.88it/s]

finished frames 4664000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 47%|████▋     | 58402/125000 [1:44:13<1:45:06, 10.56it/s]

finished frames 4672000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 47%|████▋     | 58502/125000 [1:44:23<2:07:20,  8.70it/s]

finished frames 4680000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 47%|████▋     | 58602/125000 [1:44:34<2:06:56,  8.72it/s]

finished frames 4688000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 47%|████▋     | 58702/125000 [1:44:45<1:56:27,  9.49it/s]

finished frames 4696000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 47%|████▋     | 58802/125000 [1:44:56<2:02:46,  8.99it/s]

finished frames 4704000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 47%|████▋     | 58903/125000 [1:45:06<1:52:21,  9.80it/s]

finished frames 4712000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 47%|████▋     | 59002/125000 [1:45:17<2:00:09,  9.15it/s]

finished frames 4720000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 47%|████▋     | 59102/125000 [1:45:28<2:01:25,  9.04it/s]

finished frames 4728000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 47%|████▋     | 59203/125000 [1:45:39<1:45:47, 10.37it/s]

finished frames 4736000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 47%|████▋     | 59302/125000 [1:45:49<2:01:41,  9.00it/s]

finished frames 4744000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 48%|████▊     | 59402/125000 [1:46:00<1:59:33,  9.14it/s]

finished frames 4752000, mean/median reward 0.4/0.0, min/max reward 0.0/4.0


 48%|████▊     | 59502/125000 [1:46:11<1:51:36,  9.78it/s]

finished frames 4760000, mean/median reward 0.2/0.0, min/max reward 0.0/3.0


 48%|████▊     | 59602/125000 [1:46:21<2:01:17,  8.99it/s]

finished frames 4768000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 48%|████▊     | 59702/125000 [1:46:32<1:59:18,  9.12it/s]

finished frames 4776000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 48%|████▊     | 59802/125000 [1:46:43<1:46:17, 10.22it/s]

finished frames 4784000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 48%|████▊     | 59902/125000 [1:46:53<2:06:11,  8.60it/s]

finished frames 4792000, mean/median reward 0.2/0.0, min/max reward 0.0/3.0


 48%|████▊     | 60002/125000 [1:47:04<2:05:57,  8.60it/s]

finished frames 4800000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 48%|████▊     | 60102/125000 [1:47:15<1:53:24,  9.54it/s]

finished frames 4808000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 48%|████▊     | 60202/125000 [1:47:26<1:59:20,  9.05it/s]

finished frames 4816000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 48%|████▊     | 60302/125000 [1:47:36<1:50:10,  9.79it/s]

finished frames 4824000, mean/median reward 0.4/0.0, min/max reward 0.0/4.0


 48%|████▊     | 60402/125000 [1:47:47<1:55:36,  9.31it/s]

finished frames 4832000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 48%|████▊     | 60502/125000 [1:47:58<2:08:00,  8.40it/s]

finished frames 4840000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 48%|████▊     | 60602/125000 [1:48:09<1:46:31, 10.08it/s]

finished frames 4848000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 49%|████▊     | 60702/125000 [1:48:20<2:04:00,  8.64it/s]

finished frames 4856000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 49%|████▊     | 60803/125000 [1:48:30<1:53:45,  9.41it/s]

finished frames 4864000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 49%|████▊     | 60901/125000 [1:48:41<1:44:33, 10.22it/s]

finished frames 4872000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 49%|████▉     | 61002/125000 [1:48:52<1:58:37,  8.99it/s]

finished frames 4880000, mean/median reward 0.4/0.0, min/max reward 0.0/4.0


 49%|████▉     | 61103/125000 [1:49:03<1:45:16, 10.12it/s]

finished frames 4888000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 49%|████▉     | 61202/125000 [1:49:13<1:58:51,  8.95it/s]

finished frames 4896000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 49%|████▉     | 61302/125000 [1:49:24<1:56:44,  9.09it/s]

finished frames 4904000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 49%|████▉     | 61402/125000 [1:49:35<1:42:29, 10.34it/s]

finished frames 4912000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 49%|████▉     | 61502/125000 [1:49:45<1:53:22,  9.33it/s]

finished frames 4920000, mean/median reward 0.4/0.0, min/max reward 0.0/4.0


 49%|████▉     | 61602/125000 [1:49:56<2:00:01,  8.80it/s]

finished frames 4928000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 49%|████▉     | 61701/125000 [1:50:07<1:42:31, 10.29it/s]

finished frames 4936000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 49%|████▉     | 61802/125000 [1:50:17<1:55:23,  9.13it/s]

finished frames 4944000, mean/median reward 0.7/1.0, min/max reward 0.0/2.0


 50%|████▉     | 61902/125000 [1:50:28<1:58:14,  8.89it/s]

finished frames 4952000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 50%|████▉     | 62002/125000 [1:50:39<1:51:06,  9.45it/s]

finished frames 4960000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 50%|████▉     | 62102/125000 [1:50:50<1:56:31,  9.00it/s]

finished frames 4968000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 50%|████▉     | 62202/125000 [1:51:00<1:52:33,  9.30it/s]

finished frames 4976000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 50%|████▉     | 62302/125000 [1:51:11<1:52:40,  9.27it/s]

finished frames 4984000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 50%|████▉     | 62402/125000 [1:51:22<2:00:34,  8.65it/s]

finished frames 4992000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 50%|█████     | 62501/125000 [1:51:32<1:47:28,  9.69it/s]

finished frames 5000000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 50%|█████     | 62602/125000 [1:51:43<1:58:15,  8.79it/s]

finished frames 5008000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 50%|█████     | 62702/125000 [1:51:54<1:55:59,  8.95it/s]

finished frames 5016000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 50%|█████     | 62801/125000 [1:52:05<1:41:21, 10.23it/s]

finished frames 5024000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 50%|█████     | 62902/125000 [1:52:16<1:55:10,  8.99it/s]

finished frames 5032000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 50%|█████     | 63003/125000 [1:52:26<1:45:24,  9.80it/s]

finished frames 5040000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 50%|█████     | 63102/125000 [1:52:37<1:51:32,  9.25it/s]

finished frames 5048000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 51%|█████     | 63202/125000 [1:52:48<2:03:26,  8.34it/s]

finished frames 5056000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 51%|█████     | 63302/125000 [1:52:58<1:45:51,  9.71it/s]

finished frames 5064000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 51%|█████     | 63402/125000 [1:53:09<1:54:38,  8.96it/s]

finished frames 5072000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 51%|█████     | 63502/125000 [1:53:20<2:00:36,  8.50it/s]

finished frames 5080000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 51%|█████     | 63603/125000 [1:53:31<1:37:15, 10.52it/s]

finished frames 5088000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 51%|█████     | 63702/125000 [1:53:41<1:49:24,  9.34it/s]

finished frames 5096000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 51%|█████     | 63802/125000 [1:53:52<2:01:12,  8.41it/s]

finished frames 5104000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 51%|█████     | 63902/125000 [1:54:03<1:41:11, 10.06it/s]

finished frames 5112000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 51%|█████     | 64002/125000 [1:54:13<1:58:06,  8.61it/s]

finished frames 5120000, mean/median reward 0.3/0.0, min/max reward 0.0/4.0


 51%|█████▏    | 64102/125000 [1:54:24<1:55:07,  8.82it/s]

finished frames 5128000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 51%|█████▏    | 64202/125000 [1:54:35<1:41:45,  9.96it/s]

finished frames 5136000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 51%|█████▏    | 64302/125000 [1:54:46<2:01:14,  8.34it/s]

finished frames 5144000, mean/median reward 0.6/0.0, min/max reward 0.0/5.0


 52%|█████▏    | 64402/125000 [1:54:56<1:52:54,  8.94it/s]

finished frames 5152000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 52%|█████▏    | 64502/125000 [1:55:07<1:48:13,  9.32it/s]

finished frames 5160000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 52%|█████▏    | 64602/125000 [1:55:18<1:52:39,  8.93it/s]

finished frames 5168000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 52%|█████▏    | 64703/125000 [1:55:29<1:44:06,  9.65it/s]

finished frames 5176000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 52%|█████▏    | 64802/125000 [1:55:39<1:48:25,  9.25it/s]

finished frames 5184000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 52%|█████▏    | 64902/125000 [1:55:50<1:53:37,  8.82it/s]

finished frames 5192000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 52%|█████▏    | 65002/125000 [1:56:01<1:37:56, 10.21it/s]

finished frames 5200000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 52%|█████▏    | 65102/125000 [1:56:11<1:53:40,  8.78it/s]

finished frames 5208000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 52%|█████▏    | 65202/125000 [1:56:22<1:51:44,  8.92it/s]

finished frames 5216000, mean/median reward 0.3/0.0, min/max reward 0.0/3.0


 52%|█████▏    | 65302/125000 [1:56:33<1:40:10,  9.93it/s]

finished frames 5224000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 52%|█████▏    | 65402/125000 [1:56:44<1:50:46,  8.97it/s]

finished frames 5232000, mean/median reward 0.2/0.0, min/max reward 0.0/3.0


 52%|█████▏    | 65502/125000 [1:56:54<1:50:07,  9.00it/s]

finished frames 5240000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 52%|█████▏    | 65602/125000 [1:57:05<1:39:34,  9.94it/s]

finished frames 5248000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 53%|█████▎    | 65702/125000 [1:57:16<1:52:48,  8.76it/s]

finished frames 5256000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 53%|█████▎    | 65803/125000 [1:57:26<1:42:42,  9.61it/s]

finished frames 5264000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 53%|█████▎    | 65902/125000 [1:57:37<1:45:35,  9.33it/s]

finished frames 5272000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 53%|█████▎    | 66002/125000 [1:57:48<1:50:27,  8.90it/s]

finished frames 5280000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 53%|█████▎    | 66102/125000 [1:57:59<1:36:59, 10.12it/s]

finished frames 5288000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 53%|█████▎    | 66201/125000 [1:58:09<1:39:07,  9.89it/s]

finished frames 5296000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 53%|█████▎    | 66302/125000 [1:58:20<1:53:16,  8.64it/s]

finished frames 5304000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 53%|█████▎    | 66402/125000 [1:58:31<1:42:05,  9.57it/s]

finished frames 5312000, mean/median reward 0.3/0.0, min/max reward 0.0/3.0


 53%|█████▎    | 66502/125000 [1:58:41<1:51:36,  8.73it/s]

finished frames 5320000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 53%|█████▎    | 66602/125000 [1:58:52<1:47:44,  9.03it/s]

finished frames 5328000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 53%|█████▎    | 66702/125000 [1:59:03<1:42:59,  9.43it/s]

finished frames 5336000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 53%|█████▎    | 66802/125000 [1:59:14<1:53:25,  8.55it/s]

finished frames 5344000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 54%|█████▎    | 66903/125000 [1:59:25<1:33:10, 10.39it/s]

finished frames 5352000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 54%|█████▎    | 67002/125000 [1:59:35<1:44:10,  9.28it/s]

finished frames 5360000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 54%|█████▎    | 67102/125000 [1:59:46<1:50:20,  8.74it/s]

finished frames 5368000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 54%|█████▍    | 67202/125000 [1:59:57<1:35:56, 10.04it/s]

finished frames 5376000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 54%|█████▍    | 67302/125000 [2:00:08<1:46:35,  9.02it/s]

finished frames 5384000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 54%|█████▍    | 67402/125000 [2:00:18<1:50:48,  8.66it/s]

finished frames 5392000, mean/median reward 0.4/0.0, min/max reward 0.0/1.0


 54%|█████▍    | 67502/125000 [2:00:29<1:39:10,  9.66it/s]

finished frames 5400000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 54%|█████▍    | 67602/125000 [2:00:40<1:46:07,  9.01it/s]

finished frames 5408000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 54%|█████▍    | 67702/125000 [2:00:50<1:40:04,  9.54it/s]

finished frames 5416000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 54%|█████▍    | 67802/125000 [2:01:01<1:49:06,  8.74it/s]

finished frames 5424000, mean/median reward 0.7/0.0, min/max reward 0.0/4.0


 54%|█████▍    | 67902/125000 [2:01:12<1:45:35,  9.01it/s]

finished frames 5432000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 54%|█████▍    | 68002/125000 [2:01:23<1:35:38,  9.93it/s]

finished frames 5440000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 54%|█████▍    | 68102/125000 [2:01:34<1:45:42,  8.97it/s]

finished frames 5448000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 55%|█████▍    | 68202/125000 [2:01:44<1:54:52,  8.24it/s]

finished frames 5456000, mean/median reward 0.4/0.0, min/max reward 0.0/1.0


 55%|█████▍    | 68302/125000 [2:01:55<1:36:19,  9.81it/s]

finished frames 5464000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 55%|█████▍    | 68402/125000 [2:02:06<1:45:01,  8.98it/s]

finished frames 5472000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 55%|█████▍    | 68502/125000 [2:02:16<1:38:01,  9.61it/s]

finished frames 5480000, mean/median reward 0.3/0.0, min/max reward 0.0/3.0


 55%|█████▍    | 68602/125000 [2:02:27<1:41:26,  9.27it/s]

finished frames 5488000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 55%|█████▍    | 68702/125000 [2:02:38<2:00:17,  7.80it/s]

finished frames 5496000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 55%|█████▌    | 68802/125000 [2:02:49<1:30:09, 10.39it/s]

finished frames 5504000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 55%|█████▌    | 68902/125000 [2:02:59<1:45:04,  8.90it/s]

finished frames 5512000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 55%|█████▌    | 69002/125000 [2:03:10<1:45:03,  8.88it/s]

finished frames 5520000, mean/median reward 0.4/0.0, min/max reward 0.0/1.0


 55%|█████▌    | 69101/125000 [2:03:21<1:31:40, 10.16it/s]

finished frames 5528000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 55%|█████▌    | 69202/125000 [2:03:31<1:37:33,  9.53it/s]

finished frames 5536000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 55%|█████▌    | 69302/125000 [2:03:42<1:42:50,  9.03it/s]

finished frames 5544000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 56%|█████▌    | 69401/125000 [2:03:53<1:30:20, 10.26it/s]

finished frames 5552000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 56%|█████▌    | 69502/125000 [2:04:03<1:43:43,  8.92it/s]

finished frames 5560000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 56%|█████▌    | 69602/125000 [2:04:14<1:42:11,  9.03it/s]

finished frames 5568000, mean/median reward 0.6/0.0, min/max reward 0.0/3.0


 56%|█████▌    | 69701/125000 [2:04:25<1:33:28,  9.86it/s]

finished frames 5576000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 56%|█████▌    | 69802/125000 [2:04:36<1:43:51,  8.86it/s]

finished frames 5584000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 56%|█████▌    | 69902/125000 [2:04:47<1:29:17, 10.28it/s]

finished frames 5592000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 56%|█████▌    | 70002/125000 [2:04:57<1:37:54,  9.36it/s]

finished frames 5600000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 56%|█████▌    | 70102/125000 [2:05:08<1:42:08,  8.96it/s]

finished frames 5608000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 56%|█████▌    | 70202/125000 [2:05:19<1:32:55,  9.83it/s]

finished frames 5616000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 56%|█████▌    | 70302/125000 [2:05:30<1:40:39,  9.06it/s]

finished frames 5624000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 56%|█████▋    | 70402/125000 [2:05:40<1:35:41,  9.51it/s]

finished frames 5632000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 56%|█████▋    | 70502/125000 [2:05:51<1:40:40,  9.02it/s]

finished frames 5640000, mean/median reward 0.2/0.0, min/max reward 0.0/3.0


 56%|█████▋    | 70602/125000 [2:06:02<1:43:06,  8.79it/s]

finished frames 5648000, mean/median reward 0.6/0.0, min/max reward 0.0/3.0


 57%|█████▋    | 70702/125000 [2:06:13<1:32:40,  9.77it/s]

finished frames 5656000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 57%|█████▋    | 70802/125000 [2:06:24<1:42:06,  8.85it/s]

finished frames 5664000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 57%|█████▋    | 70902/125000 [2:06:34<1:34:41,  9.52it/s]

finished frames 5672000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 57%|█████▋    | 71002/125000 [2:06:45<1:34:15,  9.55it/s]

finished frames 5680000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 57%|█████▋    | 71102/125000 [2:06:56<1:41:51,  8.82it/s]

finished frames 5688000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 57%|█████▋    | 71202/125000 [2:07:07<1:23:33, 10.73it/s]

finished frames 5696000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 57%|█████▋    | 71302/125000 [2:07:17<1:37:13,  9.20it/s]

finished frames 5704000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 57%|█████▋    | 71402/125000 [2:07:28<1:41:22,  8.81it/s]

finished frames 5712000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 57%|█████▋    | 71502/125000 [2:07:39<1:27:16, 10.22it/s]

finished frames 5720000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 57%|█████▋    | 71602/125000 [2:07:50<1:35:56,  9.28it/s]

finished frames 5728000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 57%|█████▋    | 71703/125000 [2:08:00<1:29:38,  9.91it/s]

finished frames 5736000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 57%|█████▋    | 71802/125000 [2:08:11<1:33:57,  9.44it/s]

finished frames 5744000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 58%|█████▊    | 71902/125000 [2:08:22<1:38:05,  9.02it/s]

finished frames 5752000, mean/median reward 0.4/0.0, min/max reward 0.0/4.0


 58%|█████▊    | 72003/125000 [2:08:32<1:29:47,  9.84it/s]

finished frames 5760000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 58%|█████▊    | 72102/125000 [2:08:43<1:32:49,  9.50it/s]

finished frames 5768000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 58%|█████▊    | 72202/125000 [2:08:54<1:42:29,  8.59it/s]

finished frames 5776000, mean/median reward 0.6/0.0, min/max reward 0.0/4.0


 58%|█████▊    | 72302/125000 [2:09:05<1:27:47, 10.00it/s]

finished frames 5784000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 58%|█████▊    | 72402/125000 [2:09:15<1:44:41,  8.37it/s]

finished frames 5792000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 58%|█████▊    | 72502/125000 [2:09:26<1:40:11,  8.73it/s]

finished frames 5800000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 58%|█████▊    | 72602/125000 [2:09:37<1:29:14,  9.79it/s]

finished frames 5808000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 58%|█████▊    | 72702/125000 [2:09:48<1:38:51,  8.82it/s]

finished frames 5816000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 58%|█████▊    | 72802/125000 [2:09:58<1:32:00,  9.46it/s]

finished frames 5824000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 58%|█████▊    | 72902/125000 [2:10:09<1:32:52,  9.35it/s]

finished frames 5832000, mean/median reward 0.2/0.0, min/max reward 0.0/3.0


 58%|█████▊    | 73002/125000 [2:10:20<1:35:23,  9.08it/s]

finished frames 5840000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 58%|█████▊    | 73102/125000 [2:10:31<1:25:04, 10.17it/s]

finished frames 5848000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 59%|█████▊    | 73202/125000 [2:10:41<1:35:39,  9.02it/s]

finished frames 5856000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 59%|█████▊    | 73302/125000 [2:10:52<1:35:20,  9.04it/s]

finished frames 5864000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 59%|█████▊    | 73402/125000 [2:11:03<1:25:58, 10.00it/s]

finished frames 5872000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 59%|█████▉    | 73502/125000 [2:11:14<1:37:36,  8.79it/s]

finished frames 5880000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 59%|█████▉    | 73602/125000 [2:11:24<1:32:21,  9.27it/s]

finished frames 5888000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 59%|█████▉    | 73702/125000 [2:11:35<1:32:24,  9.25it/s]

finished frames 5896000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 59%|█████▉    | 73802/125000 [2:11:46<1:36:02,  8.88it/s]

finished frames 5904000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 59%|█████▉    | 73901/125000 [2:11:57<1:22:36, 10.31it/s]

finished frames 5912000, mean/median reward 0.2/0.0, min/max reward 0.0/3.0


 59%|█████▉    | 74002/125000 [2:12:07<1:35:44,  8.88it/s]

finished frames 5920000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 59%|█████▉    | 74102/125000 [2:12:18<1:34:44,  8.95it/s]

finished frames 5928000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 59%|█████▉    | 74201/125000 [2:12:29<1:26:05,  9.84it/s]

finished frames 5936000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 59%|█████▉    | 74302/125000 [2:12:40<1:39:55,  8.46it/s]

finished frames 5944000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 60%|█████▉    | 74402/125000 [2:12:50<1:25:49,  9.83it/s]

finished frames 5952000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 60%|█████▉    | 74502/125000 [2:13:01<1:28:31,  9.51it/s]

finished frames 5960000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 60%|█████▉    | 74602/125000 [2:13:12<1:34:16,  8.91it/s]

finished frames 5968000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 60%|█████▉    | 74702/125000 [2:13:23<1:20:00, 10.48it/s]

finished frames 5976000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 60%|█████▉    | 74802/125000 [2:13:33<1:33:40,  8.93it/s]

finished frames 5984000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 60%|█████▉    | 74902/125000 [2:13:44<1:31:36,  9.12it/s]

finished frames 5992000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 60%|██████    | 75002/125000 [2:13:55<1:31:27,  9.11it/s]

finished frames 6000000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 60%|██████    | 75102/125000 [2:14:06<1:30:35,  9.18it/s]

finished frames 6008000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 60%|██████    | 75203/125000 [2:14:17<1:22:10, 10.10it/s]

finished frames 6016000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 60%|██████    | 75302/125000 [2:14:27<1:33:59,  8.81it/s]

finished frames 6024000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 60%|██████    | 75402/125000 [2:14:38<1:37:13,  8.50it/s]

finished frames 6032000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 60%|██████    | 75501/125000 [2:14:49<1:24:57,  9.71it/s]

finished frames 6040000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 60%|██████    | 75602/125000 [2:15:00<1:30:57,  9.05it/s]

finished frames 6048000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 61%|██████    | 75702/125000 [2:15:11<1:22:01, 10.02it/s]

finished frames 6056000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 61%|██████    | 75802/125000 [2:15:21<1:32:54,  8.83it/s]

finished frames 6064000, mean/median reward 0.6/1.0, min/max reward 0.0/2.0


 61%|██████    | 75902/125000 [2:15:32<1:39:37,  8.21it/s]

finished frames 6072000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 61%|██████    | 76002/125000 [2:15:43<1:24:32,  9.66it/s]

finished frames 6080000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 61%|██████    | 76102/125000 [2:15:54<1:33:11,  8.74it/s]

finished frames 6088000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 61%|██████    | 76202/125000 [2:16:05<1:19:44, 10.20it/s]

finished frames 6096000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 61%|██████    | 76302/125000 [2:16:15<1:23:04,  9.77it/s]

finished frames 6104000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 61%|██████    | 76402/125000 [2:16:26<1:31:43,  8.83it/s]

finished frames 6112000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 61%|██████    | 76502/125000 [2:16:37<1:18:43, 10.27it/s]

finished frames 6120000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 61%|██████▏   | 76602/125000 [2:16:47<1:29:05,  9.05it/s]

finished frames 6128000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 61%|██████▏   | 76702/125000 [2:16:58<1:29:52,  8.96it/s]

finished frames 6136000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 61%|██████▏   | 76802/125000 [2:17:09<1:18:52, 10.18it/s]

finished frames 6144000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 62%|██████▏   | 76902/125000 [2:17:20<1:30:04,  8.90it/s]

finished frames 6152000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 62%|██████▏   | 77003/125000 [2:17:31<1:20:07,  9.98it/s]

finished frames 6160000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 62%|██████▏   | 77102/125000 [2:17:41<1:27:56,  9.08it/s]

finished frames 6168000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 62%|██████▏   | 77202/125000 [2:17:52<1:29:31,  8.90it/s]

finished frames 6176000, mean/median reward 0.4/0.0, min/max reward 0.0/1.0


 62%|██████▏   | 77301/125000 [2:18:03<1:15:39, 10.51it/s]

finished frames 6184000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 62%|██████▏   | 77402/125000 [2:18:13<1:36:21,  8.23it/s]

finished frames 6192000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 62%|██████▏   | 77502/125000 [2:18:24<1:31:20,  8.67it/s]

finished frames 6200000, mean/median reward 0.5/0.0, min/max reward 0.0/4.0


 62%|██████▏   | 77601/125000 [2:18:35<1:19:42,  9.91it/s]

finished frames 6208000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 62%|██████▏   | 77702/125000 [2:18:46<1:29:28,  8.81it/s]

finished frames 6216000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 62%|██████▏   | 77802/125000 [2:18:56<1:22:08,  9.58it/s]

finished frames 6224000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 62%|██████▏   | 77902/125000 [2:19:07<1:27:41,  8.95it/s]

finished frames 6232000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 62%|██████▏   | 78002/125000 [2:19:18<1:27:54,  8.91it/s]

finished frames 6240000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 62%|██████▏   | 78103/125000 [2:19:29<1:18:28,  9.96it/s]

finished frames 6248000, mean/median reward 0.8/0.0, min/max reward 0.0/4.0


 63%|██████▎   | 78202/125000 [2:19:39<1:23:02,  9.39it/s]

finished frames 6256000, mean/median reward 0.3/0.0, min/max reward 0.0/3.0


 63%|██████▎   | 78302/125000 [2:19:50<1:28:27,  8.80it/s]

finished frames 6264000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 63%|██████▎   | 78402/125000 [2:20:01<1:13:25, 10.58it/s]

finished frames 6272000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 63%|██████▎   | 78502/125000 [2:20:11<1:29:27,  8.66it/s]

finished frames 6280000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 63%|██████▎   | 78603/125000 [2:20:22<1:18:09,  9.89it/s]

finished frames 6288000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 63%|██████▎   | 78702/125000 [2:20:33<1:24:03,  9.18it/s]

finished frames 6296000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 63%|██████▎   | 78802/125000 [2:20:44<1:26:36,  8.89it/s]

finished frames 6304000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 63%|██████▎   | 78902/125000 [2:20:54<1:19:53,  9.62it/s]

finished frames 6312000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 63%|██████▎   | 79002/125000 [2:21:05<1:18:49,  9.73it/s]

finished frames 6320000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 63%|██████▎   | 79102/125000 [2:21:16<1:29:09,  8.58it/s]

finished frames 6328000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 63%|██████▎   | 79202/125000 [2:21:27<1:16:51,  9.93it/s]

finished frames 6336000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 63%|██████▎   | 79302/125000 [2:21:37<1:20:57,  9.41it/s]

finished frames 6344000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 64%|██████▎   | 79402/125000 [2:21:48<1:24:55,  8.95it/s]

finished frames 6352000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 64%|██████▎   | 79502/125000 [2:21:59<1:18:07,  9.71it/s]

finished frames 6360000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 64%|██████▎   | 79602/125000 [2:22:10<1:24:54,  8.91it/s]

finished frames 6368000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 64%|██████▍   | 79702/125000 [2:22:20<1:16:58,  9.81it/s]

finished frames 6376000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 64%|██████▍   | 79802/125000 [2:22:31<1:24:24,  8.93it/s]

finished frames 6384000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 64%|██████▍   | 79902/125000 [2:22:42<1:25:31,  8.79it/s]

finished frames 6392000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 64%|██████▍   | 80001/125000 [2:22:53<1:11:02, 10.56it/s]

finished frames 6400000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 64%|██████▍   | 80102/125000 [2:23:03<1:18:06,  9.58it/s]

finished frames 6408000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 64%|██████▍   | 80202/125000 [2:23:14<1:25:32,  8.73it/s]

finished frames 6416000, mean/median reward 0.4/0.0, min/max reward 0.0/1.0


 64%|██████▍   | 80302/125000 [2:23:25<1:17:14,  9.64it/s]

finished frames 6424000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 64%|██████▍   | 80402/125000 [2:23:36<1:25:56,  8.65it/s]

finished frames 6432000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 64%|██████▍   | 80503/125000 [2:23:46<1:14:37,  9.94it/s]

finished frames 6440000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 64%|██████▍   | 80602/125000 [2:23:57<1:16:34,  9.66it/s]

finished frames 6448000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 65%|██████▍   | 80702/125000 [2:24:08<1:25:23,  8.65it/s]

finished frames 6456000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 65%|██████▍   | 80803/125000 [2:24:19<1:11:55, 10.24it/s]

finished frames 6464000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 65%|██████▍   | 80902/125000 [2:24:29<1:25:36,  8.59it/s]

finished frames 6472000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 65%|██████▍   | 81003/125000 [2:24:40<1:15:51,  9.67it/s]

finished frames 6480000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 65%|██████▍   | 81102/125000 [2:24:51<1:13:06, 10.01it/s]

finished frames 6488000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 65%|██████▍   | 81202/125000 [2:25:02<1:26:37,  8.43it/s]

finished frames 6496000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 65%|██████▌   | 81303/125000 [2:25:12<1:17:29,  9.40it/s]

finished frames 6504000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 65%|██████▌   | 81402/125000 [2:25:23<1:18:43,  9.23it/s]

finished frames 6512000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 65%|██████▌   | 81502/125000 [2:25:34<1:21:38,  8.88it/s]

finished frames 6520000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 65%|██████▌   | 81601/125000 [2:25:45<1:12:29,  9.98it/s]

finished frames 6528000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 65%|██████▌   | 81702/125000 [2:25:56<1:23:34,  8.63it/s]

finished frames 6536000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 65%|██████▌   | 81802/125000 [2:26:06<1:19:12,  9.09it/s]

finished frames 6544000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 66%|██████▌   | 81901/125000 [2:26:17<1:13:50,  9.73it/s]

finished frames 6552000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 66%|██████▌   | 82002/125000 [2:26:28<1:25:14,  8.41it/s]

finished frames 6560000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 66%|██████▌   | 82103/125000 [2:26:39<1:08:29, 10.44it/s]

finished frames 6568000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 66%|██████▌   | 82202/125000 [2:26:49<1:16:56,  9.27it/s]

finished frames 6576000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 66%|██████▌   | 82302/125000 [2:27:00<1:18:25,  9.07it/s]

finished frames 6584000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 66%|██████▌   | 82401/125000 [2:27:11<1:13:01,  9.72it/s]

finished frames 6592000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 66%|██████▌   | 82502/125000 [2:27:22<1:21:01,  8.74it/s]

finished frames 6600000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 66%|██████▌   | 82602/125000 [2:27:32<1:13:19,  9.64it/s]

finished frames 6608000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 66%|██████▌   | 82702/125000 [2:27:43<1:15:38,  9.32it/s]

finished frames 6616000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 66%|██████▌   | 82802/125000 [2:27:54<1:18:12,  8.99it/s]

finished frames 6624000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 66%|██████▋   | 82901/125000 [2:28:05<1:06:54, 10.49it/s]

finished frames 6632000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 66%|██████▋   | 83002/125000 [2:28:15<1:19:57,  8.75it/s]

finished frames 6640000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 66%|██████▋   | 83102/125000 [2:28:26<1:19:14,  8.81it/s]

finished frames 6648000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 67%|██████▋   | 83201/125000 [2:28:37<1:12:41,  9.58it/s]

finished frames 6656000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 67%|██████▋   | 83302/125000 [2:28:48<1:17:53,  8.92it/s]

finished frames 6664000, mean/median reward 0.2/0.0, min/max reward 0.0/3.0


 67%|██████▋   | 83402/125000 [2:28:58<1:11:23,  9.71it/s]

finished frames 6672000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 67%|██████▋   | 83502/125000 [2:29:09<1:16:53,  9.00it/s]

finished frames 6680000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 67%|██████▋   | 83602/125000 [2:29:20<1:21:05,  8.51it/s]

finished frames 6688000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 67%|██████▋   | 83701/125000 [2:29:31<1:07:17, 10.23it/s]

finished frames 6696000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 67%|██████▋   | 83802/125000 [2:29:42<1:17:53,  8.81it/s]

finished frames 6704000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 67%|██████▋   | 83902/125000 [2:29:52<1:09:24,  9.87it/s]

finished frames 6712000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 67%|██████▋   | 84002/125000 [2:30:03<1:16:36,  8.92it/s]

finished frames 6720000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 67%|██████▋   | 84102/125000 [2:30:14<1:20:09,  8.50it/s]

finished frames 6728000, mean/median reward 0.2/0.0, min/max reward 0.0/3.0


 67%|██████▋   | 84201/125000 [2:30:25<1:08:22,  9.94it/s]

finished frames 6736000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 67%|██████▋   | 84302/125000 [2:30:36<1:19:56,  8.48it/s]

finished frames 6744000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 68%|██████▊   | 84403/125000 [2:30:46<1:08:16,  9.91it/s]

finished frames 6752000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 68%|██████▊   | 84502/125000 [2:30:57<1:14:19,  9.08it/s]

finished frames 6760000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 68%|██████▊   | 84602/125000 [2:31:08<1:17:39,  8.67it/s]

finished frames 6768000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 68%|██████▊   | 84701/125000 [2:31:19<1:06:06, 10.16it/s]

finished frames 6776000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 68%|██████▊   | 84802/125000 [2:31:30<1:17:27,  8.65it/s]

finished frames 6784000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 68%|██████▊   | 84903/125000 [2:31:40<1:11:11,  9.39it/s]

finished frames 6792000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 68%|██████▊   | 85002/125000 [2:31:51<1:09:08,  9.64it/s]

finished frames 6800000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 68%|██████▊   | 85102/125000 [2:32:02<1:21:44,  8.14it/s]

finished frames 6808000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 68%|██████▊   | 85202/125000 [2:32:13<1:05:12, 10.17it/s]

finished frames 6816000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 68%|██████▊   | 85302/125000 [2:32:23<1:14:06,  8.93it/s]

finished frames 6824000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 68%|██████▊   | 85402/125000 [2:32:34<1:14:14,  8.89it/s]

finished frames 6832000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 68%|██████▊   | 85502/125000 [2:32:45<1:05:26, 10.06it/s]

finished frames 6840000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 68%|██████▊   | 85602/125000 [2:32:56<1:15:24,  8.71it/s]

finished frames 6848000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 69%|██████▊   | 85703/125000 [2:33:06<1:06:17,  9.88it/s]

finished frames 6856000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 69%|██████▊   | 85802/125000 [2:33:17<1:13:05,  8.94it/s]

finished frames 6864000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 69%|██████▊   | 85902/125000 [2:33:28<1:12:36,  8.98it/s]

finished frames 6872000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 69%|██████▉   | 86003/125000 [2:33:39<1:01:33, 10.56it/s]

finished frames 6880000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 69%|██████▉   | 86102/125000 [2:33:49<1:12:46,  8.91it/s]

finished frames 6888000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 69%|██████▉   | 86202/125000 [2:34:00<1:10:36,  9.16it/s]

finished frames 6896000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 69%|██████▉   | 86301/125000 [2:34:11<1:03:30, 10.16it/s]

finished frames 6904000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 69%|██████▉   | 86402/125000 [2:34:22<1:15:33,  8.51it/s]

finished frames 6912000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 69%|██████▉   | 86502/125000 [2:34:32<1:09:37,  9.22it/s]

finished frames 6920000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 69%|██████▉   | 86602/125000 [2:34:43<1:08:54,  9.29it/s]

finished frames 6928000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 69%|██████▉   | 86702/125000 [2:34:54<1:13:30,  8.68it/s]

finished frames 6936000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 69%|██████▉   | 86803/125000 [2:35:05<1:03:03, 10.10it/s]

finished frames 6944000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 70%|██████▉   | 86902/125000 [2:35:15<1:15:18,  8.43it/s]

finished frames 6952000, mean/median reward 0.5/0.0, min/max reward 0.0/5.0


 70%|██████▉   | 87002/125000 [2:35:26<1:09:28,  9.11it/s]

finished frames 6960000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 70%|██████▉   | 87102/125000 [2:35:37<1:06:52,  9.45it/s]

finished frames 6968000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 70%|██████▉   | 87202/125000 [2:35:48<1:09:01,  9.13it/s]

finished frames 6976000, mean/median reward 0.6/0.0, min/max reward 0.0/4.0


 70%|██████▉   | 87302/125000 [2:35:58<1:08:50,  9.13it/s]

finished frames 6984000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 70%|██████▉   | 87402/125000 [2:36:09<1:07:57,  9.22it/s]

finished frames 6992000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 70%|███████   | 87502/125000 [2:36:20<1:12:22,  8.64it/s]

finished frames 7000000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 70%|███████   | 87602/125000 [2:36:31<1:04:21,  9.68it/s]

finished frames 7008000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 70%|███████   | 87702/125000 [2:36:42<1:09:12,  8.98it/s]

finished frames 7016000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 70%|███████   | 87803/125000 [2:36:53<1:00:29, 10.25it/s]

finished frames 7024000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 70%|███████   | 87902/125000 [2:37:03<1:06:39,  9.27it/s]

finished frames 7032000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 70%|███████   | 88002/125000 [2:37:14<1:08:10,  9.04it/s]

finished frames 7040000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 70%|███████   | 88102/125000 [2:37:25<1:02:36,  9.82it/s]

finished frames 7048000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 71%|███████   | 88202/125000 [2:37:36<1:11:27,  8.58it/s]

finished frames 7056000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 71%|███████   | 88302/125000 [2:37:46<1:02:33,  9.78it/s]

finished frames 7064000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 71%|███████   | 88402/125000 [2:37:57<1:08:47,  8.87it/s]

finished frames 7072000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 71%|███████   | 88502/125000 [2:38:08<1:06:08,  9.20it/s]

finished frames 7080000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 71%|███████   | 88601/125000 [2:38:19<59:08, 10.26it/s]  

finished frames 7088000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 71%|███████   | 88702/125000 [2:38:30<1:11:24,  8.47it/s]

finished frames 7096000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 71%|███████   | 88803/125000 [2:38:40<1:03:31,  9.50it/s]

finished frames 7104000, mean/median reward 0.3/0.0, min/max reward 0.0/3.0


 71%|███████   | 88902/125000 [2:38:51<1:08:34,  8.77it/s]

finished frames 7112000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 71%|███████   | 89002/125000 [2:39:02<1:11:57,  8.34it/s]

finished frames 7120000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 71%|███████▏  | 89101/125000 [2:39:13<56:33, 10.58it/s]  

finished frames 7128000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 71%|███████▏  | 89202/125000 [2:39:24<1:06:40,  8.95it/s]

finished frames 7136000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 71%|███████▏  | 89302/125000 [2:39:34<1:03:46,  9.33it/s]

finished frames 7144000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 72%|███████▏  | 89402/125000 [2:39:45<1:05:22,  9.08it/s]

finished frames 7152000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 72%|███████▏  | 89502/125000 [2:39:56<1:08:13,  8.67it/s]

finished frames 7160000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 72%|███████▏  | 89602/125000 [2:40:07<56:53, 10.37it/s]  

finished frames 7168000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 72%|███████▏  | 89702/125000 [2:40:17<1:02:55,  9.35it/s]

finished frames 7176000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 72%|███████▏  | 89802/125000 [2:40:28<1:04:58,  9.03it/s]

finished frames 7184000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 72%|███████▏  | 89901/125000 [2:40:39<59:20,  9.86it/s]  

finished frames 7192000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 72%|███████▏  | 90002/125000 [2:40:50<1:03:51,  9.13it/s]

finished frames 7200000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 72%|███████▏  | 90103/125000 [2:41:01<58:35,  9.93it/s]  

finished frames 7208000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 72%|███████▏  | 90202/125000 [2:41:11<1:04:06,  9.05it/s]

finished frames 7216000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 72%|███████▏  | 90303/125000 [2:41:22<1:03:54,  9.05it/s]

finished frames 7224000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 72%|███████▏  | 90402/125000 [2:41:33<1:01:18,  9.41it/s]

finished frames 7232000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 72%|███████▏  | 90502/125000 [2:41:44<1:08:09,  8.44it/s]

finished frames 7240000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 72%|███████▏  | 90602/125000 [2:41:55<55:25, 10.34it/s]  

finished frames 7248000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 73%|███████▎  | 90702/125000 [2:42:05<1:03:13,  9.04it/s]

finished frames 7256000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 73%|███████▎  | 90802/125000 [2:42:16<1:03:13,  9.02it/s]

finished frames 7264000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 73%|███████▎  | 90902/125000 [2:42:27<1:04:16,  8.84it/s]

finished frames 7272000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 73%|███████▎  | 91002/125000 [2:42:38<1:02:18,  9.09it/s]

finished frames 7280000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 73%|███████▎  | 91101/125000 [2:42:49<54:21, 10.39it/s]  

finished frames 7288000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 73%|███████▎  | 91202/125000 [2:42:59<1:04:09,  8.78it/s]

finished frames 7296000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 73%|███████▎  | 91302/125000 [2:43:10<1:08:21,  8.22it/s]

finished frames 7304000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 73%|███████▎  | 91402/125000 [2:43:21<57:25,  9.75it/s]  

finished frames 7312000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 73%|███████▎  | 91502/125000 [2:43:32<1:03:18,  8.82it/s]

finished frames 7320000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 73%|███████▎  | 91603/125000 [2:43:43<54:07, 10.28it/s]  

finished frames 7328000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 73%|███████▎  | 91702/125000 [2:43:53<1:00:05,  9.24it/s]

finished frames 7336000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 73%|███████▎  | 91802/125000 [2:44:04<1:00:33,  9.14it/s]

finished frames 7344000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 74%|███████▎  | 91901/125000 [2:44:15<54:07, 10.19it/s]  

finished frames 7352000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 74%|███████▎  | 92002/125000 [2:44:26<1:00:52,  9.03it/s]

finished frames 7360000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 74%|███████▎  | 92102/125000 [2:44:36<59:37,  9.19it/s]  

finished frames 7368000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 74%|███████▍  | 92202/125000 [2:44:47<58:59,  9.27it/s]  

finished frames 7376000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 74%|███████▍  | 92302/125000 [2:44:58<1:01:42,  8.83it/s]

finished frames 7384000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 74%|███████▍  | 92401/125000 [2:45:09<53:01, 10.25it/s]  

finished frames 7392000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 74%|███████▍  | 92502/125000 [2:45:19<1:01:34,  8.80it/s]

finished frames 7400000, mean/median reward 0.2/0.0, min/max reward 0.0/3.0


 74%|███████▍  | 92603/125000 [2:45:30<57:54,  9.32it/s]  

finished frames 7408000, mean/median reward 0.2/0.0, min/max reward 0.0/3.0


 74%|███████▍  | 92702/125000 [2:45:41<57:35,  9.35it/s]  

finished frames 7416000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 74%|███████▍  | 92802/125000 [2:45:52<1:00:20,  8.89it/s]

finished frames 7424000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 74%|███████▍  | 92903/125000 [2:46:03<53:17, 10.04it/s]  

finished frames 7432000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 74%|███████▍  | 93002/125000 [2:46:13<55:45,  9.56it/s]  

finished frames 7440000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 74%|███████▍  | 93102/125000 [2:46:24<1:01:17,  8.67it/s]

finished frames 7448000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 75%|███████▍  | 93202/125000 [2:46:35<51:35, 10.27it/s]  

finished frames 7456000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 75%|███████▍  | 93302/125000 [2:46:45<1:01:25,  8.60it/s]

finished frames 7464000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 75%|███████▍  | 93402/125000 [2:46:56<56:36,  9.30it/s]  

finished frames 7472000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 75%|███████▍  | 93502/125000 [2:47:07<57:49,  9.08it/s]  

finished frames 7480000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 75%|███████▍  | 93602/125000 [2:47:18<1:05:31,  7.99it/s]

finished frames 7488000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 75%|███████▍  | 93702/125000 [2:47:29<52:19,  9.97it/s]  

finished frames 7496000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 75%|███████▌  | 93802/125000 [2:47:40<1:00:31,  8.59it/s]

finished frames 7504000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 75%|███████▌  | 93903/125000 [2:47:50<54:39,  9.48it/s]  

finished frames 7512000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 75%|███████▌  | 94002/125000 [2:48:01<57:32,  8.98it/s]  

finished frames 7520000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 75%|███████▌  | 94102/125000 [2:48:12<1:00:33,  8.50it/s]

finished frames 7528000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 75%|███████▌  | 94202/125000 [2:48:23<52:47,  9.72it/s]  

finished frames 7536000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 75%|███████▌  | 94302/125000 [2:48:34<59:04,  8.66it/s]  

finished frames 7544000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 76%|███████▌  | 94403/125000 [2:48:45<51:11,  9.96it/s]  

finished frames 7552000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 76%|███████▌  | 94502/125000 [2:48:55<56:09,  9.05it/s]  

finished frames 7560000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 76%|███████▌  | 94602/125000 [2:49:06<58:20,  8.68it/s]  

finished frames 7568000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 76%|███████▌  | 94702/125000 [2:49:17<51:11,  9.86it/s]  

finished frames 7576000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 76%|███████▌  | 94802/125000 [2:49:28<58:59,  8.53it/s]  

finished frames 7584000, mean/median reward 0.6/0.0, min/max reward 0.0/3.0


 76%|███████▌  | 94902/125000 [2:49:39<50:05, 10.01it/s]  

finished frames 7592000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 76%|███████▌  | 95002/125000 [2:49:49<54:22,  9.19it/s]  

finished frames 7600000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 76%|███████▌  | 95102/125000 [2:50:00<58:37,  8.50it/s]  

finished frames 7608000, mean/median reward 0.4/0.0, min/max reward 0.0/4.0


 76%|███████▌  | 95202/125000 [2:50:11<49:13, 10.09it/s]  

finished frames 7616000, mean/median reward 0.2/0.0, min/max reward 0.0/4.0


 76%|███████▌  | 95302/125000 [2:50:22<55:09,  8.97it/s]  

finished frames 7624000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 76%|███████▋  | 95402/125000 [2:50:32<51:53,  9.51it/s]  

finished frames 7632000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 76%|███████▋  | 95502/125000 [2:50:43<54:12,  9.07it/s]  

finished frames 7640000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 76%|███████▋  | 95602/125000 [2:50:54<53:57,  9.08it/s]  

finished frames 7648000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 77%|███████▋  | 95701/125000 [2:51:05<49:06,  9.94it/s]

finished frames 7656000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 77%|███████▋  | 95802/125000 [2:51:16<59:16,  8.21it/s]  

finished frames 7664000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 77%|███████▋  | 95901/125000 [2:51:27<46:59, 10.32it/s]  

finished frames 7672000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 77%|███████▋  | 96002/125000 [2:51:37<54:32,  8.86it/s]

finished frames 7680000, mean/median reward 0.6/0.0, min/max reward 0.0/3.0


 77%|███████▋  | 96103/125000 [2:51:48<51:34,  9.34it/s]

finished frames 7688000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 77%|███████▋  | 96201/125000 [2:51:59<47:05, 10.19it/s]

finished frames 7696000, mean/median reward 0.7/0.0, min/max reward 0.0/6.0


 77%|███████▋  | 96302/125000 [2:52:10<53:38,  8.92it/s]

finished frames 7704000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 77%|███████▋  | 96403/125000 [2:52:21<47:18, 10.07it/s]

finished frames 7712000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 77%|███████▋  | 96502/125000 [2:52:31<52:52,  8.98it/s]  

finished frames 7720000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 77%|███████▋  | 96602/125000 [2:52:42<55:54,  8.47it/s]  

finished frames 7728000, mean/median reward 0.2/0.0, min/max reward 0.0/4.0


 77%|███████▋  | 96702/125000 [2:52:53<47:40,  9.89it/s]

finished frames 7736000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 77%|███████▋  | 96802/125000 [2:53:04<51:00,  9.21it/s]

finished frames 7744000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 78%|███████▊  | 96902/125000 [2:53:15<49:12,  9.52it/s]  

finished frames 7752000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 78%|███████▊  | 97002/125000 [2:53:25<53:07,  8.78it/s]

finished frames 7760000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 78%|███████▊  | 97102/125000 [2:53:36<54:07,  8.59it/s]

finished frames 7768000, mean/median reward 0.4/0.0, min/max reward 0.0/1.0


 78%|███████▊  | 97201/125000 [2:53:47<45:26, 10.20it/s]

finished frames 7776000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 78%|███████▊  | 97302/125000 [2:53:58<53:47,  8.58it/s]

finished frames 7784000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 78%|███████▊  | 97403/125000 [2:54:09<47:03,  9.77it/s]

finished frames 7792000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 78%|███████▊  | 97502/125000 [2:54:19<49:29,  9.26it/s]

finished frames 7800000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 78%|███████▊  | 97602/125000 [2:54:30<53:03,  8.61it/s]

finished frames 7808000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 78%|███████▊  | 97702/125000 [2:54:41<44:50, 10.15it/s]  

finished frames 7816000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 78%|███████▊  | 97802/125000 [2:54:52<55:16,  8.20it/s]

finished frames 7824000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 78%|███████▊  | 97903/125000 [2:55:03<44:13, 10.21it/s]

finished frames 7832000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 78%|███████▊  | 98002/125000 [2:55:13<48:59,  9.18it/s]

finished frames 7840000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 78%|███████▊  | 98102/125000 [2:55:24<54:48,  8.18it/s]

finished frames 7848000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 79%|███████▊  | 98202/125000 [2:55:35<48:43,  9.17it/s]

finished frames 7856000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 79%|███████▊  | 98302/125000 [2:55:46<50:29,  8.81it/s]

finished frames 7864000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 79%|███████▊  | 98402/125000 [2:55:57<43:03, 10.30it/s]

finished frames 7872000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 79%|███████▉  | 98502/125000 [2:56:07<51:39,  8.55it/s]

finished frames 7880000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 79%|███████▉  | 98602/125000 [2:56:18<50:09,  8.77it/s]

finished frames 7888000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 79%|███████▉  | 98702/125000 [2:56:29<45:21,  9.66it/s]

finished frames 7896000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 79%|███████▉  | 98802/125000 [2:56:40<47:14,  9.24it/s]

finished frames 7904000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 79%|███████▉  | 98903/125000 [2:56:50<43:53,  9.91it/s]

finished frames 7912000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 79%|███████▉  | 99002/125000 [2:57:01<46:38,  9.29it/s]

finished frames 7920000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 79%|███████▉  | 99102/125000 [2:57:12<51:21,  8.40it/s]

finished frames 7928000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 79%|███████▉  | 99202/125000 [2:57:23<44:45,  9.61it/s]

finished frames 7936000, mean/median reward 0.4/0.0, min/max reward 0.0/4.0


 79%|███████▉  | 99302/125000 [2:57:34<49:01,  8.74it/s]

finished frames 7944000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 80%|███████▉  | 99403/125000 [2:57:45<42:03, 10.14it/s]

finished frames 7952000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 80%|███████▉  | 99502/125000 [2:57:55<45:40,  9.31it/s]

finished frames 7960000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 80%|███████▉  | 99602/125000 [2:58:06<50:38,  8.36it/s]

finished frames 7968000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 80%|███████▉  | 99701/125000 [2:58:17<42:25,  9.94it/s]

finished frames 7976000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 80%|███████▉  | 99802/125000 [2:58:28<47:28,  8.85it/s]

finished frames 7984000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 80%|███████▉  | 99903/125000 [2:58:39<39:57, 10.47it/s]

finished frames 7992000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 80%|████████  | 100002/125000 [2:58:49<49:39,  8.39it/s]

finished frames 8000000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 80%|████████  | 100102/125000 [2:59:00<47:29,  8.74it/s]

finished frames 8008000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 80%|████████  | 100202/125000 [2:59:11<41:46,  9.89it/s]

finished frames 8016000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 80%|████████  | 100302/125000 [2:59:22<45:09,  9.12it/s]

finished frames 8024000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 80%|████████  | 100403/125000 [2:59:32<42:41,  9.60it/s]

finished frames 8032000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 80%|████████  | 100502/125000 [2:59:43<42:40,  9.57it/s]

finished frames 8040000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 80%|████████  | 100602/125000 [2:59:54<47:12,  8.61it/s]

finished frames 8048000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 81%|████████  | 100702/125000 [3:00:05<41:51,  9.67it/s]

finished frames 8056000, mean/median reward 0.2/0.0, min/max reward 0.0/3.0


 81%|████████  | 100802/125000 [3:00:16<45:33,  8.85it/s]

finished frames 8064000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 81%|████████  | 100902/125000 [3:00:26<43:49,  9.16it/s]

finished frames 8072000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 81%|████████  | 101002/125000 [3:00:37<44:39,  8.96it/s]

finished frames 8080000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 81%|████████  | 101102/125000 [3:00:48<44:05,  9.03it/s]

finished frames 8088000, mean/median reward 0.3/0.0, min/max reward 0.0/3.0


 81%|████████  | 101202/125000 [3:00:59<44:51,  8.84it/s]

finished frames 8096000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 81%|████████  | 101302/125000 [3:01:10<43:54,  9.00it/s]

finished frames 8104000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 81%|████████  | 101402/125000 [3:01:20<40:45,  9.65it/s]

finished frames 8112000, mean/median reward 0.7/0.0, min/max reward 0.0/6.0


 81%|████████  | 101502/125000 [3:01:31<42:35,  9.19it/s]

finished frames 8120000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 81%|████████▏ | 101602/125000 [3:01:42<44:54,  8.68it/s]

finished frames 8128000, mean/median reward 0.8/0.0, min/max reward 0.0/7.0


 81%|████████▏ | 101702/125000 [3:01:53<39:43,  9.77it/s]

finished frames 8136000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 81%|████████▏ | 101802/125000 [3:02:04<42:11,  9.16it/s]

finished frames 8144000, mean/median reward 0.7/1.0, min/max reward 0.0/2.0


 82%|████████▏ | 101902/125000 [3:02:14<40:10,  9.58it/s]

finished frames 8152000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 82%|████████▏ | 102002/125000 [3:02:25<42:07,  9.10it/s]

finished frames 8160000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 82%|████████▏ | 102102/125000 [3:02:36<42:43,  8.93it/s]

finished frames 8168000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 82%|████████▏ | 102201/125000 [3:02:47<35:38, 10.66it/s]

finished frames 8176000, mean/median reward 0.6/0.0, min/max reward 0.0/3.0


 82%|████████▏ | 102302/125000 [3:02:57<40:33,  9.33it/s]

finished frames 8184000, mean/median reward 0.7/0.0, min/max reward 0.0/4.0


 82%|████████▏ | 102402/125000 [3:03:08<45:32,  8.27it/s]

finished frames 8192000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 82%|████████▏ | 102502/125000 [3:03:19<41:27,  9.05it/s]

finished frames 8200000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 82%|████████▏ | 102602/125000 [3:03:30<43:06,  8.66it/s]

finished frames 8208000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 82%|████████▏ | 102701/125000 [3:03:41<35:12, 10.56it/s]

finished frames 8216000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 82%|████████▏ | 102802/125000 [3:03:52<42:34,  8.69it/s]

finished frames 8224000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 82%|████████▏ | 102903/125000 [3:04:02<37:03,  9.94it/s]

finished frames 8232000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 82%|████████▏ | 103002/125000 [3:04:13<42:01,  8.73it/s]

finished frames 8240000, mean/median reward 0.6/0.0, min/max reward 0.0/4.0


 82%|████████▏ | 103102/125000 [3:04:24<41:42,  8.75it/s]

finished frames 8248000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 83%|████████▎ | 103202/125000 [3:04:35<35:02, 10.37it/s]

finished frames 8256000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 83%|████████▎ | 103302/125000 [3:04:45<41:30,  8.71it/s]

finished frames 8264000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 83%|████████▎ | 103402/125000 [3:04:56<40:48,  8.82it/s]

finished frames 8272000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 83%|████████▎ | 103502/125000 [3:05:07<39:58,  8.96it/s]

finished frames 8280000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 83%|████████▎ | 103602/125000 [3:05:18<40:32,  8.80it/s]

finished frames 8288000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 83%|████████▎ | 103702/125000 [3:05:29<35:36,  9.97it/s]

finished frames 8296000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 83%|████████▎ | 103802/125000 [3:05:40<39:12,  9.01it/s]

finished frames 8304000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 83%|████████▎ | 103902/125000 [3:05:50<42:25,  8.29it/s]

finished frames 8312000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 83%|████████▎ | 104001/125000 [3:06:01<35:16,  9.92it/s]

finished frames 8320000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 83%|████████▎ | 104102/125000 [3:06:12<40:35,  8.58it/s]

finished frames 8328000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 83%|████████▎ | 104203/125000 [3:06:23<34:10, 10.14it/s]

finished frames 8336000, mean/median reward 0.9/0.0, min/max reward 0.0/4.0


 83%|████████▎ | 104302/125000 [3:06:33<39:29,  8.74it/s]

finished frames 8344000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 84%|████████▎ | 104402/125000 [3:06:44<39:36,  8.67it/s]

finished frames 8352000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 84%|████████▎ | 104502/125000 [3:06:55<34:47,  9.82it/s]

finished frames 8360000, mean/median reward 0.4/0.0, min/max reward 0.0/4.0


 84%|████████▎ | 104602/125000 [3:07:06<38:56,  8.73it/s]

finished frames 8368000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 84%|████████▍ | 104702/125000 [3:07:16<34:42,  9.74it/s]

finished frames 8376000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 84%|████████▍ | 104802/125000 [3:07:27<37:55,  8.88it/s]

finished frames 8384000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 84%|████████▍ | 104902/125000 [3:07:38<39:58,  8.38it/s]

finished frames 8392000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 84%|████████▍ | 105002/125000 [3:07:49<34:37,  9.63it/s]

finished frames 8400000, mean/median reward 0.4/0.0, min/max reward 0.0/1.0


 84%|████████▍ | 105102/125000 [3:08:00<37:11,  8.92it/s]

finished frames 8408000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 84%|████████▍ | 105202/125000 [3:08:11<32:30, 10.15it/s]

finished frames 8416000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 84%|████████▍ | 105302/125000 [3:08:21<35:07,  9.35it/s]

finished frames 8424000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 84%|████████▍ | 105402/125000 [3:08:32<38:31,  8.48it/s]

finished frames 8432000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 84%|████████▍ | 105502/125000 [3:08:43<36:41,  8.86it/s]

finished frames 8440000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 84%|████████▍ | 105602/125000 [3:08:54<36:52,  8.77it/s]

finished frames 8448000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 85%|████████▍ | 105703/125000 [3:09:04<33:09,  9.70it/s]

finished frames 8456000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 85%|████████▍ | 105802/125000 [3:09:15<35:31,  9.01it/s]

finished frames 8464000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 85%|████████▍ | 105902/125000 [3:09:26<36:19,  8.76it/s]

finished frames 8472000, mean/median reward 0.3/0.0, min/max reward 0.0/4.0


 85%|████████▍ | 106003/125000 [3:09:37<31:01, 10.21it/s]

finished frames 8480000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 85%|████████▍ | 106102/125000 [3:09:47<34:16,  9.19it/s]

finished frames 8488000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 85%|████████▍ | 106202/125000 [3:09:58<38:30,  8.14it/s]

finished frames 8496000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 85%|████████▌ | 106302/125000 [3:10:09<34:17,  9.09it/s]

finished frames 8504000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 85%|████████▌ | 106402/125000 [3:10:20<36:12,  8.56it/s]

finished frames 8512000, mean/median reward 0.6/0.0, min/max reward 0.0/3.0


 85%|████████▌ | 106502/125000 [3:10:31<30:07, 10.24it/s]

finished frames 8520000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 85%|████████▌ | 106602/125000 [3:10:41<35:05,  8.74it/s]

finished frames 8528000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 85%|████████▌ | 106702/125000 [3:10:52<33:45,  9.03it/s]

finished frames 8536000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 85%|████████▌ | 106802/125000 [3:11:03<31:42,  9.57it/s]

finished frames 8544000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 86%|████████▌ | 106902/125000 [3:11:14<32:46,  9.20it/s]

finished frames 8552000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 86%|████████▌ | 107001/125000 [3:11:25<29:53, 10.03it/s]

finished frames 8560000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 86%|████████▌ | 107102/125000 [3:11:35<32:28,  9.18it/s]

finished frames 8568000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 86%|████████▌ | 107203/125000 [3:11:46<30:19,  9.78it/s]

finished frames 8576000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 86%|████████▌ | 107302/125000 [3:11:57<31:51,  9.26it/s]

finished frames 8584000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 86%|████████▌ | 107402/125000 [3:12:08<35:21,  8.29it/s]

finished frames 8592000, mean/median reward 0.4/0.0, min/max reward 0.0/1.0


 86%|████████▌ | 107502/125000 [3:12:19<30:40,  9.51it/s]

finished frames 8600000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 86%|████████▌ | 107602/125000 [3:12:30<32:08,  9.02it/s]

finished frames 8608000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 86%|████████▌ | 107703/125000 [3:12:40<30:36,  9.42it/s]

finished frames 8616000, mean/median reward 0.4/0.0, min/max reward 0.0/4.0


 86%|████████▌ | 107802/125000 [3:12:51<31:38,  9.06it/s]

finished frames 8624000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 86%|████████▋ | 107902/125000 [3:13:02<31:49,  8.95it/s]

finished frames 8632000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 86%|████████▋ | 108002/125000 [3:13:13<31:08,  9.10it/s]

finished frames 8640000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 86%|████████▋ | 108102/125000 [3:13:24<31:57,  8.81it/s]

finished frames 8648000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 87%|████████▋ | 108202/125000 [3:13:35<26:39, 10.50it/s]

finished frames 8656000, mean/median reward 0.3/0.0, min/max reward 0.0/3.0


 87%|████████▋ | 108302/125000 [3:13:45<34:08,  8.15it/s]

finished frames 8664000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 87%|████████▋ | 108403/125000 [3:13:56<29:43,  9.31it/s]

finished frames 8672000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 87%|████████▋ | 108502/125000 [3:14:07<29:23,  9.36it/s]

finished frames 8680000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 87%|████████▋ | 108602/125000 [3:14:18<30:55,  8.84it/s]

finished frames 8688000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 87%|████████▋ | 108701/125000 [3:14:29<27:56,  9.72it/s]

finished frames 8696000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 87%|████████▋ | 108802/125000 [3:14:40<31:07,  8.67it/s]

finished frames 8704000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 87%|████████▋ | 108902/125000 [3:14:50<27:49,  9.64it/s]

finished frames 8712000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 87%|████████▋ | 109002/125000 [3:15:01<30:39,  8.70it/s]

finished frames 8720000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 87%|████████▋ | 109102/125000 [3:15:12<29:56,  8.85it/s]

finished frames 8728000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 87%|████████▋ | 109202/125000 [3:15:23<29:24,  8.95it/s]

finished frames 8736000, mean/median reward 0.4/0.0, min/max reward 0.0/1.0


 87%|████████▋ | 109302/125000 [3:15:34<28:51,  9.07it/s]

finished frames 8744000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 88%|████████▊ | 109402/125000 [3:15:44<26:29,  9.82it/s]

finished frames 8752000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 88%|████████▊ | 109502/125000 [3:15:55<28:28,  9.07it/s]

finished frames 8760000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 88%|████████▊ | 109602/125000 [3:16:06<29:23,  8.73it/s]

finished frames 8768000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 88%|████████▊ | 109701/125000 [3:16:17<25:29, 10.00it/s]

finished frames 8776000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 88%|████████▊ | 109802/125000 [3:16:28<29:05,  8.71it/s]

finished frames 8784000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 88%|████████▊ | 109902/125000 [3:16:39<25:30,  9.86it/s]

finished frames 8792000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 88%|████████▊ | 110002/125000 [3:16:49<29:11,  8.56it/s]

finished frames 8800000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 88%|████████▊ | 110102/125000 [3:17:00<29:27,  8.43it/s]

finished frames 8808000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 88%|████████▊ | 110202/125000 [3:17:11<24:55,  9.89it/s]

finished frames 8816000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 88%|████████▊ | 110302/125000 [3:17:22<28:35,  8.57it/s]

finished frames 8824000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 88%|████████▊ | 110403/125000 [3:17:33<23:17, 10.44it/s]

finished frames 8832000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 88%|████████▊ | 110502/125000 [3:17:43<26:42,  9.05it/s]

finished frames 8840000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 88%|████████▊ | 110602/125000 [3:17:54<27:01,  8.88it/s]

finished frames 8848000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 89%|████████▊ | 110702/125000 [3:18:05<25:12,  9.45it/s]

finished frames 8856000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 89%|████████▊ | 110802/125000 [3:18:16<26:20,  8.98it/s]

finished frames 8864000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 89%|████████▊ | 110902/125000 [3:18:27<22:40, 10.36it/s]

finished frames 8872000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 89%|████████▉ | 111002/125000 [3:18:38<26:44,  8.72it/s]

finished frames 8880000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 89%|████████▉ | 111102/125000 [3:18:48<24:40,  9.39it/s]

finished frames 8888000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 89%|████████▉ | 111202/125000 [3:18:59<25:50,  8.90it/s]

finished frames 8896000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 89%|████████▉ | 111302/125000 [3:19:10<25:50,  8.83it/s]

finished frames 8904000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 89%|████████▉ | 111401/125000 [3:19:21<22:13, 10.20it/s]

finished frames 8912000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 89%|████████▉ | 111502/125000 [3:19:32<26:01,  8.65it/s]

finished frames 8920000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 89%|████████▉ | 111603/125000 [3:19:42<24:39,  9.05it/s]

finished frames 8928000, mean/median reward 0.8/0.0, min/max reward 0.0/6.0


 89%|████████▉ | 111702/125000 [3:19:53<24:41,  8.97it/s]

finished frames 8936000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 89%|████████▉ | 111802/125000 [3:20:04<26:12,  8.39it/s]

finished frames 8944000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 90%|████████▉ | 111902/125000 [3:20:15<22:26,  9.73it/s]

finished frames 8952000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 90%|████████▉ | 112002/125000 [3:20:26<24:30,  8.84it/s]

finished frames 8960000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 90%|████████▉ | 112103/125000 [3:20:36<23:20,  9.21it/s]

finished frames 8968000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 90%|████████▉ | 112202/125000 [3:20:47<22:39,  9.42it/s]

finished frames 8976000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 90%|████████▉ | 112302/125000 [3:20:58<23:28,  9.02it/s]

finished frames 8984000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 90%|████████▉ | 112402/125000 [3:21:09<21:18,  9.85it/s]

finished frames 8992000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 90%|█████████ | 112502/125000 [3:21:19<24:23,  8.54it/s]

finished frames 9000000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 90%|█████████ | 112602/125000 [3:21:30<23:34,  8.76it/s]

finished frames 9008000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 90%|█████████ | 112702/125000 [3:21:41<21:23,  9.58it/s]

finished frames 9016000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 90%|█████████ | 112802/125000 [3:21:52<25:33,  7.95it/s]

finished frames 9024000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 90%|█████████ | 112902/125000 [3:22:03<20:11,  9.98it/s]

finished frames 9032000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 90%|█████████ | 113002/125000 [3:22:13<22:37,  8.84it/s]

finished frames 9040000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 90%|█████████ | 113102/125000 [3:22:24<21:27,  9.24it/s]

finished frames 9048000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 91%|█████████ | 113202/125000 [3:22:35<20:23,  9.65it/s]

finished frames 9056000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 91%|█████████ | 113302/125000 [3:22:46<21:31,  9.06it/s]

finished frames 9064000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 91%|█████████ | 113401/125000 [3:22:57<19:03, 10.14it/s]

finished frames 9072000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 91%|█████████ | 113502/125000 [3:23:08<22:07,  8.66it/s]

finished frames 9080000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 91%|█████████ | 113603/125000 [3:23:18<19:46,  9.61it/s]

finished frames 9088000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 91%|█████████ | 113702/125000 [3:23:29<22:00,  8.55it/s]

finished frames 9096000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 91%|█████████ | 113802/125000 [3:23:40<20:16,  9.21it/s]

finished frames 9104000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 91%|█████████ | 113901/125000 [3:23:51<18:02, 10.25it/s]

finished frames 9112000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 91%|█████████ | 114002/125000 [3:24:02<21:26,  8.55it/s]

finished frames 9120000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 91%|█████████▏| 114103/125000 [3:24:13<18:46,  9.67it/s]

finished frames 9128000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 91%|█████████▏| 114202/125000 [3:24:23<20:03,  8.97it/s]

finished frames 9136000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 91%|█████████▏| 114302/125000 [3:24:34<20:29,  8.70it/s]

finished frames 9144000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 92%|█████████▏| 114401/125000 [3:24:45<17:48,  9.92it/s]

finished frames 9152000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 92%|█████████▏| 114502/125000 [3:24:56<19:37,  8.91it/s]

finished frames 9160000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 92%|█████████▏| 114602/125000 [3:25:06<18:16,  9.48it/s]

finished frames 9168000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 92%|█████████▏| 114702/125000 [3:25:17<19:25,  8.84it/s]

finished frames 9176000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 92%|█████████▏| 114802/125000 [3:25:28<19:36,  8.67it/s]

finished frames 9184000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 92%|█████████▏| 114902/125000 [3:25:39<18:51,  8.92it/s]

finished frames 9192000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 92%|█████████▏| 115002/125000 [3:25:50<19:00,  8.76it/s]

finished frames 9200000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 92%|█████████▏| 115102/125000 [3:26:01<16:03, 10.27it/s]

finished frames 9208000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 92%|█████████▏| 115202/125000 [3:26:11<18:18,  8.92it/s]

finished frames 9216000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 92%|█████████▏| 115302/125000 [3:26:22<18:16,  8.84it/s]

finished frames 9224000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 92%|█████████▏| 115401/125000 [3:26:33<15:52, 10.07it/s]

finished frames 9232000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 92%|█████████▏| 115502/125000 [3:26:44<18:03,  8.77it/s]

finished frames 9240000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 92%|█████████▏| 115602/125000 [3:26:54<15:54,  9.85it/s]

finished frames 9248000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 93%|█████████▎| 115702/125000 [3:27:05<16:44,  9.26it/s]

finished frames 9256000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 93%|█████████▎| 115802/125000 [3:27:16<17:03,  8.99it/s]

finished frames 9264000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 93%|█████████▎| 115902/125000 [3:27:27<15:34,  9.74it/s]

finished frames 9272000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 93%|█████████▎| 116002/125000 [3:27:38<17:23,  8.62it/s]

finished frames 9280000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 93%|█████████▎| 116103/125000 [3:27:49<14:18, 10.36it/s]

finished frames 9288000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 93%|█████████▎| 116202/125000 [3:27:59<16:28,  8.90it/s]

finished frames 9296000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 93%|█████████▎| 116302/125000 [3:28:10<16:40,  8.70it/s]

finished frames 9304000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 93%|█████████▎| 116401/125000 [3:28:21<14:31,  9.87it/s]

finished frames 9312000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 93%|█████████▎| 116502/125000 [3:28:32<15:38,  9.05it/s]

finished frames 9320000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 93%|█████████▎| 116602/125000 [3:28:43<13:45, 10.17it/s]

finished frames 9328000, mean/median reward 0.8/0.0, min/max reward 0.0/3.0


 93%|█████████▎| 116702/125000 [3:28:54<15:38,  8.85it/s]

finished frames 9336000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 93%|█████████▎| 116802/125000 [3:29:04<14:06,  9.69it/s]

finished frames 9344000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 94%|█████████▎| 116902/125000 [3:29:15<15:56,  8.46it/s]

finished frames 9352000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 94%|█████████▎| 117002/125000 [3:29:26<15:29,  8.60it/s]

finished frames 9360000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 94%|█████████▎| 117101/125000 [3:29:37<13:22,  9.85it/s]

finished frames 9368000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 94%|█████████▍| 117202/125000 [3:29:48<15:08,  8.58it/s]

finished frames 9376000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 94%|█████████▍| 117303/125000 [3:29:58<13:22,  9.59it/s]

finished frames 9384000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 94%|█████████▍| 117402/125000 [3:30:09<13:04,  9.69it/s]

finished frames 9392000, mean/median reward 0.5/0.0, min/max reward 0.0/4.0


 94%|█████████▍| 117502/125000 [3:30:20<14:29,  8.62it/s]

finished frames 9400000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 94%|█████████▍| 117602/125000 [3:30:31<12:37,  9.76it/s]

finished frames 9408000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 94%|█████████▍| 117702/125000 [3:30:42<14:04,  8.64it/s]

finished frames 9416000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 94%|█████████▍| 117803/125000 [3:30:52<12:17,  9.75it/s]

finished frames 9424000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 94%|█████████▍| 117902/125000 [3:31:03<14:27,  8.18it/s]

finished frames 9432000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 94%|█████████▍| 118003/125000 [3:31:14<12:18,  9.48it/s]

finished frames 9440000, mean/median reward 0.5/0.0, min/max reward 0.0/2.0


 94%|█████████▍| 118101/125000 [3:31:25<11:35,  9.92it/s]

finished frames 9448000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 95%|█████████▍| 118202/125000 [3:31:36<13:09,  8.61it/s]

finished frames 9456000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 95%|█████████▍| 118302/125000 [3:31:47<13:04,  8.54it/s]

finished frames 9464000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 95%|█████████▍| 118402/125000 [3:31:58<13:18,  8.26it/s]

finished frames 9472000, mean/median reward 0.1/0.0, min/max reward 0.0/2.0


 95%|█████████▍| 118503/125000 [3:32:09<10:17, 10.53it/s]

finished frames 9480000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 95%|█████████▍| 118602/125000 [3:32:20<12:18,  8.66it/s]

finished frames 9488000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 95%|█████████▍| 118703/125000 [3:32:30<11:15,  9.32it/s]

finished frames 9496000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 95%|█████████▌| 118802/125000 [3:32:41<11:21,  9.09it/s]

finished frames 9504000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 95%|█████████▌| 118902/125000 [3:32:52<12:45,  7.97it/s]

finished frames 9512000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 95%|█████████▌| 119002/125000 [3:33:03<11:20,  8.81it/s]

finished frames 9520000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 95%|█████████▌| 119102/125000 [3:33:14<10:34,  9.29it/s]

finished frames 9528000, mean/median reward 0.6/0.0, min/max reward 0.0/4.0


 95%|█████████▌| 119202/125000 [3:33:25<09:21, 10.33it/s]

finished frames 9536000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 95%|█████████▌| 119302/125000 [3:33:35<11:08,  8.53it/s]

finished frames 9544000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 96%|█████████▌| 119402/125000 [3:33:46<10:51,  8.59it/s]

finished frames 9552000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 96%|█████████▌| 119502/125000 [3:33:57<09:40,  9.47it/s]

finished frames 9560000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 96%|█████████▌| 119602/125000 [3:34:08<11:15,  7.99it/s]

finished frames 9568000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 96%|█████████▌| 119701/125000 [3:34:19<09:05,  9.72it/s]

finished frames 9576000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 96%|█████████▌| 119802/125000 [3:34:30<10:09,  8.53it/s]

finished frames 9584000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 96%|█████████▌| 119903/125000 [3:34:40<09:44,  8.72it/s]

finished frames 9592000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 96%|█████████▌| 120002/125000 [3:34:51<08:43,  9.54it/s]

finished frames 9600000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 96%|█████████▌| 120102/125000 [3:35:02<09:47,  8.34it/s]

finished frames 9608000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 96%|█████████▌| 120203/125000 [3:35:13<08:04,  9.91it/s]

finished frames 9616000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 96%|█████████▌| 120302/125000 [3:35:23<08:57,  8.75it/s]

finished frames 9624000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 96%|█████████▋| 120403/125000 [3:35:34<08:36,  8.90it/s]

finished frames 9632000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 96%|█████████▋| 120502/125000 [3:35:45<08:20,  8.98it/s]

finished frames 9640000, mean/median reward 0.4/0.0, min/max reward 0.0/3.0


 96%|█████████▋| 120602/125000 [3:35:56<08:14,  8.89it/s]

finished frames 9648000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 97%|█████████▋| 120702/125000 [3:36:07<07:23,  9.69it/s]

finished frames 9656000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 97%|█████████▋| 120802/125000 [3:36:18<07:56,  8.81it/s]

finished frames 9664000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 97%|█████████▋| 120902/125000 [3:36:28<07:21,  9.28it/s]

finished frames 9672000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 97%|█████████▋| 121002/125000 [3:36:39<07:10,  9.30it/s]

finished frames 9680000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 97%|█████████▋| 121102/125000 [3:36:50<07:34,  8.58it/s]

finished frames 9688000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 97%|█████████▋| 121202/125000 [3:37:01<06:03, 10.43it/s]

finished frames 9696000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 97%|█████████▋| 121302/125000 [3:37:11<07:01,  8.78it/s]

finished frames 9704000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 97%|█████████▋| 121403/125000 [3:37:22<06:19,  9.48it/s]

finished frames 9712000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 97%|█████████▋| 121502/125000 [3:37:33<06:01,  9.66it/s]

finished frames 9720000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 97%|█████████▋| 121602/125000 [3:37:44<06:49,  8.30it/s]

finished frames 9728000, mean/median reward 0.3/0.0, min/max reward 0.0/1.0


 97%|█████████▋| 121702/125000 [3:37:55<05:55,  9.27it/s]

finished frames 9736000, mean/median reward 0.6/0.0, min/max reward 0.0/4.0


 97%|█████████▋| 121802/125000 [3:38:06<06:05,  8.74it/s]

finished frames 9744000, mean/median reward 0.5/0.0, min/max reward 0.0/4.0


 98%|█████████▊| 121902/125000 [3:38:16<05:23,  9.59it/s]

finished frames 9752000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 98%|█████████▊| 122002/125000 [3:38:27<05:32,  9.01it/s]

finished frames 9760000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 98%|█████████▊| 122102/125000 [3:38:38<05:51,  8.23it/s]

finished frames 9768000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 98%|█████████▊| 122202/125000 [3:38:49<05:00,  9.33it/s]

finished frames 9776000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 98%|█████████▊| 122302/125000 [3:39:00<05:06,  8.81it/s]

finished frames 9784000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 98%|█████████▊| 122401/125000 [3:39:11<04:10, 10.37it/s]

finished frames 9792000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


 98%|█████████▊| 122502/125000 [3:39:22<04:44,  8.79it/s]

finished frames 9800000, mean/median reward 0.6/0.0, min/max reward 0.0/2.0


 98%|█████████▊| 122602/125000 [3:39:33<03:51, 10.34it/s]

finished frames 9808000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 98%|█████████▊| 122702/125000 [3:39:44<04:43,  8.11it/s]

finished frames 9816000, mean/median reward 0.7/0.0, min/max reward 0.0/4.0


 98%|█████████▊| 122803/125000 [3:39:54<03:57,  9.26it/s]

finished frames 9824000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 98%|█████████▊| 122902/125000 [3:40:05<03:40,  9.50it/s]

finished frames 9832000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 98%|█████████▊| 123002/125000 [3:40:16<03:51,  8.63it/s]

finished frames 9840000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 98%|█████████▊| 123102/125000 [3:40:27<03:17,  9.63it/s]

finished frames 9848000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 99%|█████████▊| 123202/125000 [3:40:38<03:33,  8.43it/s]

finished frames 9856000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 99%|█████████▊| 123303/125000 [3:40:49<02:52,  9.84it/s]

finished frames 9864000, mean/median reward 0.3/0.0, min/max reward 0.0/2.0


 99%|█████████▊| 123402/125000 [3:40:59<03:03,  8.73it/s]

finished frames 9872000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 99%|█████████▉| 123503/125000 [3:41:10<02:42,  9.20it/s]

finished frames 9880000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 99%|█████████▉| 123602/125000 [3:41:21<02:31,  9.25it/s]

finished frames 9888000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


 99%|█████████▉| 123702/125000 [3:41:32<02:28,  8.74it/s]

finished frames 9896000, mean/median reward 0.4/0.0, min/max reward 0.0/1.0


 99%|█████████▉| 123802/125000 [3:41:43<02:04,  9.59it/s]

finished frames 9904000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 99%|█████████▉| 123902/125000 [3:41:54<02:02,  8.94it/s]

finished frames 9912000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


 99%|█████████▉| 124002/125000 [3:42:04<01:42,  9.71it/s]

finished frames 9920000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


 99%|█████████▉| 124102/125000 [3:42:15<01:36,  9.28it/s]

finished frames 9928000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


 99%|█████████▉| 124202/125000 [3:42:26<01:32,  8.59it/s]

finished frames 9936000, mean/median reward 0.5/0.0, min/max reward 0.0/3.0


 99%|█████████▉| 124302/125000 [3:42:37<01:13,  9.47it/s]

finished frames 9944000, mean/median reward 0.0/0.0, min/max reward 0.0/0.0


100%|█████████▉| 124402/125000 [3:42:48<01:07,  8.90it/s]

finished frames 9952000, mean/median reward 0.2/0.0, min/max reward 0.0/2.0


100%|█████████▉| 124502/125000 [3:42:58<00:52,  9.44it/s]

finished frames 9960000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


100%|█████████▉| 124602/125000 [3:43:09<00:45,  8.78it/s]

finished frames 9968000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


100%|█████████▉| 124702/125000 [3:43:20<00:34,  8.71it/s]

finished frames 9976000, mean/median reward 0.4/0.0, min/max reward 0.0/2.0


100%|█████████▉| 124802/125000 [3:43:31<00:22,  8.71it/s]

finished frames 9984000, mean/median reward 0.1/0.0, min/max reward 0.0/1.0


100%|█████████▉| 124902/125000 [3:43:42<00:11,  8.90it/s]

finished frames 9992000, mean/median reward 0.2/0.0, min/max reward 0.0/1.0


100%|██████████| 125000/125000 [3:43:52<00:00,  9.31it/s]
